123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "svm.h"
/*
 * Silent replacement for printf: accepts a printf-style format plus
 * arguments, discards all of them, and always reports 0.
 */
int print_null(const char *s, ...) {
    (void) s; /* deliberately unused */
    return 0;
}
/* Sink for informational messages; starts out as printf. */
static int (*info)(const char *fmt, ...) = printf;

/* Array of svm_node entries describing the instance currently being handled. */
struct svm_node *x;
/* Number of svm_node slots initially reserved for x. */
int max_nr_attr = 64;

/* The SVM model used for prediction. */
struct svm_model *model;
/* Non-zero when probability estimates are requested; off by default. */
int predict_probability = 0;
/* Growable buffer that receives each input line read by readline(). */
static char *line = NULL;
/* Current capacity of `line`, in bytes. */
static int max_line_len;

/*
 * Read one complete line from `input` into the global `line` buffer.
 *
 * The caller must already have allocated `line` with room for
 * `max_line_len` bytes. While no '\n' has been seen the buffer is doubled
 * and reading continues, so lines of any length are handled; both `line`
 * and `max_line_len` are updated when the buffer grows.
 *
 * Returns `line` (newline retained; a final line lacking a newline is
 * returned as-is), or NULL when nothing could be read (EOF or read error).
 * Prints a message to stderr and exits with status 1 if the buffer cannot
 * be grown.
 */
static char *readline(FILE *input) {
    if (fgets(line, max_line_len, input) == NULL)
        return NULL;

    while (strrchr(line, '\n') == NULL) {
        char *grown;
        int len;

        /* Doubling must stay representable in the int capacity counter. */
        if (max_line_len > INT_MAX / 2) {
            fprintf(stderr, "input line too long\n");
            exit(1);
        }
        /* Keep the old pointer until realloc is known to have succeeded. */
        grown = (char *) realloc(line, (size_t) max_line_len * 2);
        if (grown == NULL) {
            fprintf(stderr, "out of memory while reading input\n");
            exit(1);
        }
        line = grown;
        max_line_len *= 2;

        /* Append the next chunk right after what has been read so far. */
        len = (int) strlen(line);
        if (fgets(line + len, max_line_len - len, input) == NULL)
            break;
    }
    return line;
}
/*
 * Report a malformed input line on stderr and terminate the program.
 *
 * line_num: number of the offending line, printed in the message.
 * Never returns; the process exits with status 1.
 */
void exit_input_error(int line_num) {
    fprintf(stderr,
            "Wrong input format at line %d\n",
            line_num);
    exit(1);
}
- void predict(FILE *input, FILE *output) {
- int correct = 0;
- int total = 0;
- double error = 0;
- double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0;
- int svm_type = svm_get_svm_type(model);
- int nr_class = svm_get_nr_class(model);
- double *prob_estimates = NULL;
- int j;
- if (predict_probability) {
- if (svm_type == NU_SVR || svm_type == EPSILON_SVR)
- info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",
- svm_get_svr_probability(model));
- else if (svm_type == ONE_CLASS) {
- // nr_class = 2 for ONE_CLASS
- prob_estimates = (double *) malloc(nr_class * sizeof(double));
- fprintf(output, "label normal outlier\n");
- } else {
- int *labels = (int *) malloc(nr_class * sizeof(int));
- svm_get_labels(model, labels);
- prob_estimates = (double *) malloc(nr_class * sizeof(double));
- fprintf(output, "labels");
- for (j = 0; j < nr_class; j++)
- fprintf(output, " %d", labels[j]);
- fprintf(output, "\n");
- free(labels);
- }
- }
- max_line_len = 1024;
- line = (char *) malloc(max_line_len * sizeof(char));
- while (readline(input) != NULL) {
- int i = 0;
- double target_label, predict_label;
- char *idx, *val, *label, *endptr;
- int inst_max_index = -1; // strtol gives 0 if wrong format, and precomputed kernel has <index> start from 0
- label = strtok(line, " \t\n");
- if (label == NULL) // empty line
- exit_input_error(total + 1);
- target_label = strtod(label, &endptr);
- if (endptr == label || *endptr != '\0')
- exit_input_error(total + 1);
- while (1) {
- if (i >= max_nr_attr - 1) // need one more for index = -1
- {
- max_nr_attr *= 2;
- x = (struct svm_node *) realloc(x, max_nr_attr * sizeof(struct svm_node));
- }
- idx = strtok(NULL, ":");
- val = strtok(NULL, " \t");
- if (val == NULL)
- break;
- errno = 0;
- x[i].index = (int) strtol(idx, &endptr, 10);
- if (endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
- exit_input_error(total + 1);
- else
- inst_max_index = x[i].index;
- errno = 0;
- x[i].value = strtod(val, &endptr);
- if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
- exit_input_error(total + 1);
- ++i;
- }
- x[i].index = -1;
- if (predict_probability && (svm_type == C_SVC || svm_type == NU_SVC || svm_type == ONE_CLASS)) {
- predict_label = svm_predict_probability(model, x, prob_estimates);
- fprintf(output, "%g", predict_label);
- for (j = 0; j < nr_class; j++)
- fprintf(output, " %g", prob_estimates[j]);
- fprintf(output, "\n");
- } else {
- predict_label = svm_predict(model, x);
- fprintf(output, "%.17g\n", predict_label);
- }
- if (predict_label == target_label)
- ++correct;
- error += (predict_label - target_label) * (predict_label - target_label);
- sump += predict_label;
- sumt += target_label;
- sumpp += predict_label * predict_label;
- sumtt += target_label * target_label;
- sumpt += predict_label * target_label;
- ++total;
- }
- if (svm_type == NU_SVR || svm_type == EPSILON_SVR) {
- info("Mean squared error = %g (regression)\n", error / total);
- info("Squared correlation coefficient = %g (regression)\n",
- ((total * sumpt - sump * sumt) * (total * sumpt - sump * sumt)) /
- ((total * sumpp - sump * sump) * (total * sumtt - sumt * sumt))
- );
- } else
- info("Accuracy = %g%% (%d/%d) (classification)\n",
- (double) correct / total * 100, correct, total);
- if (predict_probability)
- free(prob_estimates);
- }
/*
 * Print the command-line usage summary to stdout and terminate with
 * exit status 1. Never returns.
 */
void exit_with_help(void) {
    printf(
        "Usage: svm-predict [options] test_file model_file output_file\n"
        "options:\n"
        "-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)\n"
        "-q : quiet mode (no outputs)\n"
    );
    exit(1);
}
- int main(int argc, char **argv) {
- FILE *input, *output;
- int i;
- // parse options
- for (i = 1; i < argc; i++) {
- if (argv[i][0] != '-') break;
- ++i;
- switch (argv[i - 1][1]) {
- case 'b':
- predict_probability = atoi(argv[i]);
- break;
- case 'q':
- info = &print_null;
- i--;
- break;
- default:
- fprintf(stderr, "Unknown option: -%c\n", argv[i - 1][1]);
- exit_with_help();
- }
- }
- if (i >= argc - 2)
- exit_with_help();
- input = fopen(argv[i], "r");
- if (input == NULL) {
- fprintf(stderr, "can't open input file %s\n", argv[i]);
- exit(1);
- }
- output = fopen(argv[i + 2], "w");
- if (output == NULL) {
- fprintf(stderr, "can't open output file %s\n", argv[i + 2]);
- exit(1);
- }
- if ((model = svm_load_model(argv[i + 1])) == 0) {
- fprintf(stderr, "can't open model file %s\n", argv[i + 1]);
- exit(1);
- }
- x = (struct svm_node *) malloc(max_nr_attr * sizeof(struct svm_node));
- if (predict_probability) {
- if (svm_check_probability_model(model) == 0) {
- fprintf(stderr, "Model does not support probabiliy estimates\n");
- exit(1);
- }
- } else {
- if (svm_check_probability_model(model) != 0)
- info("Model supports probability estimates, but disabled in prediction.\n");
- }
- predict(input, output);
- svm_free_and_destroy_model(&model);
- free(x);
- free(line);
- fclose(input);
- fclose(output);
- return 0;
- }
|