#include #define PY_SSIZE_T_CLEAN #include #include "svm.h" #include "_svm_cython_blas_helpers.h" #ifndef MAX #define MAX(x, y) (((x) > (y)) ? (x) : (y)) #endif /* * Convert scipy.sparse.csr to libsvm's sparse data structure */ struct svm_csr_node **csr_to_libsvm (double *values, int* indices, int* indptr, int n_samples) { struct svm_csr_node **sparse, *temp; int i, j=0, k=0, n; sparse = malloc (n_samples * sizeof(struct svm_csr_node *)); if (sparse == NULL) return NULL; for (i=0; isvm_type = svm_type; param->kernel_type = kernel_type; param->degree = degree; param->coef0 = coef0; param->nu = nu; param->cache_size = cache_size; param->C = C; param->eps = eps; param->p = p; param->shrinking = shrinking; param->probability = probability; param->nr_weight = nr_weight; param->weight_label = (int *) weight_label; param->weight = (double *) weight; param->gamma = gamma; param->max_iter = max_iter; param->random_seed = random_seed; return param; } /* * Create and return a svm_csr_problem struct from a scipy.sparse.csr matrix. It is * up to the user to free resulting structure. * * TODO: precomputed kernel. */ struct svm_csr_problem * csr_set_problem (char *values, Py_ssize_t *n_indices, char *indices, Py_ssize_t *n_indptr, char *indptr, char *Y, char *sample_weight, int kernel_type) { struct svm_csr_problem *problem; problem = malloc (sizeof (struct svm_csr_problem)); if (problem == NULL) return NULL; problem->l = (int) n_indptr[0] - 1; problem->y = (double *) Y; problem->x = csr_to_libsvm((double *) values, (int *) indices, (int *) indptr, problem->l); /* should be removed once we implement weighted samples */ problem->W = (double *) sample_weight; if (problem->x == NULL) { free(problem); return NULL; } return problem; } struct svm_csr_model *csr_set_model(struct svm_parameter *param, int nr_class, char *SV_data, Py_ssize_t *SV_indices_dims, char *SV_indices, Py_ssize_t *SV_indptr_dims, char *SV_intptr, char *sv_coef, char *rho, char *nSV, char *probA, char *probB) { struct svm_csr_model *model; double *dsv_coef = (double *) sv_coef; int i, m; m = nr_class * (nr_class-1)/2; if ((model = malloc(sizeof(struct svm_csr_model))) == NULL) goto model_error; if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL) goto nsv_error; if ((model->label = malloc(nr_class * sizeof(int))) == NULL) goto label_error; if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL) goto sv_coef_error; if ((model->rho = malloc( m * sizeof(double))) == NULL) goto rho_error; // This is only allocated in dynamic memory while training. model->n_iter = NULL; /* in the case of precomputed kernels we do not use dense_to_precomputed because we don't want the leading 0. As indices start at 1 (not at 0) this will work */ model->l = (int) SV_indptr_dims[0] - 1; model->SV = csr_to_libsvm((double *) SV_data, (int *) SV_indices, (int *) SV_intptr, model->l); model->nr_class = nr_class; model->param = *param; /* * regression and one-class does not use nSV, label. */ if (param->svm_type < 2) { memcpy(model->nSV, nSV, model->nr_class * sizeof(int)); for(i=0; i < model->nr_class; i++) model->label[i] = i; } for (i=0; i < model->nr_class-1; i++) { /* * We cannot squash all this mallocs in a single call since * svm_destroy_model will free each element of the array. */ if ((model->sv_coef[i] = malloc((model->l) * sizeof(double))) == NULL) { int j; for (j=0; jsv_coef[j]); goto sv_coef_i_error; } memcpy(model->sv_coef[i], dsv_coef, (model->l) * sizeof(double)); dsv_coef += model->l; } for (i=0; irho)[i] = -((double *) rho)[i]; } /* * just to avoid segfaults, these features are not wrapped but * svm_destroy_model will try to free them. */ if (param->probability) { if ((model->probA = malloc(m * sizeof(double))) == NULL) goto probA_error; memcpy(model->probA, probA, m * sizeof(double)); if ((model->probB = malloc(m * sizeof(double))) == NULL) goto probB_error; memcpy(model->probB, probB, m * sizeof(double)); } else { model->probA = NULL; model->probB = NULL; } /* We'll free SV ourselves */ model->free_sv = 0; return model; probB_error: free(model->probA); probA_error: for (i=0; i < model->nr_class-1; i++) free(model->sv_coef[i]); sv_coef_i_error: free(model->rho); rho_error: free(model->sv_coef); sv_coef_error: free(model->label); label_error: free(model->nSV); nsv_error: free(model); model_error: return NULL; } /* * Copy support vectors into a scipy.sparse.csr matrix */ int csr_copy_SV (char *data, Py_ssize_t *n_indices, char *indices, Py_ssize_t *n_indptr, char *indptr, struct svm_csr_model *model, int n_features) { int i, j, k=0, index; double *dvalues = (double *) data; int *iindices = (int *) indices; int *iindptr = (int *) indptr; iindptr[0] = 0; for (i=0; il; ++i) { /* iterate over support vectors */ index = model->SV[i][0].index; for(j=0; index >=0 ; ++j) { iindices[k] = index - 1; dvalues[k] = model->SV[i][j].value; index = model->SV[i][j+1].index; ++k; } iindptr[i+1] = k; } return 0; } /* get number of nonzero coefficients in support vectors */ Py_ssize_t get_nonzero_SV (struct svm_csr_model *model) { int i, j; Py_ssize_t count=0; for (i=0; il; ++i) { j = 0; while (model->SV[i][j].index != -1) { ++j; ++count; } } return count; } /* * Predict using a model, where data is expected to be encoded into a csr matrix. */ int csr_copy_predict (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size, char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model, char *dec_values, BlasFunctions *blas_functions) { double *t = (double *) dec_values; struct svm_csr_node **predict_nodes; Py_ssize_t i; predict_nodes = csr_to_libsvm((double *) data, (int *) index, (int *) intptr, intptr_size[0]-1); if (predict_nodes == NULL) return -1; for(i=0; i < intptr_size[0] - 1; ++i) { *t = svm_csr_predict(model, predict_nodes[i], blas_functions); free(predict_nodes[i]); ++t; } free(predict_nodes); return 0; } int csr_copy_predict_values (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size, char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model, char *dec_values, int nr_class, BlasFunctions *blas_functions) { struct svm_csr_node **predict_nodes; Py_ssize_t i; predict_nodes = csr_to_libsvm((double *) data, (int *) index, (int *) intptr, intptr_size[0]-1); if (predict_nodes == NULL) return -1; for(i=0; i < intptr_size[0] - 1; ++i) { svm_csr_predict_values(model, predict_nodes[i], ((double *) dec_values) + i*nr_class, blas_functions); free(predict_nodes[i]); } free(predict_nodes); return 0; } int csr_copy_predict_proba (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size, char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model, char *dec_values, BlasFunctions *blas_functions) { struct svm_csr_node **predict_nodes; Py_ssize_t i; int m = model->nr_class; predict_nodes = csr_to_libsvm((double *) data, (int *) index, (int *) intptr, intptr_size[0]-1); if (predict_nodes == NULL) return -1; for(i=0; i < intptr_size[0] - 1; ++i) { svm_csr_predict_probability( model, predict_nodes[i], ((double *) dec_values) + i*m, blas_functions); free(predict_nodes[i]); } free(predict_nodes); return 0; } Py_ssize_t get_nr(struct svm_csr_model *model) { return (Py_ssize_t) model->nr_class; } void copy_intercept(char *data, struct svm_csr_model *model, Py_ssize_t *dims) { /* intercept = -rho */ Py_ssize_t i, n = dims[0]; double t, *ddata = (double *) data; for (i=0; irho[i]; /* we do this to avoid ugly -0.0 */ *ddata = (t != 0) ? -t : 0; ++ddata; } } void copy_support (char *data, struct svm_csr_model *model) { memcpy (data, model->sv_ind, (model->l) * sizeof(int)); } /* * Some helpers to convert from libsvm sparse data structures * model->sv_coef is a double **, whereas data is just a double *, * so we have to do some stupid copying. */ void copy_sv_coef(char *data, struct svm_csr_model *model) { int i, len = model->nr_class-1; double *temp = (double *) data; for(i=0; isv_coef[i], sizeof(double) * model->l); temp += model->l; } } /* * Get the number of iterations run in optimization */ void copy_n_iter(char *data, struct svm_csr_model *model) { const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2); memcpy(data, model->n_iter, n_models * sizeof(int)); } /* * Get the number of support vectors in a model. */ Py_ssize_t get_l(struct svm_csr_model *model) { return (Py_ssize_t) model->l; } void copy_nSV(char *data, struct svm_csr_model *model) { if (model->label == NULL) return; memcpy(data, model->nSV, model->nr_class * sizeof(int)); } /* * same as above with model->label * TODO: merge in the cython layer */ void copy_label(char *data, struct svm_csr_model *model) { if (model->label == NULL) return; memcpy(data, model->label, model->nr_class * sizeof(int)); } void copy_probA(char *data, struct svm_csr_model *model, Py_ssize_t * dims) { memcpy(data, model->probA, dims[0] * sizeof(double)); } void copy_probB(char *data, struct svm_csr_model *model, Py_ssize_t * dims) { memcpy(data, model->probB, dims[0] * sizeof(double)); } /* * Some free routines. Some of them are nontrivial since a lot of * sharing happens across objects (they *must* be called in the * correct order) */ int free_problem(struct svm_csr_problem *problem) { int i; if (problem == NULL) return -1; for (i=0; il; ++i) free (problem->x[i]); free (problem->x); free (problem); return 0; } int free_model(struct svm_csr_model *model) { /* like svm_free_and_destroy_model, but does not free sv_coef[i] */ /* We don't free n_iter, since we did not create them in set_model. */ if (model == NULL) return -1; free(model->SV); free(model->sv_coef); free(model->rho); free(model->label); free(model->probA); free(model->probB); free(model->nSV); free(model); return 0; } int free_param(struct svm_parameter *param) { if (param == NULL) return -1; free(param); return 0; } int free_model_SV(struct svm_csr_model *model) { int i; for (i=model->l-1; i>=0; --i) free(model->SV[i]); /* svn_destroy_model frees model->SV */ for (i=0; i < model->nr_class-1 ; ++i) free(model->sv_coef[i]); /* svn_destroy_model frees model->sv_coef */ return 0; } /* borrowed from original libsvm code */ static void print_null(const char *s) {} static void print_string_stdout(const char *s) { fputs(s,stdout); fflush(stdout); } /* provide convenience wrapper */ void set_verbosity(int verbosity_flag){ if (verbosity_flag) svm_set_print_string_function(&print_string_stdout); else svm_set_print_string_function(&print_null); }