/* * A set of functions for fittign an impulse function * (C) Gal Chechik 2004 */ #include #include #include #include #include #include #include #include #include "impulse.h" #include "io.h" #include "main.h" #include "nrutil.h" #include "nr.h" using namespace std; /* * * Amodel - 2 sigmoids sharing the same beta (h0,h1,h2,t1,t2,b) * Bmodel - a single sigmoid (h0,h1,t1,b) * Cmodel - two sigmoids, two different betas (h0,h1,h2,t1,t2,b1,b2) * Dmodel - two impulses (h0,h1,h2,h3,h4,t1,t2,t3,t4,b); * Emodel - sum 2 sigs (same slope) (h0,h1,h2,t1,t2,b) * Fmodel - sum 2 sigs (diff slope) (h0,h1,h2,t1,t2,b1,b2) * Gmodel - same as Amodel with extra zeros in data * Hmodel - same as Cmodel with extra zeros in data * Imodel - same as Gmodel with Prior on slopes * Jmodel - same as Bmodel with extra zeros in data * */ float *global_xs; float *global_ys; int global_n_points; long global_seed=1; float global_fraction_noise=0.1; float global_additive_noise=0.05; int global_stability_version = 3; int N_MODEL_PARMS; // No of parameters in the model chosen // For hand tuned priors float global_hand_prior_weight=0.05; int thetas_do_hand_priors; // For empirical priors int thetas_do_emp_priors; float ** thetas_means; float ** thetas_covar; float * thetas_emp_prior; #define N_CLUSTERS (10) #define EMP_PRIOR_WEIGHT (1.0) #define GAUSSIAN_MODE (1) #define PI (3.141592654) /* Hand tuned priors on beta shuld be a decaying exponential: p(b) = exp(-abs(b)) ; - log(p(b)) = abs(b) d (-logp(b)) / d(b) = sign(beta) */ int set_n_model_parms(char model) { switch(model){ case 'A': N_MODEL_PARMS = N_AMODEL_PARMS;break; case 'B': N_MODEL_PARMS = N_BMODEL_PARMS;break; case 'C': N_MODEL_PARMS = N_CMODEL_PARMS;break; case 'D': N_MODEL_PARMS = N_DMODEL_PARMS;break; case 'G': N_MODEL_PARMS = N_AMODEL_PARMS;break; case 'H': N_MODEL_PARMS = N_CMODEL_PARMS;break; case 'I': N_MODEL_PARMS = N_AMODEL_PARMS;break; case 'J': N_MODEL_PARMS = N_BMODEL_PARMS;break; default: cerr << "impulse.cpp: Illegal model = '" << model << "'\n"; exit(1); } cout << "N_MODEL_PARMS = " << N_MODEL_PARMS << "\n"; return N_MODEL_PARMS; } // ------------------------------------------------- string set_fitfilename(char *segnum, string gene_name, char model) { string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/Fit/seg" + segnum + "/seg" + segnum + "_" + gene_name + "_fit.m"; return outfilename; } // ------------------------------------------------- string set_stabfilename(char *segnum, string gene_name, char model, string stability_version){ string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/Stability" + stability_version + '0' + "/seg" + segnum + "/seg" + segnum + "_" + gene_name + "_stab.m"; return outfilename; } // ------------------------------------------------- string set_pvalfilename(char *segnum, string gene_name, char model){ string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/Pvals/seg" + segnum + "/seg" + segnum + "_" + gene_name + "_pval.m"; return outfilename; } // ------------------------------------------------- string set_cvfilename(char *segnum, string gene_name, char model){ string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/FillMiss/seg" + segnum + "/seg" + segnum + "_" + gene_name + "_cvfil.m"; return outfilename; } // ------------------------------------------------- string set_theta_mean_filename(char *segnum, char model){ string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/ThetaStats/seg" + segnum + "_means.txt"; return outfilename; } // ------------------------------------------------- string set_theta_covar_filename(char *segnum, char model){ string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/ThetaStats/seg" + segnum + "_covars.txt"; return outfilename; } // ------------------------------------------------- string set_theta_prior_filename(char *segnum, char model){ string outfilename = homedir()+BASE_DIR + "SingleGenesFit/" + model + "model/ThetaStats/seg" + segnum + "_priors.txt"; return outfilename; } // ------------------------------------------------- // gaussian random number // float randn(long *idum) { float ran1(long *idum); static int iset=0; static float gset; float fac,rsq,v1,v2; if (*idum < 0) iset=0; if (iset == 0) { do { v1=2.0*ran1(idum)-1.0; v2=2.0*ran1(idum)-1.0; rsq=v1*v1+v2*v2; } while (rsq >= 1.0 || rsq == 0.0); fac=sqrt(-2.0*log(rsq)/rsq); gset=v1*fac; iset=1; return v2*fac; } else { iset=0; return gset; } } // ------------------------------------------------- // randperm: create a random permutation void randperm (int *perm, int n) { int to,tmp,from; for (int i=0;i0) ? + 1 : -1); } // ------------------------------------------------- // calc_deriv_model_log_emp_prior // ------------------------------------------------- float calc_deriv_model_log_emp_prior(float *x, int i_x, float **means, float **covar, float *weights, int n_clusters, int dim, int mode) { // d log(prior) / dtheta = // 1/prior \sum_i p(c_i) gauss(x,mu_i,std_i) * (x-mu_i)/ 2 std_i float f=0; for(int i_clust=0;i_clust f(n); float dh0 = 0.0, dh1 = 0.0, dh2 = 0.0; float dt1 = 0.0, dt2 = 0.0, db1 = 0.0; float invh1 = 1.0/h1; float e; for(int i=0;i f(n); float dh0 = 0.0, dh1 = 0.0; float dt1 = 0.0, db1 = 0.0; float invh1 = 1.0/h1; for(int i=0;i f(n); float dh0 = 0.0; float dh1 = 0.0; float dh2 = 0.0; float dt1 = 0.0; float dt2 = 0.0; float db1 = 0.0; float db2 = 0.0; float invh1 = 1.0/h1; float e; for(int i=0;i f(n); float dh0 = 0.0, dh1 = 0.0, dh2 = 0.0, dh3 = 0.0, dh4 = 0.0; float dt1 = 0.0, dt2 = 0.0, dt3 = 0.0, dt4 = 0.0; float db1 = 0.0; float invh1 = 1.0/h1; float invh3 = 1.0/h3; float e1, e3; for(int i=0;i f(n); float dh0 = 0.0, dh1 = 0.0, dh2 = 0.0; float dt1 = 0.0, dt2 = 0.0; float db1 = 0.0; float invh1 = 1.0/h1; float e; for(int i=0;i f(n); float dh0 = 0.0, dh1 = 0.0, dh2 = 0.0; float dt1 = 0.0, dt2 = 0.0, db1 = 0.0, db2 = 0.0; float invh1 = 1.0/h1; float e; for(int i=0;i-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } // cout << "n_repeats ="<-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat-0.01){ if(theta_base[1]>0){ theta_base[1] = 0.01; }else{ theta_base[1] = -0.01; } } for(int i_repeat=0;i_repeat segs_inds) { int n_valid=0; //cout << "count_valid: \n"; // count valid unsigned int n_zeros=0; for(unsigned int i=0;i segs_lags, vector segs_inds, int zero_transform, int num_extra_zeros) { // Set Xs and Ys global_n_points=0; // Add extra zeros for (int i= num_extra_zeros; i>0; i--) { global_xs[global_n_points] = -10 * i; global_ys[global_n_points] = 0; global_n_points++; } if(zero_transform){ global_xs[global_n_points] = 0; global_ys[global_n_points] = 0; global_n_points++; } for(unsigned int i=0;i segs_lags, vector segs_inds, ofstream& outstream, int zero_transform, char model) { string segs_inds_name = "segs_inds"; write_vec_matlab(segs_inds_name,segs_inds); string segs_lags_name = "segs_lags"; write_vec_matlab(segs_lags_name,segs_lags); // int n_valid = count_valid(mask, express, i_gene, segs_inds); cout << "n valid "<< n_valid << "\n"; double err=0.0; if(n_valid>2){ // Number of extra zeros to add int num_extra_zeros = 0; if (model == 'G' || model == 'H' || model == 'I' || model =='J'){ num_extra_zeros = MODEL_GH_NUM_EXTRA_ZEROS; // model G pushes for df(t)/dt=0 at t=0 } // Set Xs and Ys set_xs_ys(express, mask, i_gene, segs_lags, segs_inds, zero_transform, num_extra_zeros); string gxs_name = "global_xs"; write_vec_matlab(gxs_name,global_xs,global_n_points); write_vec_matlab(outstream,gxs_name,global_xs,global_n_points); string gys_name = "global_ys"; write_vec_matlab(gys_name,global_ys,global_n_points); write_vec_matlab(outstream,gys_name,global_ys,global_n_points); // Fit the model err = cg_fit_single_nr(model,theta,zero_transform,N_REPEATS) ; }else{ // Skip modeling this gene cout << "SKIP: Only " << n_valid << " valid measurments.\n"; err=0; for(int i=0;i segs_lags, vector segs_inds, int i_gene, int n_shuffles, int n_valid, double err, ofstream &outstream, int zero_transform, char model) { // Headers outstream<<"n_shuffles="<< setw (4) < segs_lags, vector segs_inds, int i_gene, int n_shuffles, int n_valid, double err, ofstream &outstream, int zero_transform, char model) { // Headers outstream << "n_shuffles=" << setw(4) << n_shuffles << ";\n"; outstream << "n_valid=" << setfill(' ')<< setw(4) < segs_lags, vector segs_inds, int n_valid, ofstream &outstream, int zero_transform, char model) { // Headers outstream<<"cvtheta=zeros("< -0.01){ cout << setprecision(10)< paper_names; string papers_filename= homedir() + BASE_DIR + "papers.lst"; read_string_vector(papers_filename, paper_names); // Read from Compendium float ** express1; bool ** mask1; int n_fields1; string expression_filename = homedir() + EXPRESS_DIR + "Compendium/data.tab"; cout << "\nRead Expression from " << expression_filename<<"\n"; read_expression_mat(expression_filename, n_skip, n_headers, express1, mask1, n_genes, n_fields1); cout << endl; cout << " Read "<< n_genes+1 << " genes.\n"; cout << " Read "<< n_fields1 << " fields1.\n"; concat_to_bool_mat (mask, mask1, n_genes, 0, n_fields1); concat_to_float_mat(express, express1, n_genes, 0, n_fields1); n_fields = n_fields1; cout << " Total "<< n_fields << " fields.\n"; // Read from individual papers for(unsigned int i=0; i