/* * main for fitting impulse model using a plain text input * (C) Gal Chechik 2007 */ #include "impulse.h" #include "io.h" #include "main.h" #include "nrutil.h" #include "nr.h" #include #include #include #include #include #include #include #include using namespace std; string global_base_dir = "./Output"; // ====================================================== void set_filename(string & filename, const char mode, string & dataset, const int i_gene) { char charstr[128]; sprintf(charstr,"%s/%04d",dataset.c_str(),i_gene+1); string str1(charstr); filename = str1; switch(mode){ case 'c': filename += "_cv";break; case 'f': filename += "_fit";break; case 'p': filename += "_pval";break; case 's': filename += "_stab";break; default: cerr << "Illegal mode\n"; exit(1); } filename += ".m"; return; } // ====================================================== void ParseParms(int argc, char * argv[], string & dataset, int & n_shuffles, int & do_force, char & mode, int & stability_version, int & n_genes_from, int & n_genes_to, int & zero_transform, char & model, string &data_tab_filename_from_user, string &mask_tab_filename_from_user, string &time_tab_filename_from_user, int & read_genes_list, string & genes_filename_from_user, int & n_genes_from_user, int & n_fields_from_user ) { int n_args=0; int tmp; n_shuffles = 100; do_force = 1; mode = 'f'; stability_version = 0; n_genes_from = 0; n_genes_to = -1; zero_transform = 0; model = 'I'; dataset = global_base_dir.c_str(); read_genes_list = 0; n_genes_from_user = 0; n_fields_from_user = 0; if (argc <= 1){ cout << "\nNot enough input arguments: Use " << argv[0] << " -h for instructions.\n\n"; exit(0); } while (n_args 'J'){ cerr << "Illegal model code\n";exit(1);} break; case 'n': n_args++; n_genes_from = IntFromString(argv[n_args]); cout << " n_genes_from="<< n_genes_from<<"\n"; break; case 't': n_args++; n_genes_to = IntFromString(argv[n_args]); cout << " n_genes_to="<< n_genes_to<<"\n"; break; case 'E': n_args++; data_tab_filename_from_user = argv[n_args]; cout << " data_tab_filename_from_user="<< data_tab_filename_from_user<<"\n"; break; case 'T': n_args++; time_tab_filename_from_user = argv[n_args]; cout << " time_tab_filename_from_user="<< time_tab_filename_from_user<<"\n"; break; case 'M': n_args++; mask_tab_filename_from_user = argv[n_args]; cout << " mask_tab_filename_from_user="<< mask_tab_filename_from_user<<"\n"; break; // case 'S': n_args++; stability_version = IntFromString(argv[n_args]); // cout << " Set Stability Version ="<< stability_version<<"\n"; // break; //case 'z': zero_transform = IntFromString(argv[n_args]); //cout << " zerotransform ="<< zero_transform<<"\n"; break; default: cout << "\nUnknown option - '" << argv[n_args][1] << "'\n"; case 'h': cout << "This binary fits an impulse model to time course.\n"; cout << "Gal Chechik (C) 2003-2008\n"; cout << "The model used here adds three pseudo measurements\n"; cout << "with zero values at times [-30,-20,-10]\n\n"; cout << "\nOptions: \n"; cout << "\t -h = this help\n"; cout << "\t -d = output directory name. default = 'Output'.\n"; // cout << "\t -c mode = 'f' - fit, 'p' -pvals, 's' - stability, 'c' -cross validation\n"; // cout << "\t -f = force. step on older files\n"; // cout << "\t -P = use previously calculated emp_priors\n"; // cout << "\t -H = use Hand-tuned priors\n"; // cout << "\t -m val = model (A/B/C../I) default = 'I'\n"; // cout << "\t -n val = n_genes from\n"; // cout << "\t -S ver = Stability: repeat fit with noised measurments. ver=2..4\n"; // cout << "\t -t val = n_genes to\n"; // cout << "\t -z val = zero_transform (default=0)\n"; // cout << "\t -l val = n_shuffles (default=100)\n\n"; // cout << "Tab-files input mode\n"; cout << "\t -E filename = tab delimited file with expression data\n"; cout << "\t -T filename = tab delimited file with measurments times\n"; cout << "\t -M filename = tab delimited file wirth mask of missing values\n"; cout << "\t -G value = number of genes\n"; cout << "\t -F value = number of fields per gene\n"; cout << " Examples: \n"; cout << " impulse -E express.tab -T timing.tab -G 1 -F 6 -M mask.tab\n"; // cout << " im -m G -S 2 - adds 0.05 noise, G model\n\n"; exit(0); break; } } n_args ++; } return; } void read_data_mask_timing(string data_tab_filename_from_user, string mask_tab_filename_from_user, string time_tab_filename_from_user, float **&express, bool **&mask, vector&lags, int n_fields_from_user, int n_genes_from_user) { cout << "Read Data from '" << data_tab_filename_from_user<<"'\n"; read_float_mat(data_tab_filename_from_user.c_str(),express,n_genes_from_user,n_fields_from_user); cout << "Read Mask from " << mask_tab_filename_from_user<<"\n"; read_bool_mat(mask_tab_filename_from_user, mask, n_genes_from_user, n_fields_from_user); cout << "Read Time from " << time_tab_filename_from_user<<"\n"; read_float_vector(time_tab_filename_from_user, lags); if ((unsigned)n_fields_from_user != lags.size()){ cerr << "inconsistent input: \n"; cerr << " #time points in Expression data (n_fileds_from_user) = " << n_fields_from_user << endl; cerr << " #time lags in '" << time_tab_filename_from_user << "' = " << lags.size() << endl; exit(1); } return; } /* ============================================== * Function main * ============================================== */ int main(int argc, char * argv[]) { // Init and read params int n_shuffles, n_genes_from, n_genes_to; int n_genes_from_user, n_fields_from_user; int do_force = 0; int zero_transform; string data_tab_filename_from_user; string mask_tab_filename_from_user; string time_tab_filename_from_user; int read_genes_list; string genes_filename_from_user; char model, mode; string dataset; int stability_version; ParseParms(argc, argv, dataset, n_shuffles, do_force, mode, stability_version, n_genes_from, n_genes_to, zero_transform, model, data_tab_filename_from_user, mask_tab_filename_from_user, time_tab_filename_from_user, read_genes_list, genes_filename_from_user, n_genes_from_user,n_fields_from_user); int n_model_parms = set_n_model_parms(model); float *theta=new float[n_model_parms]; float ** express; bool ** mask; vector lags; int n_genes, n_fields=0; // Read data read_data_mask_timing(data_tab_filename_from_user, mask_tab_filename_from_user, time_tab_filename_from_user, express,mask,lags,n_fields_from_user,n_genes_from_user); n_genes = n_genes_from_user; n_fields = n_fields_from_user; cout << "\t n_fields_from_user = " << n_fields_from_user << endl; cout << "\t n_fields (timing)= " << lags.size() << endl; cout << "\t n_genes_from_user = " << n_genes_from_user << endl; cout << "\t n_genes_from = " << n_genes_from << endl; cout << "\t n_genes_to = " << n_genes_to << endl; cout << "\t n_genes = " << n_genes << endl; vector segs_inds; for (int i=0; i< n_fields_from_user; i++){segs_inds.push_back(i+1);}; int num_extra_zeros = 0; if (model == 'G' || model == 'H' || model == 'I' || model == 'J'){ num_extra_zeros = MODEL_GH_NUM_EXTRA_ZEROS; // model G enforces df(t)/dt=0 at t=0 } cout << "===>num_extra_zeros = " << num_extra_zeros << endl; alloc_xs_ys(lags.size()+zero_transform+num_extra_zeros); //global_ys=new float[lags.size()+zero_transform+num_extra_zeros]; //global_xs=new float[lags.size()+zero_transform+num_extra_zeros]; if (n_genes_to < 0) {n_genes_to = n_genes_from_user;} //cout << "n_genes_from_user = " << n_genes_from_user << endl; //cout << "n_genes_from = " << n_genes_from << endl; //cout << "n_genes_to = " << n_genes_to << endl; //cout << "n_genes = " << n_genes << endl; if(n_genes_to>n_genes) {n_genes_to = n_genes;} cout << "Loop " << n_genes_from << " - " << n_genes_to-1 << " mode = " << mode << endl; double err; int n_valid; for(int i_gene=n_genes_from;i_gene