// Adam Sadovsky // 06/22/05 using namespace std; #include #include #include #ifndef _graph_h #define _graph_h #define CONFIG_FILE "config.dat" #define S_MOTIFS_FILE "motifs_s.dat" #define W_MOTIFS_FILE "motifs_w.dat" #define CMPD "_cmpd" #define S_MODE 0 // strong #define W_MODE 1 // weak typedef unsigned int uint; class Graph { public: struct Edge; struct Node { int nodeId; vector* parentGenes; vector* childGenes; bool visited; // used by recBuildRelns to check for cycles }; struct Edge { int geneId; vector* startNodes; vector* endNodes; // each vector in ancestors/descendants arrays holds ancestors/descendants of distance // (vector's array index) + 1 vector** ancestors; vector** descendants; // parallel vectors that store paths to ancestors/descendants vector > >** ancPaths; vector > >** desPaths; bool visited; // used by recBuildRelns to check for cycles }; struct CmnRel { int nodeId; int dist1; // distance from first gene int index1; // index of node in first gene's anc/des depth=dist1 vector int dist2; int index2; }; // Graph constructor loads config data, builds the graph, and builds ancestors/descendants // arrays for each gene Graph(); // Graph destructor ~Graph(); // Methods for finding motifs in the graph void findAllMotifs(); private: // Reads the thresholds, the data file name, and other config info void readConfig(); // Builds the graph from the data file void readData(); // Inits a node to the given nodeId and sets parent/child vectors to NULL and visited to false void initNode(Graph::Node* node, int nodeId); // Inits a gene to the given geneId, creates ancestors/descendants arrays, and sets each // ancestors/descendants vector to NULL void initGene(Graph::Edge* gene, int geneId); // Builds ancestors/descendants vectors for each gene void buildRelns(); void recBuildDes(Graph::Edge* startGene, Graph::Edge* currGene, int currDist, vector >* currPath); void recBuildAnc(Graph::Edge* startGene, Graph::Edge* currGene, int currDist, vector >* currPath); // motif file helper methods int size_YH(int t, int tail_t); int index_YH(int i, int j, int t, int tail_len); int size_VA(int t); int index_VA(int i, int j, int t); int size_P(); int index_P(int i, int j, int k, int g); // Inits current motif files to NULL void initCurrFiles(); // Closes current motif files void closeCurrFiles(); // Recursively finds cycles containing startNode in the graph void recFindMotifs_C(Graph::Node* startNode, Graph::Node* currNode, int currLength, int* currPath); bool haveCommonDes(Graph::Edge* gene1, Graph::Edge* gene2, int threshold); bool haveCommonAnc(Graph::Edge* gene1, Graph::Edge* gene2, int threshold); vector* findCommonDes(Graph::Edge* gene1, Graph::Edge* gene2, int threshold); vector* findCommonAnc(Graph::Edge* gene1, Graph::Edge* gene2, int threshold); static bool set_merge(set& used_nodes, set& curr_nodes); static bool fpath_merge(const vector >& fpath, set& used_nodes); static bool rpath_merge(const vector >& rpath, set& used_nodes); static bool intersect_V(const vector >& left_path, const vector >& right_path); static bool intersect_A(const vector >& left_path, const vector >& right_path); static bool intersect_P(const vector >& u_left_path, const vector >& u_right_path, const vector >& b_left_path, const vector >& b_right_path); static bool intersect_T(vector > t_path, Edge* end_gene); static bool intersect_y(const vector > left_path, const vector > right_path, const vector > tail_path, Edge* left_end, Edge* right_end, Edge* tail_end); static bool intersect_h(const vector > left_path, const vector > right_path, const vector > tail_path, Edge* left_end, Edge* right_end, Edge* tail_end); void findMotifs_V(); void findMotifs_A(); void findMotifs_P(); void findMotifs_T(); void findMotifs_C(); void findMotifs_vand_vnec(); void findMotifs_aand_anec(); void findMotifs_y(); void findMotifs_h(); void outputMotif_V(int gene1, int gene2, int dist1, int dist2, int nodeId); void outputMotif_A(int gene1, int gene2, int dist1, int dist2, int nodeId); void outputMotif_P(int gene1, int gene2, int dist1a, int dist1v, int dist2a, int dist2v); void outputMotif_T(int startGene, int endGene, int dist); void outputMotif_Tchn(int startGene, int endGene, const vector >& t_path, int chn_length); void outputMotif_C(int length, int* path); void outputMotif_y(int gene1, int gene2, int gene3, int dist1, int dist2, int tail_dist); void outputMotif_h(int gene1, int gene2, int gene3, int dist1, int dist2, int tail_dist); void outputMotif_vand(int gene1, int gene2, int dist1, int dist2); void outputMotif_aand(int gene1, int gene2, int dist1, int dist2); void outputMotif_vnec(int gene1, int gene2, int dist1, int dist2); void outputMotif_anec(int gene1, int gene2, int dist1, int dist2); Node** nodes; Edge** genes; int numNodes; int numGenes; // name of data file string data_file; // ancestors/descendants depth threshold int threshold; // Motif thresholds int v_anc_threshold; int v_des_threshold; int a_anc_threshold; int a_des_threshold; int p_anc_threshold; int p_des_threshold; int t_threshold; int tchn_threshold; int c_threshold; int y_des_threshold; int h_anc_threshold; int y_tail_threshold; int h_tail_threshold; int allow_vand; int allow_vnec; int allow_aand; int allow_anec; // Motif output file info string out_directory; string out_extension; string out_separator; string out_delimiter; // motif output file extension string tmp_extension; // verbosity level int verbose; // motif ofstreams ofstream** v_out; ofstream** a_out; ofstream** p_out; ofstream** t_out; ofstream** tchn_out; ofstream** c_out; ofstream** y_out; ofstream** vand_out; ofstream** vnec_out; ofstream** h_out; ofstream** aand_out; ofstream** anec_out; // motif ofstreams for cmpd files ofstream** v_out_cmpd; ofstream** a_out_cmpd; ofstream** tchn_out_cmpd; // ofstream for file that lists motifs that were found ofstream* motifsFile; int allow_intersect; int graph_mode; // strong or weak }; #endif