// Adam Sadovsky // 06/22/05 using namespace std; #include "graph.h" #include "ConfigFile.h" #include #include #include #include #include #include #include #include #include typedef vector nodeVec; typedef vector edgeVec; typedef nodeVec::iterator nodeVecIter; typedef edgeVec::iterator edgeVecIter; typedef pair pathStep; typedef vector path; typedef path::iterator pathIter; typedef path::const_iterator pathConstIter; typedef vector pathVec; typedef pathVec::iterator pathVecIter; typedef vector cmnRelVec; typedef cmnRelVec::iterator cmnRelVecIter; Graph::Graph() { readConfig(); // set ancestors/descendants depth threshold threshold = -1; if (v_anc_threshold > threshold) threshold = v_anc_threshold; if (v_des_threshold > threshold) threshold = v_des_threshold; if (a_anc_threshold > threshold) threshold = a_anc_threshold; if (a_des_threshold > threshold) threshold = a_des_threshold; if (p_anc_threshold > threshold) threshold = p_anc_threshold; if (p_des_threshold > threshold) threshold = p_des_threshold; if (t_threshold > threshold) threshold = t_threshold; // if (c_threshold > threshold) threshold = c_threshold; if (graph_mode == W_MODE) { if (y_des_threshold > threshold) threshold = y_des_threshold; if (h_anc_threshold > threshold) threshold = h_anc_threshold; } readData(); buildRelns(); } Graph::~Graph() { // delete nodes for (int i = 0; i < numNodes; i++) { if (nodes[i] != NULL) { delete nodes[i]->parentGenes; delete nodes[i]->childGenes; delete nodes[i]; } } delete[] nodes; // delete genes for (int i = 0; i < numGenes; i++) { if (genes[i] != NULL) { delete genes[i]->startNodes; delete genes[i]->endNodes; for (int j = 0; j < threshold; j++) { delete genes[i]->ancestors[j]; delete genes[i]->descendants[j]; } delete[] genes[i]->descendants; for (int j = 0; j < threshold; j++) { delete genes[i]->ancPaths[j]; delete genes[i]->desPaths[j]; } delete[] genes[i]->ancPaths; delete[] genes[i]->desPaths; delete genes[i]; } } delete[] genes; } // ======================================================== // Read config file // ======================================================== void Graph::readConfig() { // open the config file ConfigFile config(CONFIG_FILE); config.readInto(graph_mode, "graph_mode"); // read thresholds config.readInto(v_anc_threshold, "v_anc_threshold"); config.readInto(v_des_threshold, "v_des_threshold"); config.readInto(a_anc_threshold, "a_anc_threshold"); config.readInto(a_des_threshold, "a_des_threshold"); config.readInto(p_anc_threshold, "p_anc_threshold"); config.readInto(p_des_threshold, "p_des_threshold"); config.readInto(t_threshold, "t_threshold"); config.readInto(tchn_threshold, "tchn_threshold"); config.readInto(c_threshold, "c_threshold"); if (graph_mode == W_MODE) { config.readInto(y_des_threshold, "y_des_threshold"); config.readInto(h_anc_threshold, "h_anc_threshold"); config.readInto(y_tail_threshold, "y_tail_threshold"); config.readInto(h_tail_threshold, "h_tail_threshold"); config.readInto(allow_vand, "allow_vand"); config.readInto(allow_vnec, "allow_vnec"); config.readInto(allow_aand, "allow_aand"); config.readInto(allow_anec, "allow_anec"); } // read output info config.readInto(out_directory, "out_directory"); config.readInto(out_extension, "out_extension"); config.readInto(out_separator, "out_separator"); config.readInto(out_delimiter, "out_delimiter"); // fix out_delimiter whitespace const string whitespace_fake[] = {"\\s", "\\n", "\\t", "\\v", "\\r", "\\f"}; const string whitespace_real[] = {" ", "\n", "\t", "\v", "\r", "\f"}; for (int i = 0; i < 6; i++) { while (true) { string::size_type pos = out_delimiter.find(whitespace_fake[i]); if (pos == string::npos) break; out_delimiter.replace(pos, whitespace_fake[i].size(), whitespace_real[i]); } } config.readInto(data_file, "data_file"); config.readInto(tmp_extension, "tmp_extension"); config.readInto(verbose, "verbose"); config.readInto(allow_intersect, "allow_intersect"); } // ======================================================== // Read data file and build graph // ======================================================== void Graph::readData() { if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: read data file and build graph\n"; // open the data file ifstream infile; infile.open(data_file.c_str()); if (infile.fail()) { cerr << "** Error: graph.cpp couldn't open file " << data_file << " **\n"; exit(1); } // temps to store a gene id and start/end node ids (one line from file) int gene_id; string start_ids, end_ids; // get number of nodes/genes numNodes = 0; numGenes = 0; while (infile >> gene_id >> start_ids >> end_ids) { string::size_type prevIndex = 0, currIndex; while (prevIndex != start_ids.length()) { currIndex = start_ids.find('+', prevIndex + 1); if (currIndex == string::npos) currIndex = start_ids.length(); int start_id = atoi(start_ids.substr(prevIndex, currIndex - prevIndex).c_str()); if (start_id >= numNodes) numNodes = start_id + 1; prevIndex = currIndex; } prevIndex = 0; while (prevIndex != end_ids.length()) { currIndex = end_ids.find('+', prevIndex + 1); if (currIndex == string::npos) currIndex = end_ids.length(); int end_id = atoi(end_ids.substr(prevIndex, currIndex - prevIndex).c_str()); if (end_id >= numNodes) numNodes = end_id + 1; prevIndex = currIndex; } if (gene_id >= numGenes) numGenes = gene_id + 1; } // create nodes/genes arrays nodes = new Graph::Node*[numNodes]; genes = new Graph::Edge*[numGenes]; // initialize nodes/genes elements to NULL for (int i = 0; i < numNodes; i++) { nodes[i] = NULL; } for (int i = 0; i < numGenes; i++) { genes[i] = NULL; } // reset file and read data into nodes/genes arrays infile.clear(); infile.seekg(0, ios::beg); while (infile >> gene_id >> start_ids >> end_ids) { // init gene genes[gene_id] = new Graph::Edge; initGene(genes[gene_id], gene_id); string::size_type prevIndex = 0, currIndex; // init start nodes while (prevIndex != start_ids.length()) { currIndex = start_ids.find('+', prevIndex + 1); if (currIndex == string::npos) currIndex = start_ids.length(); int start_id = atoi(start_ids.substr(prevIndex, currIndex - prevIndex).c_str()); if (nodes[start_id] == NULL) { nodes[start_id] = new Graph::Node; initNode(nodes[start_id], start_id); } // update start node data nodes[start_id]->childGenes->push_back(genes[gene_id]); prevIndex = currIndex; } prevIndex = 0; // init end nodes while (prevIndex != end_ids.length()) { currIndex = end_ids.find('+', prevIndex + 1); if (currIndex == string::npos) currIndex = end_ids.length(); int end_id = atoi(end_ids.substr(prevIndex, currIndex - prevIndex).c_str()); if (nodes[end_id] == NULL) { nodes[end_id] = new Graph::Node; initNode(nodes[end_id], end_id); } // update end node data nodes[end_id]->parentGenes->push_back(genes[gene_id]); prevIndex = currIndex; } prevIndex = 0; // update gene start nodes while (prevIndex != start_ids.length()) { currIndex = start_ids.find('+', prevIndex + 1); if (currIndex == string::npos) currIndex = start_ids.length(); int start_id = atoi(start_ids.substr(prevIndex, currIndex - prevIndex).c_str()); genes[gene_id]->startNodes->push_back(nodes[start_id]); prevIndex = currIndex; } prevIndex = 0; // update gene end nodes while (prevIndex != end_ids.length()) { currIndex = end_ids.find('+', prevIndex + 1); if (currIndex == string::npos) currIndex = end_ids.length(); int end_id = atoi(end_ids.substr(prevIndex, currIndex - prevIndex).c_str()); genes[gene_id]->endNodes->push_back(nodes[end_id]); prevIndex = currIndex; } } // close the file infile.close(); } void Graph::initNode(Graph::Node* node, int nodeId) { node->nodeId = nodeId; node->parentGenes = new edgeVec; node->childGenes = new edgeVec; node->visited = false; } void Graph::initGene(Graph::Edge* gene, int geneId) { gene->geneId = geneId; gene->startNodes = new nodeVec; gene->endNodes = new nodeVec; gene->ancestors = new nodeVec*[threshold]; gene->descendants = new nodeVec*[threshold]; for (int i = 0; i < threshold; i++) { gene->ancestors[i] = new nodeVec; gene->descendants[i] = new nodeVec; } gene->ancPaths = new vector*[threshold]; gene->desPaths = new vector*[threshold]; for (int i = 0; i < threshold; i++) { gene->ancPaths[i] = new vector; gene->desPaths[i] = new vector; } gene->visited = false; } // ======================================================== // Build ancestors/descendants relations // ======================================================== void Graph::buildRelns() { if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: build ancestors/descendants relations\n"; for (int gene = 0; gene < numGenes; gene++) { if (genes[gene] != NULL) { path* currPath = new path; recBuildDes(genes[gene], genes[gene], 1, currPath); assert(currPath->size() == 0); recBuildAnc(genes[gene], genes[gene], 1, currPath); assert(currPath->size() == 0); delete currPath; } } } void Graph::recBuildDes(Graph::Edge* startGene, Graph::Edge* currGene, int currDist, path* currPath) { if (currDist > threshold) return; // reached threshold distance if (currGene->visited == true) return; currGene->visited = true; for (nodeVecIter i = currGene->endNodes->begin(); i != currGene->endNodes->end(); i++) { if ((*i)->visited == false) { (*i)->visited = true; pathStep currStep; currStep.first = currGene; currStep.second = (*i); currPath->push_back(currStep); startGene->descendants[currDist - 1]->push_back(*i); startGene->desPaths[currDist - 1]->push_back(*currPath); for (edgeVecIter j = (*i)->childGenes->begin(); j != (*i)->childGenes->end(); j++) { recBuildDes(startGene, (*j), currDist + 1, currPath); } (*i)->visited = false; currPath->pop_back(); } } currGene->visited = false; } void Graph::recBuildAnc(Graph::Edge* startGene, Graph::Edge* currGene, int currDist, path* currPath) { if (currDist > threshold) return; // reached threshold distance if (currGene->visited == true) return; currGene->visited = true; for (nodeVecIter i = currGene->startNodes->begin(); i != currGene->startNodes->end(); i++) { if ((*i)->visited == false) { (*i)->visited = true; pathStep currStep; currStep.first = currGene; currStep.second = (*i); currPath->push_back(currStep); startGene->ancestors[currDist - 1]->push_back(*i); startGene->ancPaths[currDist - 1]->push_back(*currPath); for (edgeVecIter j = (*i)->parentGenes->begin(); j != (*i)->parentGenes->end(); j++) { recBuildAnc(startGene, (*j), currDist + 1, currPath); } (*i)->visited = false; currPath->pop_back(); } } currGene->visited = false; } // ======================================================== // Utility methods // ======================================================== // returns true if the file/directory with the given name exists bool exists(string name) { struct stat sbuf; return (stat(name.c_str(), &sbuf) == 0); } void Graph::findAllMotifs() { if (!exists(out_directory)) mkdir(out_directory.c_str(), 0755); // make sure the directory now exists if (!exists(out_directory)) { cerr << "** Error: graph.cpp couldn't mkdir " << out_directory << " **\n"; exit(1); } initCurrFiles(); // init to NULL findMotifs_V(); findMotifs_A(); findMotifs_P(); findMotifs_T(); findMotifs_C(); if (graph_mode == W_MODE) { findMotifs_vand_vnec(); findMotifs_y(); findMotifs_aand_anec(); findMotifs_h(); } closeCurrFiles(); } // returns Y/H size given y_des_threshold/h_anc_threshold=t, tail_threshold=tail_t int Graph::size_YH(int t, int tail_t) { return (tail_t + 1) * size_VA(t); } // returns Y/H index given d1/a1=i, d2/a2=j, v_des_threshold/a_anc_threshold=t, // tail length=tail_len (note tail_len=0 is allowed) int Graph::index_YH(int i, int j, int t, int tail_len) { return tail_len * size_VA(t) + index_VA(i, j, t); } // returns V/A size given v_des_threshold/a_anc_threshold=t int Graph::size_VA(int t) { return (t * t); } // returns V/A index given d1/a1=i, d2/a2=j, v_des_threshold/a_anc_threshold=t int Graph::index_VA(int i, int j, int t) { return ((((i - 1) * t) + j) - 1); } // returns P size int Graph::size_P() { int d = p_des_threshold; int a = p_anc_threshold; return (a * d * a * d); } // returns P index given a1=i, d1=j, a2=k, d2=g int Graph::index_P(int i, int j, int k, int g) { int d = p_des_threshold; int a = p_anc_threshold; return ((((i - 1) * d * a * d) + ((j - 1) * a * d) + ((k - 1) * d) + g) - 1); } void Graph::initCurrFiles() { if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: init current motif files\n"; v_out = new ofstream*[size_VA(v_des_threshold)]; v_out_cmpd = new ofstream*[size_VA(v_des_threshold)]; a_out = new ofstream*[size_VA(a_anc_threshold)]; a_out_cmpd = new ofstream*[size_VA(a_anc_threshold)]; p_out = new ofstream*[size_P()]; t_out = new ofstream*[t_threshold]; if (tchn_threshold >= 2) tchn_out = new ofstream*[tchn_threshold - 1]; if (tchn_threshold >= 2) tchn_out_cmpd = new ofstream*[tchn_threshold - 1]; c_out = new ofstream*[c_threshold]; // init v/v_cmpd motif files to NULL for (int i = 0; i < size_VA(v_des_threshold); i++) { v_out[i] = NULL; v_out_cmpd[i] = NULL; } // init a/a_cmpd motif files to NULL for (int i = 0; i < size_VA(a_anc_threshold); i++) { a_out[i] = NULL; a_out_cmpd[i] = NULL; } // init p motif files to NULL for (int i = 0; i < size_P(); i++) { p_out[i] = NULL; } // init t motif files to NULL for (int i = 0; i < t_threshold; i++) { t_out[i] = NULL; } // init tchn/tchn_cmpd motif files to NULL for (int i = 0; i < tchn_threshold - 1; i++) { tchn_out[i] = NULL; tchn_out_cmpd[i] = NULL; } // init c motif files to NULL for (int i = 0; i < c_threshold; i++) { c_out[i] = NULL; } if (graph_mode == W_MODE) { y_out = new ofstream*[size_YH(y_des_threshold, y_tail_threshold)]; vand_out = new ofstream*[size_VA(1)]; vnec_out = new ofstream*[size_VA(1)]; h_out = new ofstream*[size_YH(h_anc_threshold, h_tail_threshold)]; aand_out = new ofstream*[size_VA(1)]; anec_out = new ofstream*[size_VA(1)]; // init y/vand/vnec motif files to NULL for (int i = 0; i < size_YH(y_des_threshold, y_tail_threshold); i++) { y_out[i] = NULL; } for (int i = 0; i < size_VA(1); i++) { vand_out[i] = NULL; vnec_out[i] = NULL; } // init h/aand/anec motif files to NULL for (int i = 0; i < size_YH(h_anc_threshold, h_tail_threshold); i++) { h_out[i] = NULL; } for (int i = 0; i < size_VA(1); i++) { aand_out[i] = NULL; anec_out[i] = NULL; } } string motifsFile_name = out_directory + (graph_mode == S_MODE ? string(S_MOTIFS_FILE) : string(W_MOTIFS_FILE)); motifsFile = new ofstream(motifsFile_name.c_str()); if (motifsFile->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifsFile_name << " **\n"; exit(1); } } void Graph::closeCurrFiles() { if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: close current motif files\n"; // close v/v_cmpd motif files for (int i = 0; i < size_VA(v_des_threshold); i++) { if (v_out[i] != NULL) { v_out[i]->close(); delete v_out[i]; v_out_cmpd[i]->close(); delete v_out_cmpd[i]; } } // close a/a_cmpd motif files for (int i = 0; i < size_VA(a_anc_threshold); i++) { if (a_out[i] != NULL) { a_out[i]->close(); delete a_out[i]; a_out_cmpd[i]->close(); delete a_out_cmpd[i]; } } // close p motif files for (int i = 0; i < size_P(); i++) { if (p_out[i] != NULL) { p_out[i]->close(); delete p_out[i]; } } // close t motif files for (int i = 0; i < t_threshold; i++) { if (t_out[i] != NULL) { t_out[i]->close(); delete t_out[i]; } } // close tchn/tchn_cmpd motif files for (int i = 0; i < tchn_threshold - 1; i++) { if (tchn_out[i] != NULL) { tchn_out[i]->close(); delete tchn_out[i]; tchn_out_cmpd[i]->close(); delete tchn_out_cmpd[i]; } } // close c motif files for (int i = 0; i < c_threshold; i++) { if (c_out[i] != NULL) { c_out[i]->close(); delete c_out[i]; } } if (graph_mode == W_MODE) { // close y/vand/vnec motif files for (int i = 0; i < size_YH(y_des_threshold, y_tail_threshold); i++) { if (y_out[i] != NULL) { y_out[i]->close(); delete y_out[i]; } } for (int i = 0; i < size_VA(1); i++) { if (vand_out[i] != NULL) { vand_out[i]->close(); delete vand_out[i]; } if (vnec_out[i] != NULL) { vnec_out[i]->close(); delete vnec_out[i]; } } // close h/aand/anec motif files for (int i = 0; i < size_YH(h_anc_threshold, h_tail_threshold); i++) { if (h_out[i] != NULL) { h_out[i]->close(); delete h_out[i]; } } for (int i = 0; i < size_VA(1); i++) { if (aand_out[i] != NULL) { aand_out[i]->close(); delete aand_out[i]; } if (anec_out[i] != NULL) { anec_out[i]->close(); delete anec_out[i]; } } } motifsFile->close(); delete motifsFile; } bool Graph::haveCommonDes(Graph::Edge* gene1, Graph::Edge* gene2, int threshold) { for (int i = 0; i < threshold; i++) { if (gene1->descendants[i]->size() == 0) break; for (int j = 0; j < threshold; j++) { if (gene2->descendants[j]->size() == 0) break; for (nodeVecIter g = gene1->descendants[i]->begin(); g != gene1->descendants[i]->end(); g++) { for (nodeVecIter h = gene2->descendants[j]->begin(); h != gene2->descendants[j]->end(); h++) { if ((*g) == (*h)) { return true; } } } } } return false; } bool Graph::haveCommonAnc(Graph::Edge* gene1, Graph::Edge* gene2, int threshold) { for (int i = 0; i < threshold; i++) { if (gene1->ancestors[i]->size() == 0) break; for (int j = 0; j < threshold; j++) { if (gene2->ancestors[j]->size() == 0) break; for (nodeVecIter g = gene1->ancestors[i]->begin(); g != gene1->ancestors[i]->end(); g++) { for (nodeVecIter h = gene2->ancestors[j]->begin(); h != gene2->ancestors[j]->end(); h++) { if ((*g) == (*h)) { return true; } } } } } return false; } cmnRelVec* Graph::findCommonDes(Graph::Edge* gene1, Graph::Edge* gene2, int threshold) { cmnRelVec* relVec = new cmnRelVec; for (int i = 0; i < threshold; i++) { if (gene1->descendants[i]->size() == 0) break; for (int j = 0; j < threshold; j++) { if (gene2->descendants[j]->size() == 0) break; for (uint g = 0; g < gene1->descendants[i]->size(); g++) { for (uint h = 0; h < gene2->descendants[j]->size(); h++) { if (gene1->descendants[i]->at(g) == gene2->descendants[j]->at(h)) { Graph::CmnRel cr; cr.nodeId = gene1->descendants[i]->at(g)->nodeId; cr.dist1 = i + 1; cr.index1 = g; cr.dist2 = j + 1; cr.index2 = h; relVec->push_back(cr); } } } } } return relVec; } cmnRelVec* Graph::findCommonAnc(Graph::Edge* gene1, Graph::Edge* gene2, int threshold) { cmnRelVec* relVec = new cmnRelVec; for (int i = 0; i < threshold; i++) { if (gene1->ancestors[i]->size() == 0) break; for (int j = 0; j < threshold; j++) { if (gene2->ancestors[j]->size() == 0) break; for (uint g = 0; g < gene1->ancestors[i]->size(); g++) { for (uint h = 0; h < gene2->ancestors[j]->size(); h++) { if (gene1->ancestors[i]->at(g) == gene2->ancestors[j]->at(h)) { Graph::CmnRel cr; cr.nodeId = gene1->ancestors[i]->at(g)->nodeId; cr.dist1 = i + 1; cr.index1 = g; cr.dist2 = j + 1; cr.index2 = h; relVec->push_back(cr); } } } } } return relVec; } // ======================================================== // Motif intersection-checking methods // ======================================================== bool Graph::set_merge(set& used_nodes, set& curr_nodes) { // check if used_nodes contains any nodes from curr_nodes for (set::iterator it = curr_nodes.begin(); it != curr_nodes.end(); it++) { if (used_nodes.find(*it) != used_nodes.end()) return true; } used_nodes.insert(curr_nodes.begin(), curr_nodes.end()); return false; } bool Graph::fpath_merge(const path& fpath, set& used_nodes) { // forward path (edges point away from start) pathConstIter prev_step = fpath.begin(); for (pathConstIter curr_step = fpath.begin(); curr_step != fpath.end(); curr_step++) { if (curr_step == fpath.begin()) continue; assert(prev_step != curr_step); set curr_nodes; curr_nodes.insert((*prev_step).first->endNodes->begin(), (*prev_step).first->endNodes->end()); curr_nodes.insert((*curr_step).first->startNodes->begin(), (*curr_step).first->startNodes->end()); assert(curr_nodes.find((*prev_step).second) != curr_nodes.end()); if (set_merge(used_nodes, curr_nodes)) return true; prev_step = curr_step; } return false; } bool Graph::rpath_merge(const path& rpath, set& used_nodes) { // reverse path (edges point towards start) pathConstIter prev_step = rpath.begin(); for (pathConstIter curr_step = rpath.begin(); curr_step != rpath.end(); curr_step++) { if (curr_step == rpath.begin()) continue; assert(prev_step != curr_step); set curr_nodes; curr_nodes.insert((*prev_step).first->startNodes->begin(), (*prev_step).first->startNodes->end()); curr_nodes.insert((*curr_step).first->endNodes->begin(), (*curr_step).first->endNodes->end()); assert(curr_nodes.find((*prev_step).second) != curr_nodes.end()); if (set_merge(used_nodes, curr_nodes)) return true; prev_step = curr_step; } return false; } bool Graph::intersect_V(const path& left_path, const path& right_path) { // sanity check assert(left_path[left_path.size() - 1].second == right_path[right_path.size() - 1].second); set used_nodes; if (fpath_merge(left_path, used_nodes)) return true; // inside left path if (fpath_merge(right_path, used_nodes)) return true; // inside right path // intersection of left and right paths set curr_nodes; curr_nodes.insert(left_path[left_path.size() - 1].first->endNodes->begin(), left_path[left_path.size() - 1].first->endNodes->end()); curr_nodes.insert(right_path[right_path.size() - 1].first->endNodes->begin(), right_path[right_path.size() - 1].first->endNodes->end()); assert(curr_nodes.find(left_path[left_path.size() - 1].second) != curr_nodes.end()); if (set_merge(used_nodes, curr_nodes)) return true; // top of left path curr_nodes.clear(); curr_nodes.insert(left_path[0].first->startNodes->begin(), left_path[0].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // top of right path curr_nodes.clear(); curr_nodes.insert(right_path[0].first->startNodes->begin(), right_path[0].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; return false; } bool Graph::intersect_A(const path& left_path, const path& right_path) { // sanity check assert(left_path[left_path.size() - 1].second == right_path[right_path.size() - 1].second); set used_nodes; if (rpath_merge(left_path, used_nodes)) return true; // inside left path if (rpath_merge(right_path, used_nodes)) return true; // inside right path // intersection of left and right paths set curr_nodes; curr_nodes.insert(left_path[left_path.size() - 1].first->startNodes->begin(), left_path[left_path.size() - 1].first->startNodes->end()); curr_nodes.insert(right_path[right_path.size() - 1].first->startNodes->begin(), right_path[right_path.size() - 1].first->startNodes->end()); assert(curr_nodes.find(left_path[left_path.size() - 1].second) != curr_nodes.end()); if (set_merge(used_nodes, curr_nodes)) return true; // bottom of left path curr_nodes.clear(); curr_nodes.insert(left_path[0].first->endNodes->begin(), left_path[0].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // bottom of right path curr_nodes.clear(); curr_nodes.insert(right_path[0].first->endNodes->begin(), right_path[0].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; return false; } bool Graph::intersect_P(const path& u_left_path, const path& u_right_path, const path& b_left_path, const path& b_right_path) { // sanity check assert(u_left_path[u_left_path.size() - 1].second == u_right_path[u_right_path.size() - 1].second); assert(b_left_path[b_left_path.size() - 1].second == b_right_path[b_right_path.size() - 1].second); assert(u_left_path[0].first == b_left_path[0].first); assert(u_right_path[0].first == b_right_path[0].first); set used_nodes; if (rpath_merge(u_left_path, used_nodes)) return true; // inside upper left path if (rpath_merge(u_right_path, used_nodes)) return true; // inside upper right path if (fpath_merge(b_left_path, used_nodes)) return true; // inside bottom left path if (fpath_merge(b_right_path, used_nodes)) return true; // inside bottom right path // intersection of upper left and upper right paths set curr_nodes; curr_nodes.insert(u_left_path[u_left_path.size() - 1].first->startNodes->begin(), u_left_path[u_left_path.size() - 1].first->startNodes->end()); curr_nodes.insert(u_right_path[u_right_path.size() - 1].first->startNodes->begin(), u_right_path[u_right_path.size() - 1].first->startNodes->end()); assert(curr_nodes.find(u_left_path[u_left_path.size() - 1].second) != curr_nodes.end()); if (set_merge(used_nodes, curr_nodes)) return true; // intersection of bottom left and bottom right paths curr_nodes.clear(); curr_nodes.insert(b_left_path[b_left_path.size() - 1].first->endNodes->begin(), b_left_path[b_left_path.size() - 1].first->endNodes->end()); curr_nodes.insert(b_right_path[b_right_path.size() - 1].first->endNodes->begin(), b_right_path[b_right_path.size() - 1].first->endNodes->end()); assert(curr_nodes.find(b_left_path[b_left_path.size() - 1].second) != curr_nodes.end()); if (set_merge(used_nodes, curr_nodes)) return true; return false; } bool Graph::intersect_T(path t_path, Graph::Edge* end_gene) { set used_nodes; // add dummy pathStep with end_gene (this is why t_path isn't passed by reference) pathStep dummy_step; dummy_step.first = end_gene; dummy_step.second = NULL; t_path.push_back(dummy_step); if (fpath_merge(t_path, used_nodes)) return true; // inside path // top of path set curr_nodes; curr_nodes.insert(t_path[0].first->startNodes->begin(), t_path[0].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // bottom of path curr_nodes.clear(); curr_nodes.insert(t_path[t_path.size() - 1].first->endNodes->begin(), t_path[t_path.size() - 1].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; return false; } bool Graph::intersect_y(path left_path, path right_path, path tail_path, Graph::Edge* left_end, Graph::Edge* right_end, Graph::Edge* tail_end) { set used_nodes; // add dummy pathSteps with end genes (this is why the paths aren't passed by reference) pathStep dummy_step; dummy_step.first = left_end; dummy_step.second = NULL; left_path.push_back(dummy_step); dummy_step.first = right_end; dummy_step.second = NULL; right_path.push_back(dummy_step); dummy_step.first = tail_end; dummy_step.second = NULL; tail_path.push_back(dummy_step); uint left_path_size = left_path.size(); left_path.erase(left_path.begin()); assert(left_path.size() == left_path_size - 1); right_path.erase(right_path.begin()); if (rpath_merge(left_path, used_nodes)) return true; // inside left path if (rpath_merge(right_path, used_nodes)) return true; // inside right path if (fpath_merge(tail_path, used_nodes)) return true; // inside tail path // intersection of left, right, and tail paths set curr_nodes; curr_nodes.insert(left_path[0].first->endNodes->begin(), left_path[0].first->endNodes->end()); curr_nodes.insert(right_path[0].first->endNodes->begin(), right_path[0].first->endNodes->end()); curr_nodes.insert(tail_path[0].first->startNodes->begin(), tail_path[0].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // top of left path curr_nodes.clear(); curr_nodes.insert(left_path[left_path.size() - 1].first->startNodes->begin(), left_path[left_path.size() - 1].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // top of right path curr_nodes.clear(); curr_nodes.insert(right_path[right_path.size() - 1].first->startNodes->begin(), right_path[right_path.size() - 1].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // bottom of tail path curr_nodes.clear(); curr_nodes.insert(tail_path[tail_path.size() - 1].first->endNodes->begin(), tail_path[tail_path.size() - 1].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; return false; } bool Graph::intersect_h(path left_path, path right_path, path tail_path, Graph::Edge* left_end, Graph::Edge* right_end, Graph::Edge* tail_end) { set used_nodes; // add dummy pathSteps with end genes (this is why the paths aren't passed by reference) pathStep dummy_step; dummy_step.first = left_end; dummy_step.second = NULL; left_path.push_back(dummy_step); dummy_step.first = right_end; dummy_step.second = NULL; right_path.push_back(dummy_step); dummy_step.first = tail_end; dummy_step.second = NULL; tail_path.push_back(dummy_step); uint left_path_size = left_path.size(); left_path.erase(left_path.begin()); assert(left_path.size() == left_path_size - 1); right_path.erase(right_path.begin()); if (fpath_merge(left_path, used_nodes)) return true; // inside left path if (fpath_merge(right_path, used_nodes)) return true; // inside right path if (rpath_merge(tail_path, used_nodes)) return true; // inside tail path // intersection of left, right, and tail paths set curr_nodes; curr_nodes.insert(left_path[0].first->startNodes->begin(), left_path[0].first->startNodes->end()); curr_nodes.insert(right_path[0].first->startNodes->begin(), right_path[0].first->startNodes->end()); curr_nodes.insert(tail_path[0].first->endNodes->begin(), tail_path[0].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // top of left path curr_nodes.clear(); curr_nodes.insert(left_path[left_path.size() - 1].first->endNodes->begin(), left_path[left_path.size() - 1].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // top of right path curr_nodes.clear(); curr_nodes.insert(right_path[right_path.size() - 1].first->endNodes->begin(), right_path[right_path.size() - 1].first->endNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; // bottom of tail path curr_nodes.clear(); curr_nodes.insert(tail_path[tail_path.size() - 1].first->startNodes->begin(), tail_path[tail_path.size() - 1].first->startNodes->end()); if (set_merge(used_nodes, curr_nodes)) return true; return false; } // ======================================================== // Motif search and output methods // ======================================================== void Graph::findMotifs_V() { if (v_des_threshold <= 0) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find " << (graph_mode == S_MODE ? "V" : "v") << " motifs\n"; for (int gene1 = 0; gene1 < numGenes - 1; gene1++) { for (int gene2 = gene1 + 1; gene2 < numGenes; gene2++) { if (!haveCommonAnc(genes[gene1], genes[gene2], v_anc_threshold)) { cmnRelVec* cd = findCommonDes(genes[gene1], genes[gene2], v_des_threshold); for (cmnRelVecIter it = cd->begin(); it != cd->end(); it++) { path& left_path = genes[gene1]->desPaths[(*it).dist1 - 1]->at((*it).index1); path& right_path = genes[gene2]->desPaths[(*it).dist2 - 1]->at((*it).index2); if (allow_intersect || !intersect_V(left_path, right_path)) { outputMotif_V(gene1, gene2, (*it).dist1, (*it).dist2, (*it).nodeId); outputMotif_V(gene2, gene1, (*it).dist2, (*it).dist1, (*it).nodeId); } } delete cd; } } } } void Graph::outputMotif_V(int gene1, int gene2, int dist1, int dist2, int nodeId) { int v_index = index_VA(dist1, dist2, v_des_threshold); if (v_out[v_index] == NULL) { ostringstream oss1, oss2; oss1 << dist1; oss2 << dist2; string motifFile = (graph_mode == S_MODE ? string("V") : string("v")) + out_separator + oss1.str() + out_separator + oss2.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; v_out[v_index] = new ofstream(motifFile_tmp.c_str()); if (v_out[v_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; motifFile = (graph_mode == S_MODE ? string("V") : string("v")) + out_separator + oss1.str() + out_separator + oss2.str() + CMPD; motifFile_tmp = out_directory + motifFile + tmp_extension; motifFile_out = out_directory + motifFile + out_extension; v_out_cmpd[v_index] = new ofstream(motifFile_tmp.c_str()); if (v_out_cmpd[v_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *v_out[v_index] << gene1 << out_delimiter << gene2 << "\n"; *v_out_cmpd[v_index] << gene1 << out_delimiter << nodeId << out_delimiter << gene2 << "\n"; } void Graph::findMotifs_A() { if (a_anc_threshold <= 0) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find " << (graph_mode == S_MODE ? "A" : "a") << " motifs\n"; for (int gene1 = 0; gene1 < numGenes - 1; gene1++) { for (int gene2 = gene1 + 1; gene2 < numGenes; gene2++) { if (!haveCommonDes(genes[gene1], genes[gene2], a_des_threshold)) { cmnRelVec* ca = findCommonAnc(genes[gene1], genes[gene2], a_anc_threshold); for (cmnRelVecIter it = ca->begin(); it != ca->end(); it++) { path& left_path = genes[gene1]->ancPaths[(*it).dist1 - 1]->at((*it).index1); path& right_path = genes[gene2]->ancPaths[(*it).dist2 - 1]->at((*it).index2); if (allow_intersect || !intersect_A(left_path, right_path)) { outputMotif_A(gene1, gene2, (*it).dist1, (*it).dist2, (*it).nodeId); outputMotif_A(gene2, gene1, (*it).dist2, (*it).dist1, (*it).nodeId); } } delete ca; } } } } void Graph::outputMotif_A(int gene1, int gene2, int dist1, int dist2, int nodeId) { int a_index = index_VA(dist1, dist2, a_anc_threshold); if (a_out[a_index] == NULL) { ostringstream oss1, oss2; oss1 << dist1; oss2 << dist2; string motifFile = (graph_mode == S_MODE ? string("A") : string("a")) + out_separator + oss1.str() + out_separator + oss2.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; a_out[a_index] = new ofstream(motifFile_tmp.c_str()); if (a_out[a_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; motifFile = (graph_mode == S_MODE ? string("A") : string("a")) + out_separator + oss1.str() + out_separator + oss2.str() + CMPD; motifFile_tmp = out_directory + motifFile + tmp_extension; motifFile_out = out_directory + motifFile + out_extension; a_out_cmpd[a_index] = new ofstream(motifFile_tmp.c_str()); if (a_out_cmpd[a_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *a_out[a_index] << gene1 << out_delimiter << gene2 << "\n"; *a_out_cmpd[a_index] << gene1 << out_delimiter << nodeId << out_delimiter << gene2 << "\n"; } void Graph::findMotifs_P() { if ((p_anc_threshold <= 0) || (p_des_threshold <= 0)) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find " << (graph_mode == S_MODE ? "P" : "p") << " motifs\n"; for (int gene1 = 0; gene1 < numGenes - 1; gene1++) { for (int gene2 = gene1 + 1; gene2 < numGenes; gene2++) { cmnRelVec* ca = findCommonAnc(genes[gene1], genes[gene2], p_anc_threshold); cmnRelVec* cd = findCommonDes(genes[gene1], genes[gene2], p_des_threshold); for (cmnRelVecIter i = ca->begin(); i != ca->end(); i++) { for (cmnRelVecIter j = cd->begin(); j != cd->end(); j++) { path& u_left_path = genes[gene1]->ancPaths[(*i).dist1 - 1]->at((*i).index1); path& u_right_path = genes[gene2]->ancPaths[(*i).dist2 - 1]->at((*i).index2); path& b_left_path = genes[gene1]->desPaths[(*j).dist1 - 1]->at((*j).index1); path& b_right_path = genes[gene2]->desPaths[(*j).dist2 - 1]->at((*j).index2); if (allow_intersect || !intersect_P(u_left_path, u_right_path, b_left_path, b_right_path)) { outputMotif_P(gene1, gene2, (*i).dist1, (*j).dist1, (*i).dist2, (*j).dist2); outputMotif_P(gene2, gene1, (*i).dist2, (*j).dist2, (*i).dist1, (*j).dist1); } } } delete ca; delete cd; } } } void Graph::outputMotif_P(int gene1, int gene2, int dist1a, int dist1v, int dist2a, int dist2v) { int p_index = index_P(dist1a, dist1v, dist2a, dist2v); if (p_out[p_index] == NULL) { ostringstream oss1, oss2, oss3, oss4; oss1 << dist1a; oss2 << dist1v; oss3 << dist2a; oss4 << dist2v; string motifFile = (graph_mode == S_MODE ? string("P") : string("p")) + out_separator + oss1.str() + out_separator + oss2.str() + out_separator + oss3.str() + out_separator + oss4.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; p_out[p_index] = new ofstream(motifFile_tmp.c_str()); if (p_out[p_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *p_out[p_index] << gene1 << out_delimiter << gene2 << "\n"; } void Graph::findMotifs_T() { if (t_threshold > 0 || tchn_threshold > 1) { if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find " << (graph_mode == S_MODE ? "T" : "t") << " motifs\n"; // for each gene output all children of descendants of distance <= t_threshold for (int gene = 0; gene < numGenes; gene++) { if (genes[gene] != NULL) { for (int i = 0; (i < t_threshold) || (i < tchn_threshold - 1); i++) { if (genes[gene]->descendants[i]->size() == 0) break; for (uint j = 0; j < genes[gene]->descendants[i]->size(); j++) { // iterate over all path-ending genes for (edgeVecIter k = genes[gene]->descendants[i]->at(j)->childGenes->begin(); k != genes[gene]->descendants[i]->at(j)->childGenes->end(); k++) { path t_path = genes[gene]->desPaths[i]->at(j); if (allow_intersect || !intersect_T(t_path, (*k))) { if (i < t_threshold) { outputMotif_T(genes[gene]->geneId, (*k)->geneId, i + 1); } if (i < tchn_threshold - 1) { outputMotif_Tchn(genes[gene]->geneId, (*k)->geneId, genes[gene]->desPaths[i]->at(j), i + 2); } } } } } } } } } void Graph::outputMotif_T(int startGene, int endGene, int dist) { int t_index = dist - 1; if (t_out[t_index] == NULL) { ostringstream oss; oss << dist; string motifFile = (graph_mode == S_MODE ? string("T") : string("t")) + out_separator + oss.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; t_out[t_index] = new ofstream(motifFile_tmp.c_str()); if (t_out[t_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *t_out[t_index] << startGene << out_delimiter << endGene << "\n"; } void Graph::outputMotif_Tchn(int startGene, int endGene, const path& t_path, int chn_length) { int tchn_index = chn_length - 2; if (tchn_out[tchn_index] == NULL) { ostringstream oss; oss << chn_length; string motifFile = (graph_mode == S_MODE ? string("T") : string("t")) + string("chn") + out_separator + oss.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; tchn_out[tchn_index] = new ofstream(motifFile_tmp.c_str()); if (tchn_out[tchn_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; motifFile = (graph_mode == S_MODE ? string("T") : string("t")) + string("chn") + out_separator + oss.str() + CMPD; motifFile_tmp = out_directory + motifFile + tmp_extension; motifFile_out = out_directory + motifFile + out_extension; tchn_out_cmpd[tchn_index] = new ofstream(motifFile_tmp.c_str()); if (tchn_out_cmpd[tchn_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } for (pathConstIter it = t_path.begin(); it != t_path.end(); it++) { *tchn_out[tchn_index] << (*it).first->geneId << out_delimiter; *tchn_out_cmpd[tchn_index] << (*it).first->geneId << out_delimiter << (*it).second->nodeId << out_delimiter; } *tchn_out[tchn_index] << endGene << "\n"; *tchn_out_cmpd[tchn_index] << endGene << "\n";; } void Graph::findMotifs_C() { if (c_threshold <= 0) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find " << (graph_mode == S_MODE ? "C" : "c") << " motifs\n"; int* empty_path = new int[c_threshold]; // iterate through all nodes and for each node, search for cycles by recursively traversing // the graph and checking if currNode equals startNode for (int node = 0; node < numNodes; node++) { recFindMotifs_C(nodes[node], nodes[node], 0, empty_path); // we've found all cycles that use nodes[node], so mark it as visited to prevent the same // cycle from being found again nodes[node]->visited = true; } delete[] empty_path; // restore node->visited to false for each node for (int node = 0; node < numNodes; node++) { nodes[node]->visited = false; } } void Graph::recFindMotifs_C(Graph::Node* startNode, Graph::Node* currNode, int currLength, int* currPath) { if (currLength > c_threshold) return; // reached threshold length if (currNode->visited == true) { if ((startNode == currNode) && (currLength > 0)) { // found a cycle containing startNode outputMotif_C(currLength, currPath); } return; } currLength++; currNode->visited = true; for (edgeVecIter it = currNode->childGenes->begin(); it != currNode->childGenes->end(); it++) { if ((*it)->visited == false) { (*it)->visited = true; currPath[currLength - 1] = (*it)->geneId; for (nodeVecIter j = (*it)->endNodes->begin(); j != (*it)->endNodes->end(); j++) { recFindMotifs_C(startNode, (*j), currLength, currPath); } (*it)->visited = false; } } currNode->visited = false; } void Graph::outputMotif_C(int length, int* path) { int c_index = length - 1; if (c_out[c_index] == NULL) { ostringstream oss; oss << length; string motifFile = (graph_mode == S_MODE ? string("C") : string("c")) + out_separator + oss.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; c_out[c_index] = new ofstream(motifFile_tmp.c_str()); if (c_out[c_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } for (int i = 0; i < length - 1; i++) { *c_out[c_index] << path[i] << out_delimiter; } *c_out[c_index] << path[length - 1] << "\n"; } void Graph::findMotifs_vand_vnec() { if (!allow_vand && !allow_vnec) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find vand/vnec motifs\n"; for (int gene = 0; gene < numGenes; gene++) { if (genes[gene] != NULL) { for (nodeVecIter i = genes[gene]->startNodes->begin(); i != genes[gene]->startNodes->end(); i++) { for (nodeVecIter j(i); j != genes[gene]->startNodes->end(); j++) { if ((*i) != (*j)) { for (edgeVecIter g = (*i)->parentGenes->begin(); g != (*i)->parentGenes->end(); g++) { for (edgeVecIter h = (*j)->parentGenes->begin(); h != (*j)->parentGenes->end(); h++) { if (allow_vand) { outputMotif_vand((*g)->geneId, (*h)->geneId, 1, 1); outputMotif_vand((*h)->geneId, (*g)->geneId, 1, 1); } if (allow_vnec) { if ((*i)->parentGenes->size() == 1 && (*j)->parentGenes->size() == 1) { outputMotif_vnec((*g)->geneId, (*h)->geneId, 1, 1); outputMotif_vnec((*g)->geneId, (*h)->geneId, 1, 1); } } } } } } } } } } void Graph::findMotifs_y() { if (y_des_threshold <= 0 || y_tail_threshold < 0) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find y motifs\n"; for (int gene = 0; gene < numGenes; gene++) { if (genes[gene] != NULL) { for (int i = 0; i < y_des_threshold; i++) { if (genes[gene]->ancestors[i]->size() == 0) break; for (uint j = 0; j < genes[gene]->ancestors[i]->size(); j++) { for (int g = i; g < y_des_threshold; g++) { if (genes[gene]->ancestors[g]->size() == 0) break; for (uint h = (g == i) ? j + 1 : 0; h < genes[gene]->ancestors[g]->size(); h++) { // check if paths have same start node if (genes[gene]->ancPaths[i]->at(j)[0].second == genes[gene]->ancPaths[g]->at(h)[0].second) break; // iterate over all path-ending genes for (edgeVecIter b = genes[gene]->ancestors[i]->at(j)->parentGenes->begin(); b != genes[gene]->ancestors[i]->at(j)->parentGenes->end(); b++) { for (edgeVecIter c = genes[gene]->ancestors[g]->at(h)->parentGenes->begin(); c != genes[gene]->ancestors[g]->at(h)->parentGenes->end(); c++) { path left_path = genes[gene]->ancPaths[i]->at(j); path right_path = genes[gene]->ancPaths[g]->at(h); path empty_tail; // output y motif with no tail if (allow_intersect || !intersect_y(left_path, right_path, empty_tail, (*b), (*c), genes[gene])) { outputMotif_y((*b)->geneId, (*c)->geneId, genes[gene]->geneId, i + 1, g + 1, 0); outputMotif_y((*c)->geneId, (*b)->geneId, genes[gene]->geneId, g + 1, i + 1, 0); } // for each pair of ancestor genes, iterate over all tail nodes for (int e = 0; e < y_tail_threshold; e++) { if (genes[gene]->descendants[e]->size() == 0) break; for (uint f = 0; f < genes[gene]->descendants[e]->size(); f++) { // iterate over all tail-ending genes for (edgeVecIter d = genes[gene]->descendants[e]->at(f)->childGenes->begin(); d != genes[gene]->descendants[e]->at(f)->childGenes->end(); d++) { path tail_path = genes[gene]->desPaths[e]->at(f); if (allow_intersect || !intersect_y(left_path, right_path, tail_path, (*b), (*c), (*d))) { outputMotif_y((*b)->geneId, (*c)->geneId, (*d)->geneId, i + 1, g + 1, e + 1); outputMotif_y((*c)->geneId, (*b)->geneId, (*d)->geneId, g + 1, i + 1, e + 1); } } } } } } } } } } } } } void Graph::outputMotif_y(int gene1, int gene2, int tail_gene, int dist1, int dist2, int tail_dist) { int y_index = index_YH(dist1, dist2, y_des_threshold, tail_dist); assert(dist1 <= y_des_threshold); assert(dist2 <= y_des_threshold); assert(tail_dist <= y_tail_threshold); assert(y_index < size_YH(y_des_threshold, y_tail_threshold)); if (y_out[y_index] == NULL) { ostringstream oss1, oss2, oss3; oss1 << dist1; oss2 << dist2; oss3 << tail_dist; string motifFile = string("y") + out_separator + oss1.str() + out_separator + oss2.str() + out_separator + oss3.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; y_out[y_index] = new ofstream(motifFile_tmp.c_str()); if (y_out[y_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *y_out[y_index] << gene1 << out_delimiter << gene2 << out_delimiter << tail_gene << "\n"; } void Graph::outputMotif_vand(int gene1, int gene2, int dist1, int dist2) { int vand_index = index_VA(dist1, dist2, 1); if (vand_out[vand_index] == NULL) { ostringstream oss1, oss2; oss1 << dist1; oss2 << dist2; string motifFile = string("vand") + out_separator + oss1.str() + out_separator + oss2.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; vand_out[vand_index] = new ofstream(motifFile_tmp.c_str()); if (vand_out[vand_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *vand_out[vand_index] << gene1 << out_delimiter << gene2 << "\n"; } void Graph::outputMotif_vnec(int gene1, int gene2, int dist1, int dist2) { int vnec_index = index_VA(dist1, dist2, 1); if (vnec_out[vnec_index] == NULL) { ostringstream oss1, oss2; oss1 << dist1; oss2 << dist2; string motifFile = string("vnec") + out_separator + oss1.str() + out_separator + oss2.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; vnec_out[vnec_index] = new ofstream(motifFile_tmp.c_str()); if (vnec_out[vnec_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *vnec_out[vnec_index] << gene1 << out_delimiter << gene2 << "\n"; } void Graph::findMotifs_aand_anec() { if (!allow_aand && !allow_anec) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find aand/anec motifs\n"; for (int gene = 0; gene < numGenes; gene++) { if (genes[gene] != NULL) { for (nodeVecIter i = genes[gene]->endNodes->begin(); i != genes[gene]->endNodes->end(); i++) { for (nodeVecIter j(i); j != genes[gene]->endNodes->end(); j++) { if ((*i) != (*j)) { for (edgeVecIter g = (*i)->childGenes->begin(); g != (*i)->childGenes->end(); g++) { for (edgeVecIter h = (*j)->childGenes->begin(); h != (*j)->childGenes->end(); h++) { if (allow_aand) { outputMotif_aand((*g)->geneId, (*h)->geneId, 1, 1); outputMotif_aand((*h)->geneId, (*g)->geneId, 1, 1); } if (allow_anec) { if ((*i)->childGenes->size() == 1 && (*j)->childGenes->size() == 1) { outputMotif_anec((*g)->geneId, (*h)->geneId, 1, 1); outputMotif_anec((*g)->geneId, (*h)->geneId, 1, 1); } } } } } } } } } } void Graph::findMotifs_h() { if (h_anc_threshold <= 0 || h_tail_threshold < 0) return; if (verbose == 1) cout << "." << flush; else if (verbose >= 2) cout << "graph.cpp: find h motifs\n"; for (int gene = 0; gene < numGenes; gene++) { if (genes[gene] != NULL) { for (int i = 0; i < h_anc_threshold; i++) { if (genes[gene]->descendants[i]->size() == 0) break; for (uint j = 0; j < genes[gene]->descendants[i]->size(); j++) { for (int g = i; g < h_anc_threshold; g++) { if (genes[gene]->descendants[g]->size() == 0) break; for (uint h = (g == i) ? j + 1 : 0; h < genes[gene]->descendants[g]->size(); h++) { // check if paths have same start node if (genes[gene]->desPaths[i]->at(j)[0].second == genes[gene]->desPaths[g]->at(h)[0].second) break; // iterate over all path-ending genes for (edgeVecIter b = genes[gene]->descendants[i]->at(j)->childGenes->begin(); b != genes[gene]->descendants[i]->at(j)->childGenes->end(); b++) { for (edgeVecIter c = genes[gene]->descendants[g]->at(h)->childGenes->begin(); c != genes[gene]->descendants[g]->at(h)->childGenes->end(); c++) { path left_path = genes[gene]->desPaths[i]->at(j); path right_path = genes[gene]->desPaths[g]->at(h); path empty_tail; // output h motif with no tail if (allow_intersect || !intersect_h(left_path, right_path, empty_tail, (*b), (*c), genes[gene])) { outputMotif_h((*b)->geneId, (*c)->geneId, genes[gene]->geneId, i + 1, g + 1, 0); outputMotif_h((*c)->geneId, (*b)->geneId, genes[gene]->geneId, g + 1, i + 1, 0); } // for each pair of descendant genes, iterate over all tail nodes for (int e = 0; e < h_tail_threshold; e++) { if (genes[gene]->ancestors[e]->size() == 0) break; for (uint f = 0; f < genes[gene]->ancestors[e]->size(); f++) { // iterate over all tail-ending genes for (edgeVecIter d = genes[gene]->ancestors[e]->at(f)->parentGenes->begin(); d != genes[gene]->ancestors[e]->at(f)->parentGenes->end(); d++) { path tail_path = genes[gene]->ancPaths[e]->at(f); if (allow_intersect || !intersect_h(left_path, right_path, tail_path, (*b), (*c), (*d))) { outputMotif_h((*b)->geneId, (*c)->geneId, (*d)->geneId, i + 1, g + 1, e + 1); outputMotif_h((*c)->geneId, (*b)->geneId, (*d)->geneId, g + 1, i + 1, e + 1); } } } } } } } } } } } } } void Graph::outputMotif_h(int gene1, int gene2, int tail_gene, int dist1, int dist2, int tail_dist) { int h_index = index_YH(dist1, dist2, h_anc_threshold, tail_dist); if (h_out[h_index] == NULL) { ostringstream oss1, oss2, oss3; oss1 << dist1; oss2 << dist2; oss3 << tail_dist; string motifFile = string("h") + out_separator + oss1.str() + out_separator + oss2.str() + out_separator + oss3.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; h_out[h_index] = new ofstream(motifFile_tmp.c_str()); if (h_out[h_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *h_out[h_index] << gene1 << out_delimiter << gene2 << out_delimiter << tail_gene << "\n"; } void Graph::outputMotif_aand(int gene1, int gene2, int dist1, int dist2) { int aand_index = index_VA(dist1, dist2, 1); if (aand_out[aand_index] == NULL) { ostringstream oss1, oss2; oss1 << dist1; oss2 << dist2; string motifFile = string("aand") + out_separator + oss1.str() + out_separator + oss2.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; aand_out[aand_index] = new ofstream(motifFile_tmp.c_str()); if (aand_out[aand_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *aand_out[aand_index] << gene1 << out_delimiter << gene2 << "\n"; } void Graph::outputMotif_anec(int gene1, int gene2, int dist1, int dist2) { int anec_index = index_VA(dist1, dist2, 1); if (anec_out[anec_index] == NULL) { ostringstream oss1, oss2; oss1 << dist1; oss2 << dist2; string motifFile = string("anec") + out_separator + oss1.str() + out_separator + oss2.str(); string motifFile_tmp = out_directory + motifFile + tmp_extension; string motifFile_out = out_directory + motifFile + out_extension; anec_out[anec_index] = new ofstream(motifFile_tmp.c_str()); if (anec_out[anec_index]->fail()) { cerr << "** Error: graph.cpp couldn't open file " << motifFile_tmp << " **\n"; exit(1); } if (verbose >= 2) cout << "\t> " << motifFile_out << "\n"; *motifsFile << motifFile << "\n"; } *anec_out[anec_index] << gene1 << out_delimiter << gene2 << "\n"; }