##########################
# Supplementary Code 1
# Protein subcomplex identification from co-purification data --R code
# Programming language: R v.2.15.2
# Written by: Antonio Mora (antoniocmora@gmail.com)
# Feb.-Apr. 2013
########################

#######################
# 1. Needed Libraries and Files:
#######################
setwd("**Introduce your working directory here**")
library(iRefR)         # iRefR can be installed from CRAN.
library(org.Sc.sgd.db) # org.Sc.sgd.db can be installed from bioconductor
library(linkcomm)      # linkcomm can be installed from CRAN.
library(limma)         # limma can be installed from bioconductor

# Original TAP data:
# This is an R object with Gavin's data, as in the original paper:
# http://www.nature.com/nature/journal/v440/n7084/extref/nature04532-s2.pdf
load("gavin.RData")

# Nested group generation functions (taken from "Zaki and Mora,
# Characterization of nested complexes in protein interaction networks,
# Proteins: Structure, function and Bioinformatics, under review"):

# Pairwise overlap statistics between complexes.
#
# complexList: character vector; each element is one complex written as a
#   comma-separated list of member identifiers.
# Returns a list with three n x n matrices (n = length(complexList)). Only the
#   upper triangle (i < j) is filled, everything else stays 0:
#   overlap       - number of shared members,
#   jaccard_index - overlap / size of the union of both complexes,
#   meetmin_index - overlap / size of the smaller complex.
generate_overlap_nesting_matrices <- function(complexList) {
  n <- length(complexList)
  overlap <- matrix(0, nrow = n, ncol = n)
  jaccard_index <- matrix(0, nrow = n, ncol = n)
  meetmin_index <- matrix(0, nrow = n, ncol = n)

  # Split every complex once instead of once per pair.
  members <- strsplit(as.character(complexList), ",", fixed = TRUE)

  # seq_len() keeps the loop empty for n <= 1 (1:(n - 1) would count down).
  for (i in seq_len(max(n - 1L, 0L))) {
    complex_i <- members[[i]]
    for (j in (i + 1L):n) {
      complex_j <- members[[j]]
      overlap[i, j] <- length(intersect(complex_i, complex_j))
      # union(), not unique(a, b): the second argument of unique() is
      # 'incomparables', so the old denominator was not the union size.
      jaccard_index[i, j] <- overlap[i, j] / length(union(complex_i, complex_j))
      meetmin_index[i, j] <- overlap[i, j] / min(length(complex_i), length(complex_j))
    }
  }

  list(overlap = overlap, jaccard_index = jaccard_index, meetmin_index = meetmin_index)
}

# Build the table of nested complex pairs (meet-min index equal to 1).
#
# complexList_C_unique: two-column character matrix (complex ID, comma-separated
#   subunits), one row per complex.
# overlap_complex_complex, jaccard_complex_complex, meetmin_complex_complex:
#   matrices as returned by generate_overlap_nesting_matrices().
# Returns a data frame with one row per nested pair, larger ("macro") complex
#   first: macro ID, nested ID (first of its "|"-separated IDs after sorting),
#   macro subunits, nested subunits, Jaccard, Meet-Min and the overlap as
#   'weight'. Returns NULL when no pair is nested.
create_table_nested_complexList <- function(complexList_C_unique, overlap_complex_complex,
                                            jaccard_complex_complex, meetmin_complex_complex) {
  # Nested pairs in the upper triangle, visited row by row like the original loops.
  pairs <- which(upper.tri(meetmin_complex_complex) & meetmin_complex_complex == 1,
                 arr.ind = TRUE)
  if (nrow(pairs) == 0) {
    return(NULL)
  }
  pairs <- pairs[order(pairs[, 1], pairs[, 2]), , drop = FALSE]

  sizes <- vapply(strsplit(as.character(complexList_C_unique[, 2]), ",", fixed = TRUE),
                  length, integer(1))
  # Ties count the first complex as the macro one (which.max picks the first maximum).
  first_is_macro <- sizes[pairs[, 1]] >= sizes[pairs[, 2]]
  macro <- unname(ifelse(first_is_macro, pairs[, 1], pairs[, 2]))
  nested <- unname(ifelse(first_is_macro, pairs[, 2], pairs[, 1]))

  table_nested_complexList <- cbind(
    complexList_C_unique[macro, 1], complexList_C_unique[nested, 1],
    complexList_C_unique[macro, 2], complexList_C_unique[nested, 2],
    overlap_complex_complex[pairs], jaccard_complex_complex[pairs],
    meetmin_complex_complex[pairs]
  )
  rownames(table_nested_complexList) <- NULL
  colnames(table_nested_complexList) <- c("Macro complex -ID", "Nested complex -ID",
                                          "Macro complex -subunits", "Nested complex -subunits",
                                          "Overlap", "Jaccard", "Meet-Min")

  # Grooming the table: keep a single ID for the nested complex.
  V2 <- vapply(strsplit(table_nested_complexList[, 2], "|", fixed = TRUE),
               function(ids) sort(ids)[1], character(1))

  # drop = FALSE keeps the two-column pieces as matrices when there is only
  # one nested pair; otherwise cbind() would stack them into extra rows.
  # Note: Choosing one ID for complex and leaving overlap as weight.
  as.data.frame(cbind(table_nested_complexList[, 1], V2,
                      table_nested_complexList[, 3:4, drop = FALSE],
                      table_nested_complexList[, 6:7, drop = FALSE],
                      weight = table_nested_complexList[, 5]))
}

# Group nested complexes into maximal "nested groups".
#
# table_nested_complexList_final: data frame from create_table_nested_complexList()
#   (columns 1-2 are macro/nested IDs, columns 3-4 their subunits).
# Returns list(table, graphs): 'table' has one row per nested group (group ID,
#   comma-separated complex IDs, group size, "|"-separated subunit lists and
#   sizes); 'graphs' holds one directed graph per group. Returns
#   list(NULL, list()) when the input is NULL or has no rows.
make_nested_groups <- function(table_nested_complexList_final) {
  if (is.null(table_nested_complexList_final) || NROW(table_nested_complexList_final) == 0) {
    return(list(NULL, list()))
  }

  # DFS: one candidate group per vertex = everything reachable from it.
  graph_all_nesting <- graph.data.frame(table_nested_complexList_final, directed = TRUE)
  vertex_names <- V(graph_all_nesting)$name
  roots <- unique(vertex_names)
  table_nested_groups <- matrix("", nrow = length(roots), ncol = 3)
  for (n in seq_along(roots)) {
    visit_order <- graph.dfs(graph_all_nesting, root = which(vertex_names == roots[n]),
                             "out", unreachable = FALSE)$order
    visit_order <- as.numeric(visit_order)
    # Unreached vertices show up as missing values; drop exactly those. The
    # previous code always dropped the last element, which loses a real
    # vertex whenever the root reaches every vertex.
    # NOTE(review): relies on graph.dfs() marking unreached vertices as NA/NaN - confirm for the installed igraph.
    visited <- unique(visit_order[!is.na(visit_order)])
    complex_group <- vertex_names[visited]
    table_nested_groups[n, ] <- c(paste("NID", n, sep = "_"),
                                  paste(complex_group, collapse = ","),
                                  length(complex_group))
  }

  # Remove subsets of others (on equal size the later group is removed):
  group_members <- strsplit(table_nested_groups[, 2], ",", fixed = TRUE)
  n_groups <- length(group_members)
  remove_group <- rep(FALSE, n_groups)
  for (i in seq_len(max(n_groups - 1L, 0L))) {
    complex_i <- group_members[[i]]
    for (j in (i + 1L):n_groups) {
      complex_j <- group_members[[j]]
      overlap <- length(intersect(complex_i, complex_j))
      meetmin_index <- overlap / min(length(complex_i), length(complex_j))
      if (meetmin_index == 1) {
        if (length(complex_i) < length(complex_j)) {
          remove_group[i] <- TRUE
        } else {
          remove_group[j] <- TRUE
        }
      }
    }
  }
  # Logical mask instead of x[-idx, ]: a negative empty index would drop all
  # rows, and drop = FALSE keeps a matrix when a single group remains.
  table_nested_groups <- table_nested_groups[!remove_group, , drop = FALSE]
  group_members <- group_members[!remove_group]
  table_nested_groups[, 1] <- paste("NID", seq_len(nrow(table_nested_groups)), sep = "_")
  rownames(table_nested_groups) <- NULL
  colnames(table_nested_groups) <- c("nested_group", "nested_complexes", "group_size")

  # Generating graph objects for each group (edges with both ends in the group):
  graph_nested_comp <- lapply(group_members, function(group) {
    in_group <- (table_nested_complexList_final[, 1] %in% group) &
      (table_nested_complexList_final[, 2] %in% group)
    edgeList <- table_nested_complexList_final[which(in_group), ]
    graph.data.frame(unique(edgeList), directed = TRUE)
  })

  # Adding subunit info:
  complex_ids <- c(as.character(table_nested_complexList_final[, 1]),
                   as.character(table_nested_complexList_final[, 2]))
  complex_subunits <- c(as.character(table_nested_complexList_final[, 3]),
                        as.character(table_nested_complexList_final[, 4]))
  id_to_subunits <- unique(cbind(complex_ids, complex_subunits))
  complex_to_prot <- cbind(id_to_subunits[, 1], gsub("uniprotkb:", "", id_to_subunits[, 2])) # 3073x2

  all_subunits <- character(length(group_members))
  size_subunits <- character(length(group_members))
  for (i in seq_along(group_members)) {
    these_subunits <- NULL
    this_size <- NULL
    for (j in group_members[[i]]) {
      tmp <- complex_to_prot[which(complex_to_prot[, 1] == j), 2]
      these_subunits <- c(these_subunits, tmp)
      # As before, the size is taken from the first subunit list found for this ID.
      this_size <- c(this_size, length(strsplit(tmp, ",", fixed = TRUE)[[1]]))
    }
    all_subunits[i] <- paste(these_subunits, collapse = "|")
    size_subunits[i] <- paste(this_size, collapse = "|")
  }
  table_nested_groups2 <- cbind(table_nested_groups, all_subunits, size_subunits)

  list(table_nested_groups2, graph_nested_comp)
}

# Output files from the ProCope software:
# The scored files, using Gavin's dataset and each of the four scores, were called:
# "gavin_scored_Dice.txt"
# "gavin_scored_Hart.txt"
# "gavin_scored_PE.txt"
# "gavin_scored_SA.txt"
# The list of complexes, using Gavin's dataset, hierarchical clustering and each of the four scores, were called:
# "gavin_complex_Dice_hierarchical.txt"
# "gavin_complex_Hart_hierarchical.txt"
# "gavin_complex_PE_hierarchical.txt"
# "gavin_complex_SA_hierarchical.txt"
# The file with the MIPS reference dataset was called:
# "mips_complexes.txt"

#####
# 2. Analysis of Raw data:
#####

# 2.1.
# Characterize raw purification data:

# Turn the purification table into a complex list.
#
# gavin_purifications: table with the bait in column 1 and the space-separated
#   preys in column 2, one purification per row.
# Returns a two-column character matrix: row number and the sorted, unique,
#   comma-separated members (bait plus preys) of that purification.
generate_complexList_from_purifications <- function(gavin_purifications) {
  n_purifications <- dim(gavin_purifications)[1]
  members <- vapply(seq_len(n_purifications), function(i) {
    baits <- as.character(gavin_purifications[i, 1])
    preys <- strsplit(as.character(gavin_purifications[i, 2]), " ")[[1]]
    paste(sort(unique(c(baits, preys))), collapse = ",")
  }, character(1))
  complexList_raw <- cbind(seq_len(n_purifications), members)
  # Same column names as the row-by-row cbind(i, paste(...)) produced before.
  colnames(complexList_raw) <- c("i", "")
  complexList_raw
}

complexList_raw <- generate_complexList_from_purifications(gavin_purifications) # 2166x2
tmp <- generate_overlap_nesting_matrices(complexList_raw[, 2]) # TIME: 3'
overlap_raw <- tmp$overlap
jaccard_raw <- tmp$jaccard_index
meetmin_raw <- tmp$meetmin_index

## Pair statistics over all i < j pairs. The original nested loop (TIME: 2 hr)
## grew its result vectors one element at a time; this is the same computation
## vectorised. t(m)[lower.tri(m)] walks the upper triangle of m row by row,
## i.e. in the order the loops used.
n_raw <- length(complexList_raw[, 2])
all_pairs <- n_raw * (n_raw - 1) / 2
pair_mask <- lower.tri(overlap_raw)
pair_overlap <- t(overlap_raw)[pair_mask]
pair_meetmin <- t(meetmin_raw)[pair_mask]
has_overlap <- pair_overlap > 0
positive_overlap <- pair_overlap[has_overlap]
positive_meetmin <- pair_meetmin[has_overlap]
count_over <- sum(has_overlap)
count_meetmin <- sum(positive_meetmin == 1)
all_pairs # 2344695
count_over # 912333
count_meetmin # 601
##

# Make nested complex multiedge representation:
table_nested_complexList_raw <- create_table_nested_complexList(complexList_raw, overlap_raw, jaccard_raw, meetmin_raw) # 601x7
tmp <- make_nested_groups(table_nested_complexList_raw)
raw_nested_groups <- tmp[[1]]
dim(raw_nested_groups) # 402x5 (raw data brings 402 nested groups)
graph_nested_raw <- tmp[[2]]

# 2.2: Generate edge lists --spoke and matrix models:

# Matrix model: every pair of members of a purification group is an edge.
#
# complexList_raw: matrix whose second column holds comma-separated members.
# Returns a two-column character matrix of edges without self-loops (zero
#   rows when no group has more than one member).
generate_edgeList_matrix_from_complexList <- function(complexList_raw) {
  per_complex <- lapply(strsplit(complexList_raw[, 2], ","), function(members) {
    if (length(members) > 1) t(combn(members, 2)) else NULL
  })
  edgeList_matrix_raw <- do.call(rbind, per_complex)
  if (is.null(edgeList_matrix_raw)) {
    return(matrix(character(0), ncol = 2))
  }
  edges_to_remove <- which(edgeList_matrix_raw[, 1] == edgeList_matrix_raw[, 2])
  if (length(edges_to_remove) > 0) {
    # drop = FALSE: stay a matrix even if a single edge is left.
    edgeList_matrix_raw <- edgeList_matrix_raw[-edges_to_remove, , drop = FALSE]
  }
  edgeList_matrix_raw
}

# Spoke model: one edge from the bait to each of its preys.
#
# gavin_purifications: table with the bait in column 1 and the space-separated
#   preys in column 2.
# Returns a two-column character matrix (bait, prey) without self-loops (zero
#   rows when no purification yields an edge).
generate_edgeList_spoke_from_complexList <- function(gavin_purifications) {
  per_purification <- lapply(seq_len(dim(gavin_purifications)[1]), function(i) {
    baits <- as.character(gavin_purifications[i, 1])
    preys <- strsplit(as.character(gavin_purifications[i, 2]), " ")[[1]]
    if (length(unique(c(baits, preys))) > 1) {
      cbind(rep(baits, length(preys)), preys)
    } else {
      NULL
    }
  })
  edgeList_spoke_raw <- do.call(rbind, per_purification)
  if (is.null(edgeList_spoke_raw)) {
    return(matrix(character(0), ncol = 2))
  }
  edges_to_remove <- which(edgeList_spoke_raw[, 1] == edgeList_spoke_raw[, 2])
  if (length(edges_to_remove) > 0) {
    edgeList_spoke_raw <- edgeList_spoke_raw[-edges_to_remove, , drop = FALSE]
  }
  edgeList_spoke_raw
}

edgeList_matrix_raw <- generate_edgeList_matrix_from_complexList(complexList_raw) # 228826 edges
edgeList_spoke_raw <- generate_edgeList_spoke_from_complexList(gavin_purifications) # 20208x2

# 2.3. Extract repeated experiments:
repeated_experiments <- names(which(table(gavin_purifications[, 1]) > 1)) # 143
# Logical mask instead of x[-which(...), ]: the negative form would return
# zero rows if no bait were repeated.
is_repeated <- gavin_purifications[, 1] %in% repeated_experiments
single_purifications <- gavin_purifications[!is_repeated, ] # 1849
repeated_purifications <- gavin_purifications[is_repeated, ] # 317

# One upper-case, sorted, comma-separated member string per purification.
#
# gavin_purification: table with the bait in column 1 and the space-separated
#   preys in column 2.
# Returns a character vector with one element per row (members are sorted
#   before being upper-cased).
from_gavin_to_complexList <- function(gavin_purification) {
  vapply(seq_len(dim(gavin_purification)[1]), function(i) {
    bait <- as.character(gavin_purification[i, 1])
    preys <- strsplit(as.character(gavin_purification[i, 2]), " ", fixed = TRUE)[[1]]
    paste(toupper(sort(unique(c(bait, preys)))), collapse = ",")
  }, character(1))
}

gavin_raw_repeated <- from_gavin_to_complexList(repeated_purifications)
gavin_raw_single <- from_gavin_to_complexList(single_purifications)

# 2.4.
# Baits behavior:
# Number of members of every raw purification (vapply pins the result type).
sizes_purifications <- vapply(complexList_raw[, 2], function(x) length(strsplit(x, ",")[[1]]), integer(1))
hub_baits <- gavin_purifications[which(sizes_purifications > 0.66 * max(sizes_purifications)), ] # APA1, CIC1, HHF1, MAK21, PSA1, PWP2
low_degree_baits <- gavin_purifications[which(sizes_purifications == 1), ] # 254

#####
# 3. Data scoring:
#####

# Two-column lookup table: mapped key, gene name.
x <- org.Sc.sgdGENENAME
mapped_genes <- mappedkeys(x)
tmp <- unlist(as.list(x[mapped_genes]))
table_sgd_genename <- cbind(mapped_genes, unname(tmp))

# Read a ProCope scored file and translate both identifier columns.
#
# procope_file: path to a tab-separated file without header (id, id, score).
# Returns a three-column character matrix. Identifiers are upper-cased and
#   replaced by the second column of the global 'table_sgd_genename' when
#   found in its first column (first match wins); otherwise they are kept.
get_procope_scored_files <- function(procope_file) {
  scored <- read.table(procope_file, header = FALSE, comment.char = "", sep = "\t", quote = "")

  # Vectorised lookup; the per-row rbind() was quadratic on ~80k rows and a
  # multi-match lookup would have produced a row with more than 3 fields.
  translate <- function(ids) {
    hit <- match(ids, table_sgd_genename[, 1])
    translated <- unname(table_sgd_genename[hit, 2])
    translated[is.na(hit)] <- ids[is.na(hit)]
    translated
  }

  matrix(c(translate(toupper(scored[, 1])),
           translate(toupper(scored[, 2])),
           scored[, 3]),
         ncol = 3)
}

Dice_scored_table <- get_procope_scored_files("gavin_scored_Dice.txt") # 82202x3
Hart_scored_table <- get_procope_scored_files("gavin_scored_Hart.txt") # 82153x3
PE_scored_table <- get_procope_scored_files("gavin_scored_PE.txt") #
SA_scored_table <- get_procope_scored_files("gavin_scored_SA.txt") # 76436x3

# Drop the rows where 'condition' is TRUE; rows with an NA condition are kept,
# as with x[-which(condition), ]. Unlike that form, this keeps every row when
# nothing matches and keeps a matrix when one row is left.
remove_rows_where <- function(scored_table, condition) {
  scored_table[!(condition %in% TRUE), , drop = FALSE]
}

edgeList_dice_scored <- remove_rows_where(Dice_scored_table, as.numeric(Dice_scored_table[, 3]) <= 0.15) # 16447 (threshold: Zhang08, 16169 in paper)
edgeList_hart_scored <- remove_rows_where(Hart_scored_table, as.numeric(Hart_scored_table[, 3]) > 0.01) # 6528 (threshold: Hart09, 5352 in paper, slightly different networks)
edgeList_PE_scored <- remove_rows_where(PE_scored_table, as.numeric(PE_scored_table[, 3]) <= 1.5) # 18278 (threshold: Zhang08)
edgeList_SA_scored <- remove_rows_where(SA_scored_table, as.numeric(SA_scored_table[, 3]) < 4) # 14004 (threshold: Zhang08)

#from_scored_edgeList_to_rank = function(edgeList_scored_list) {
#	rank = list()
#	for (i in 1:length(edgeList_scored_list)) {
#		edgeList_scored = edgeList_scored_list[[i]]
#		tmp = data.frame(edgeList_scored, -log(as.numeric(edgeList_scored[,3])))
#		colnames(tmp) = c("V1", "V2", "scores", "log_scores")
#		rank[[i]] = tmp[with(tmp, order(-scores)),]
#	}
#	rank
#}
#rank_scores = from_scored_edgeList_to_rank(list(edgeList_dice_scored, edgeList_hart_scored, edgeList_SA_scored))
#head(rank_scores[[1]])
#head(rank_scores[[2]])
#head(rank_scores[[3]])

# Convert a scored edge list into a graph.
#
# edgeList_scored: matrix whose first two columns are the interacting proteins.
# Returns the result of convert_edgeList_to_graph() applied to the two protein
#   columns followed by three columns filled with "1".
from_scored_edgeList_to_PIN <- function(edgeList_scored) {
  n_edges <- dim(edgeList_scored)[1]
  edgeList_final <- cbind(edgeList_scored[, 1:2, drop = FALSE],
                          rep("1", n_edges), rep("1", n_edges), rep("1", n_edges))
  PIN <- convert_edgeList_to_graph(edgeList_final)
  #simplify(PIN, remove.multiple=F)
  PIN
}

PIN_Dice <- from_scored_edgeList_to_PIN(edgeList_dice_scored) # 2192 and 16447
PIN_Hart <- from_scored_edgeList_to_PIN(edgeList_hart_scored) # 639 and 6528
PIN_PE <- from_scored_edgeList_to_PIN(edgeList_PE_scored) # 2344 and 18278
PIN_SA <- from_scored_edgeList_to_PIN(edgeList_SA_scored) # 2005 and 14004

##############
# 4. Data clustering:
##############

# 4.1.
# Generated complexes after hierarchical clustering procedure:

# Read a ProCope complex file (one complex per line, tab-separated members).
#
# procope_file: path to the file.
# Returns one string per line: the gene names (second column of the global
#   'table_sgd_genename') of the members found in its first column, sorted and
#   comma-separated. Members without an entry in the table are left out.
from_procope_to_complexList <- function(procope_file) {
  con <- file(procope_file)
  fields_per_line <- strsplit(readLines(con), "\t")
  close(con)

  translate_complex <- function(fields) {
    # Join and re-split on commas, so commas inside a field also separate members.
    members <- toupper(strsplit(paste(fields, collapse = ","), ",")[[1]])
    gene_names <- table_sgd_genename[which(table_sgd_genename[, 1] %in% members), 2]
    paste(sort(gene_names), collapse = ",")
  }

  unlist(lapply(fields_per_line, translate_complex))
}

gavin_complex_Dice_hierarchical <- from_procope_to_complexList("gavin_complex_Dice_hierarchical.txt")
gavin_complex_Hart_hierarchical <- from_procope_to_complexList("gavin_complex_Hart_hierarchical.txt")
gavin_complex_PE_hierarchical <- from_procope_to_complexList("gavin_complex_PE_hierarchical.txt")
gavin_complex_SA_hierarchical <- from_procope_to_complexList("gavin_complex_SA_hierarchical.txt")
mips_complexes <- from_procope_to_complexList("mips_complexes.txt")

# 4.2. Clustering using linkcomm:
lcomm_dice <- getLinkCommunities(data.frame(edgeList_dice_scored[,1:2]), hcmethod = "single")
lcomm_hart <- getLinkCommunities(data.frame(edgeList_hart_scored[,1:2]), hcmethod = "single")
lcomm_PE <- getLinkCommunities(data.frame(edgeList_PE_scored[,1:2]), hcmethod = "single")
lcomm_SA <- getLinkCommunities(data.frame(edgeList_SA_scored[,1:2]), hcmethod = "single")
print(lcomm_dice) # 770 comm, 70 in largest
print(lcomm_hart) # fail
print(lcomm_PE) # 553 comm, 100 in largest
print(lcomm_SA) # 694 comm, 72 in largest
#head(lcomm_dice$nodeclusters)
#head(lcomm_dice$clusters)
#head(lcomm_dice$edges)
#head(lcomm_dice$numclusters)
clusters_lcomm_dice <- lcomm_dice$nodeclusters
clusters_lcomm_PE <- lcomm_PE$nodeclusters
clusters_lcomm_SA <- lcomm_SA$nodeclusters

# Collapse a node/cluster table into one row per cluster.
#
# linkcomm_vector: table with the node in column 1 and its cluster in column 2.
# Returns (invisibly) cbind(cluster_ids, complexList): the sorted cluster IDs
#   next to the sorted, comma-separated nodes of each cluster.
from_linkcomm_to_complexList <- function(linkcomm_vector) {
  cluster_ids <- sort(unique(linkcomm_vector[, 2]))
  # A for loop walks a factor as character values; do the same explicitly.
  ids_to_visit <- if (is.factor(cluster_ids)) as.character(cluster_ids) else cluster_ids
  complexList <- unlist(lapply(ids_to_visit, function(id) {
    nodes <- sort(linkcomm_vector[which(linkcomm_vector[, 2] == id), 1])
    paste(nodes, collapse = ",")
  }))
  invisible(cbind(cluster_ids, complexList))
}

complexList_lcomm_dice <- from_linkcomm_to_complexList(clusters_lcomm_dice) # 770x2
complexList_lcomm_PE <- from_linkcomm_to_complexList(clusters_lcomm_PE) # 553x2
complexList_lcomm_SA <- from_linkcomm_to_complexList(clusters_lcomm_SA) # 694x2

#jpeg("lcomm_dice.jpg", width = 1024, height = 512)
#	plot(lcomm_dice, type = "graph", layout = layout.fruchterman.reingold, shownodesin = 70)
#dev.off()
#jpeg("lcomm_dice_some.jpg", width = 1024, height = 512)
#	plot(lcomm_dice, type = "graph", clusterids=c(1,2,3,4,5,6,7))
#dev.off()

nested_lcomm_dice <- getAllNestedComm(lcomm_dice) # 102
nested_lcomm_PE <- getAllNestedComm(lcomm_PE) # 35
nested_lcomm_SA <- getAllNestedComm(lcomm_SA) # 67
#getNestedHierarchies(lcomm_dice, clusid = 9)

jpeg("lcomm_clusters_of_clusters_dice.jpg", width = 1024, height = 512)
cr1 <- getClusterRelatedness(lcomm_dice, hcmethod = "ward")
cutDendrogramAt(cr1, cutat = 1.2) # 183 clusters of clusters
dev.off()

jpeg("lcomm_clusters_of_clusters_PE.jpg", width = 1024, height = 512)
cr3 <- getClusterRelatedness(lcomm_PE, hcmethod = "ward")
cutDendrogramAt(cr3, cutat = 1.2) # 106 clusters of clusters
dev.off()

jpeg("lcomm_clusters_of_clusters_SA.jpg", width = 1024, height = 512)
cr4 <- getClusterRelatedness(lcomm_SA, hcmethod = "ward")
cutDendrogramAt(cr4, cutat = 1.2) # 164 clusters of clusters
dev.off()

# 4.3.
# Clustering using OCG:
# One OCG clustering per scored edge list; only the two protein columns are used.
ocomm_dice <- getOCG.clusters(data.frame(edgeList_dice_scored[,1:2]))
ocomm_hart <- getOCG.clusters(data.frame(edgeList_hart_scored[,1:2]))
ocomm_PE <- getOCG.clusters(data.frame(edgeList_PE_scored[,1:2]))
ocomm_SA <- getOCG.clusters(data.frame(edgeList_SA_scored[,1:2]))
print(ocomm_dice) # 474 comm, 67 in largest
print(ocomm_hart) # 207 comm, 59 in largest
print(ocomm_PE) # 467 comm, 65 in largest
print(ocomm_SA) # 249 comm, 75 in largest

# Two-column tables (cluster ID, comma-separated members) built from the node-to-cluster assignments.
complexList_ocomm_dice = from_linkcomm_to_complexList(ocomm_dice$nodeclusters) # 474x2
complexList_ocomm_hart = from_linkcomm_to_complexList(ocomm_hart$nodeclusters) # 194x2
complexList_ocomm_PE = from_linkcomm_to_complexList(ocomm_PE$nodeclusters) # 467x2
complexList_ocomm_SA = from_linkcomm_to_complexList(ocomm_SA$nodeclusters) # 249x2

# Nested communities; the recorded outcome for all four OCG results is NaN.
nested_ocomm_dice = getAllNestedComm(ocomm_dice) # NaN
nested_ocomm_hart = getAllNestedComm(ocomm_hart) # NaN
nested_ocomm_PE = getAllNestedComm(ocomm_PE) # NaN
nested_ocomm_SA = getAllNestedComm(ocomm_SA) # NaN

# For each score: open a JPEG device, compute cluster relatedness (Ward
# clustering), cut the dendrogram at height 1.2 and close the device.
jpeg("ocomm_clusters_of_clusters_dice.jpg", width = 1024, height = 512)
cro_dice <- getClusterRelatedness(ocomm_dice, hcmethod = "ward")
cutDendrogramAt(cro_dice, cutat = 1.2) # 78 clusters of clusters
dev.off()

jpeg("ocomm_clusters_of_clusters_hart.jpg", width = 1024, height = 512)
cro_hart <- getClusterRelatedness(ocomm_hart, hcmethod = "ward")
cutDendrogramAt(cro_hart, cutat = 1.2) # 10 clusters of clusters
dev.off()

jpeg("ocomm_clusters_of_clusters_PE.jpg", width = 1024, height = 512)
cro_PE <- getClusterRelatedness(ocomm_PE, hcmethod = "ward")
cutDendrogramAt(cro_PE, cutat = 1.2) # 58 clusters of clusters
dev.off()

jpeg("ocomm_clusters_of_clusters_SA.jpg", width = 1024, height = 512)
cro_SA <- getClusterRelatedness(ocomm_SA, hcmethod = "ward")
cutDendrogramAt(cro_SA, cutat = 1.2) # 41 clusters of clusters
dev.off()

# Table 1 in paper summarizes the previous part

##############
# 5.
# Analysis of communities/predicted complexes --Hypergeometric method:
##############

# 5.1. Complex-to-complex hypergeometric comparisons:
#Population = total_number_proteins
#Pop success = mips[j]
#Sample = gavin[i]
#Sample success = overlap[i,j]

# Hypergeometric enrichment of every test complex in every reference complex.
#
# gavin_complex: character vector of test complexes (comma-separated members).
# complex_reference: character vector of reference complexes, same format.
# Returns list(overlap, hg_score): both are length(gavin_complex) x
#   length(complex_reference) matrices. hg_score[i, j] is the upper-tail
#   hypergeometric probability of seeing at least overlap[i, j] shared members;
#   its dimnames are the complex strings themselves.
generate_pvalue_matrix <- function(gavin_complex, complex_reference) {
  # Population: distinct identifiers over both sets of complexes.
  all_prots <- paste(paste(gavin_complex, collapse = ","),
                     paste(complex_reference, collapse = ","), sep = ",")
  total_number_proteins <- length(unique(strsplit(all_prots, ",", fixed = TRUE)[[1]]))

  # Split once up front instead of once per (i, j) pair.
  test_sets <- strsplit(as.character(gavin_complex), ",", fixed = TRUE)
  reference_sets <- strsplit(as.character(complex_reference), ",", fixed = TRUE)
  reference_sizes <- vapply(reference_sets, length, integer(1))

  overlap <- matrix(0L, nrow = length(test_sets), ncol = length(reference_sets))
  hg_score <- matrix(0, nrow = length(test_sets), ncol = length(reference_sets))
  for (i in seq_along(test_sets)) {
    digs <- test_sets[[i]]
    overlap[i, ] <- vapply(reference_sets, function(complexes) length(intersect(digs, complexes)),
                           integer(1))
    # overlap - 1 with lower.tail = FALSE gives P(X >= overlap).
    hg_score[i, ] <- phyper(overlap[i, ] - 1, reference_sizes,
                            total_number_proteins - reference_sizes,
                            length(digs), lower.tail = FALSE)
  }
  #hg_score = as.data.frame(hg_score)
  rownames(hg_score) <- gavin_complex
  colnames(hg_score) <- complex_reference

  list(overlap = overlap, hg_score = hg_score)
}

# Count how many complexes and references have a significant match.
#
# map_complex_to_reference: p-value matrix (rows = test complexes, columns =
#   reference complexes).
# Returns a list with the number of rows having at least one p-value < 0.05,
#   the number of rows, the number of columns having at least one p-value
#   < 0.05, and the number of columns. Missing p-values never count.
quantify_similarity <- function(map_complex_to_reference) {
  significant <- map_complex_to_reference < 0.05
  list(
    number_complexes_mapped_to_references = sum(rowSums(significant, na.rm = TRUE) > 0),
    total_number_complexes_test = dim(map_complex_to_reference)[1],
    number_references_mapped_to_complexes = sum(colSums(significant, na.rm = TRUE) > 0),
    total_number_complexes_reference = dim(map_complex_to_reference)[2]
  )
}

# Summarise how well several complex lists map onto one reference set.
#
# list_complexList: list of character vectors of complexes.
# reference: character vector of reference complexes.
# Returns a matrix with one row per complex list and six columns: mapped
#   complexes, total complexes, their ratio, mapped references, total
#   references, their ratio.
similarity_complexList_to_reference <- function(list_complexList, reference) {
  similarity_gavin <- lapply(unname(list_complexList), function(complexList) {
    map_gavin <- generate_pvalue_matrix(complexList, reference)[[2]]
    unlist(quantify_similarity(map_gavin))
  })
  method_performance <- do.call(rbind, similarity_gavin)
  # drop = FALSE keeps the column pairs as matrices when only one complex
  # list is given; otherwise cbind() would build the wrong shape.
  method_performance <- cbind(
    method_performance[, 1:2, drop = FALSE],
    as.numeric(method_performance[, 1]) / as.numeric(method_performance[, 2]),
    method_performance[, 3:4, drop = FALSE],
    as.numeric(method_performance[, 3]) / as.numeric(method_performance[, 4])
  )
  colnames(method_performance) <- c("#_complexes_mapped_to_reference", "total_number_complexes_test",
                                    "%complexes_mapped_to_reference",
                                    "number_references_mapped_to_complexes",
                                    "total_number_complexes_reference",
                                    "% references_mapped_to_complexes")
  method_performance
}

# Detect all predicted complexes found in all MIPS complexes
method_performance_all <- similarity_complexList_to_reference(
  list(gavin_raw_single, gavin_raw_repeated,
       gavin_complex_Dice_hierarchical, gavin_complex_Hart_hierarchical,
       gavin_complex_PE_hierarchical, gavin_complex_SA_hierarchical,
       toupper(complexList_lcomm_dice[,2]), toupper(complexList_lcomm_PE[,2]),
       toupper(complexList_lcomm_SA[,2]),
       toupper(complexList_ocomm_dice[,2]), toupper(complexList_ocomm_hart[,2]),
       toupper(complexList_ocomm_PE[,2]), toupper(complexList_ocomm_SA[,2])),
  mips_complexes)
rownames(method_performance_all) <- c("Raw_single-exp", "Raw_repeated-exp", "Dice+H", "Hart+H", "PE+H", "SA+H",
                                      "Dice+Linkcomm", "PE+Linkcomm", "SA+Linkcomm",
                                      "Dice+OCG", "Hart+OCG", "PE+OCG", "SA+OCG")
method_performance_all # Table 2 in paper

# 5.2. Analysis of nests:

# Full nesting analysis for one complex list.
#
# complexList: two-column table (complex ID, comma-separated members).
# Returns a list with table_nested_complexList (nested pairs), nested_groups
#   and graph_nested; all three are NULL when no complex is nested in another.
from_complexList_to_nestedgroup <- function(complexList) {
  # Step 1: Make overlap matrices.
  complexList_unique <- remove_repeated_complexes(complexList)
  tmp <- generate_overlap_nesting_matrices(complexList_unique[, 2])
  overlap_mat <- tmp$overlap
  jaccard_mat <- tmp$jaccard_index
  meetmin_mat <- tmp$meetmin_index

  # Guard on actual nesting (meet-min == 1), not merely on overlap: with
  # overlapping but un-nested complexes there are no pairs to tabulate and
  # the steps below used to fail.
  if (!any(meetmin_mat == 1, na.rm = TRUE)) {
    return(list(table_nested_complexList = NULL, nested_groups = NULL, graph_nested = NULL))
  }

  # Step 2: Generate nested pairs:
  table_nested_complexList <- create_table_nested_complexList(complexList_unique, overlap_mat,
                                                              jaccard_mat, meetmin_mat)
  # Step 3: Make nested complex multiedge representation.
  tmp <- make_nested_groups(table_nested_complexList)
  list(table_nested_complexList = table_nested_complexList,
       nested_groups = tmp[[1]],
       graph_nested = tmp[[2]])
}

# 5.3. Nests --Comparison:
tmp <- from_complexList_to_nestedgroup(cbind(1:length(mips_complexes), mips_complexes))
table_nested_complexList_mips <- tmp[[1]]
mips_nested_groups <- tmp[[2]]
graph_nested_mips <- tmp[[3]]

tmp <- from_complexList_to_nestedgroup(cbind(1:length(gavin_raw_single), gavin_raw_single))
table_nested_complexList_raw_sin <- tmp[[1]]
raw_sin_nested_groups <- tmp[[2]]
graph_nested_raw_sin <- tmp[[3]]

tmp <- from_complexList_to_nestedgroup(cbind(1:length(gavin_raw_repeated), gavin_raw_repeated))
table_nested_complexList_raw_rep <- tmp[[1]]
raw_rep_nested_groups <- tmp[[2]]
graph_nested_raw_rep <- tmp[[3]]

tmp <- from_complexList_to_nestedgroup(cbind(1:length(gavin_complex_Dice_hierarchical), gavin_complex_Dice_hierarchical))
table_nested_complexList_Dice_h <- tmp[[1]]
Dice_h_nested_groups <- tmp[[2]]
graph_nested_Dice_h <- tmp[[3]]

tmp <- from_complexList_to_nestedgroup(cbind(1:length(gavin_complex_Hart_hierarchical), gavin_complex_Hart_hierarchical))
table_nested_complexList_Hart <- tmp[[1]]
Hart_nested_groups <- tmp[[2]]
graph_nested_Hart <- tmp[[3]]

tmp <- from_complexList_to_nestedgroup(cbind(1:length(gavin_complex_PE_hierarchical), gavin_complex_PE_hierarchical))
# Unpack the nesting analysis computed just above (PE + hierarchical clustering):
# [[1]] nested-pair table, [[2]] nested groups, [[3]] per-group graphs.
table_nested_complexList_PE = tmp[[1]]
PE_nested_groups = tmp[[2]]
graph_nested_PE = tmp[[3]]

# Same analysis for the remaining complex lists.
tmp = from_complexList_to_nestedgroup(cbind(1:length(gavin_complex_SA_hierarchical), gavin_complex_SA_hierarchical))
table_nested_complexList_SA = tmp[[1]]
SA_nested_groups = tmp[[2]]
graph_nested_SA = tmp[[3]]

tmp = from_complexList_to_nestedgroup(complexList_lcomm_dice)
table_nested_complexList_lcomm_dice = tmp[[1]]
lcomm_dice_nested_groups = tmp[[2]]
graph_nested_lcomm_dice = tmp[[3]]

tmp = from_complexList_to_nestedgroup(complexList_lcomm_PE)
table_nested_complexList_lcomm_PE = tmp[[1]]
lcomm_PE_nested_groups = tmp[[2]]
graph_nested_lcomm_PE = tmp[[3]]

tmp = from_complexList_to_nestedgroup(complexList_lcomm_SA)
table_nested_complexList_lcomm_SA = tmp[[1]]
lcomm_SA_nested_groups = tmp[[2]]
graph_nested_lcomm_SA = tmp[[3]]

tmp = from_complexList_to_nestedgroup(complexList_ocomm_dice)
table_nested_complexList_ocomm_dice = tmp[[1]]
ocomm_dice_nested_groups = tmp[[2]]
graph_nested_ocomm_dice = tmp[[3]]

tmp = from_complexList_to_nestedgroup(complexList_ocomm_hart)
table_nested_complexList_ocomm_hart = tmp[[1]]
ocomm_hart_nested_groups = tmp[[2]]
graph_nested_ocomm_hart = tmp[[3]]

tmp = from_complexList_to_nestedgroup(complexList_ocomm_PE)
table_nested_complexList_ocomm_PE = tmp[[1]]
ocomm_PE_nested_groups = tmp[[2]]
graph_nested_ocomm_PE = tmp[[3]]

# The OCG+SA run is recorded as failing, so its results are not unpacked.
tmp = from_complexList_to_nestedgroup(complexList_ocomm_SA) # error
#table_nested_complexList_ocomm_SA = tmp[[1]]
#ocomm_SA_nested_groups = tmp[[2]]
#graph_nested_ocomm_SA = tmp[[3]]

# Number of rows of each nested-pair table (one row per nested pair).
list(mips=dim(table_nested_complexList_mips)[1],
     raw_sin=dim(table_nested_complexList_raw_sin)[1],
     raw_rep=dim(table_nested_complexList_raw_rep)[1],
     Dice_h=dim(table_nested_complexList_Dice_h)[1],
     Hart_h=dim(table_nested_complexList_Hart)[1],
     PE_h=dim(table_nested_complexList_PE)[1],
     SA_h=dim(table_nested_complexList_SA)[1],
     dice_lcomm=dim(table_nested_complexList_lcomm_dice)[1],
     PE_lcomm=dim(table_nested_complexList_lcomm_PE)[1],
     SA_lcomm=dim(table_nested_complexList_lcomm_SA)[1],
     dice_ocomm=dim(table_nested_complexList_ocomm_dice)[1],
     hart_ocomm=dim(table_nested_complexList_ocomm_hart)[1],
     PE_ocomm=dim(table_nested_complexList_ocomm_PE)[1])
# Nested groups: mips = 16; raw_sin = 348; raw_rep = 80; Dice_h = NULL; Hart_h = NULL; PE_h = NULL; SA_h = NULL; dice_lcomm = 238; PE_lcomm = 55; SA_lcomm = 156; dice_ocomm = 52; hart_ocomm = 15; PE_ocomm = 53

# Number of rows of each nested-group table (one row per nested group).
list(mips=dim(mips_nested_groups)[1],
     raw_sin=dim(raw_sin_nested_groups)[1],
     raw_rep=dim(raw_rep_nested_groups)[1],
     Dice_h=dim(Dice_h_nested_groups)[1],
     Hart_h=dim(Hart_nested_groups)[1],
     PE_h=dim(PE_nested_groups)[1],
     SA_h=dim(SA_nested_groups)[1],
     dice_lcomm=dim(lcomm_dice_nested_groups)[1],
     PE_lcomm=dim(lcomm_PE_nested_groups)[1],
     SA_lcomm=dim(lcomm_SA_nested_groups)[1],
     dice_ocomm=dim(ocomm_dice_nested_groups)[1],
     hart_ocomm=dim(ocomm_hart_nested_groups)[1],
     PE_ocomm=dim(ocomm_PE_nested_groups)[1])
# Nested groups: mips = 11; raw_sin = 256; raw_rep = 72; Dice_h = NULL; Hart_h = NULL; PE_h = NULL; SA_h = NULL; dice_lcomm = 230; PE_lcomm = 53; SA_lcomm = 154; dice_ocomm = 16; hart_ocomm = 10; PE_ocomm = 18 (OCG results are more groupable)

# Distinct subunit strings (columns 3 and 4 of each nested-pair table), i.e.
# every complex that takes part in at least one nested pair.
complexList_nests_mips = unique(c(as.character(table_nested_complexList_mips[,3]), as.character(table_nested_complexList_mips[,4])))
complexList_nests_raw_sin = unique(c(as.character(table_nested_complexList_raw_sin[,3]), as.character(table_nested_complexList_raw_sin[,4])))
complexList_nests_raw_rep = unique(c(as.character(table_nested_complexList_raw_rep[,3]), as.character(table_nested_complexList_raw_rep[,4])))
complexList_nests_lcomm_dice = unique(c(as.character(table_nested_complexList_lcomm_dice[,3]), as.character(table_nested_complexList_lcomm_dice[,4])))
complexList_nests_lcomm_PE = unique(c(as.character(table_nested_complexList_lcomm_PE[,3]), as.character(table_nested_complexList_lcomm_PE[,4])))
complexList_nests_lcomm_SA = unique(c(as.character(table_nested_complexList_lcomm_SA[,3]),
                                      as.character(table_nested_complexList_lcomm_SA[,4])))
complexList_nests_ocomm_dice = unique(c(as.character(table_nested_complexList_ocomm_dice[,3]), as.character(table_nested_complexList_ocomm_dice[,4])))
complexList_nests_ocomm_hart = unique(c(as.character(table_nested_complexList_ocomm_hart[,3]), as.character(table_nested_complexList_ocomm_hart[,4])))
complexList_nests_ocomm_PE = unique(c(as.character(table_nested_complexList_ocomm_PE[,3]), as.character(table_nested_complexList_ocomm_PE[,4])))

# Predicted nested complexes found in all mips complexes (even not-nested ones)
method_performance_n = similarity_complexList_to_reference(list(toupper(complexList_nests_raw_sin), toupper(complexList_nests_raw_rep), toupper(complexList_nests_lcomm_dice), toupper(complexList_nests_lcomm_PE), toupper(complexList_nests_lcomm_SA), toupper(complexList_nests_ocomm_dice), toupper(complexList_nests_ocomm_hart), toupper(complexList_nests_ocomm_PE)), mips_complexes)
rownames(method_performance_n) = c("Raw_single-exp", "Raw_repeated-exp", "Dice+Linkcomm", "PE+Linkcomm", "SA+Linkcomm", "Dice+OCG", "Hart+OCG", "PE+OCG")
method_performance_n # Table 3 in paper

# Predicted nested complexes found in mips nested complexes
method_performance_nn = similarity_complexList_to_reference(list(toupper(complexList_nests_raw_sin), toupper(complexList_nests_raw_rep), toupper(complexList_nests_lcomm_dice), toupper(complexList_nests_lcomm_PE), toupper(complexList_nests_lcomm_SA), toupper(complexList_nests_ocomm_dice), toupper(complexList_nests_ocomm_hart), toupper(complexList_nests_ocomm_PE)), complexList_nests_mips)
rownames(method_performance_nn) = c("Raw_single-exp", "Raw_repeated-exp", "Dice+Linkcomm", "PE+Linkcomm", "SA+Linkcomm", "Dice+OCG", "Hart+OCG", "PE+OCG")
method_performance_nn # Not in paper

# 5.4.
# Analysis of nests before and after clustering:
# For each raw set (single / repeated experiments) and each clustering result,
# take the p-value matrix ([[2]] of generate_pvalue_matrix) between the nested
# complexes before and after clustering and count the significant mappings.
map_gavin_raw_sin_lcomm_dice = generate_pvalue_matrix(complexList_nests_raw_sin, complexList_nests_lcomm_dice)[[2]]
similarity_gavin_raw_sin_lcomm_dice = quantify_similarity(map_gavin_raw_sin_lcomm_dice)
map_gavin_raw_sin_lcomm_PE = generate_pvalue_matrix(complexList_nests_raw_sin, complexList_nests_lcomm_PE)[[2]]
similarity_gavin_raw_sin_lcomm_PE = quantify_similarity(map_gavin_raw_sin_lcomm_PE)
map_gavin_raw_sin_ocomm_dice = generate_pvalue_matrix(complexList_nests_raw_sin, complexList_nests_ocomm_dice)[[2]]
similarity_gavin_raw_sin_ocomm_dice = quantify_similarity(map_gavin_raw_sin_ocomm_dice)
map_gavin_raw_sin_ocomm_PE = generate_pvalue_matrix(complexList_nests_raw_sin, complexList_nests_ocomm_PE)[[2]]
similarity_gavin_raw_sin_ocomm_PE = quantify_similarity(map_gavin_raw_sin_ocomm_PE)

map_gavin_raw_rep_lcomm_dice = generate_pvalue_matrix(complexList_nests_raw_rep, complexList_nests_lcomm_dice)[[2]]
similarity_gavin_raw_rep_lcomm_dice = quantify_similarity(map_gavin_raw_rep_lcomm_dice)
map_gavin_raw_rep_lcomm_PE = generate_pvalue_matrix(complexList_nests_raw_rep, complexList_nests_lcomm_PE)[[2]]
similarity_gavin_raw_rep_lcomm_PE = quantify_similarity(map_gavin_raw_rep_lcomm_PE)
map_gavin_raw_rep_ocomm_dice = generate_pvalue_matrix(complexList_nests_raw_rep, complexList_nests_ocomm_dice)[[2]]
similarity_gavin_raw_rep_ocomm_dice = quantify_similarity(map_gavin_raw_rep_ocomm_dice)
map_gavin_raw_rep_ocomm_PE = generate_pvalue_matrix(complexList_nests_raw_rep, complexList_nests_ocomm_PE)[[2]]
similarity_gavin_raw_rep_ocomm_PE = quantify_similarity(map_gavin_raw_rep_ocomm_PE)

# One row per comparison; the row order here must match the rownames set below.
method_performance_ba = rbind(unlist(similarity_gavin_raw_sin_lcomm_dice), unlist(similarity_gavin_raw_sin_ocomm_dice), unlist(similarity_gavin_raw_sin_lcomm_PE), unlist(similarity_gavin_raw_sin_ocomm_PE), unlist(similarity_gavin_raw_rep_lcomm_dice), unlist(similarity_gavin_raw_rep_ocomm_dice), unlist(similarity_gavin_raw_rep_lcomm_PE),
                              unlist(similarity_gavin_raw_rep_ocomm_PE))
# Insert the two ratios (mapped / total) after each pair of count columns.
method_performance_ba = cbind(method_performance_ba[,1:2], as.numeric(method_performance_ba[,1])/as.numeric(method_performance_ba[,2]), method_performance_ba[,3:4], as.numeric(method_performance_ba[,3])/as.numeric(method_performance_ba[,4]))
rownames(method_performance_ba) = c("raw_sin_to_lcomm_dice", "raw_sin_to_ocomm_dice", "raw_sin_to_lcomm_PE", "raw_sin_to_ocomm_PE", "raw_rep_to_lcomm_dice", "raw_rep_to_ocomm_dice", "raw_rep_to_lcomm_PE", "raw_rep_to_ocomm_PE")
colnames(method_performance_ba) = c("#_initial_mapped_to_final", "total_number_complexes_initial", "%initial_mapped_to_final", "number_final_mapped_to_initial", "total_number_complexes_final", "%final_mapped_to_initial")
method_performance_ba
# Hierarchical methods lost all nest info. OCG+PE and OCG+Dice can map all their findings to the raw_sin info, while 96%(PE) and 93%(Dice) of the repeated one. PE+linkcomm can map 82% single. All other combinations show poor values (lcomm+dice sin and rep, lcomm+pe repeated); therefore, OCG is more sensitive to detect the original nests.

# 5.5.
# Additional validation functions:

# Matching statistics between two lists of complexes.
# Reference: Brohee, 2006, Evaluation of clustering algorithms --Methods, Matching statistics
#
# Args:
#   complexList1: character vector; each element is one complex written as
#                 comma-separated subunit names.
#   complexList2: character vector in the same format.
# Returns:
#   list(sensitivity, PPV, accuracy) where
#     sensitivity = size-weighted mean, over complexList1, of the best
#                   overlap/size ratio against any complex of complexList2;
#     PPV         = overlap-weighted mean, over complexList2, of the best
#                   share of that complex's total overlap held by one complex
#                   of complexList1;
#     accuracy    = sqrt(sensitivity * PPV).
#   All three are NA when either list is empty. PPV (and hence accuracy) is 0
#   when the two lists share no subunit at all (previously NaN).
validation_functions <- function(complexList1, complexList2) {
  members1 <- strsplit(as.character(complexList1), ",")
  members2 <- strsplit(as.character(complexList2), ",")
  n1 <- length(members1)
  n2 <- length(members2)
  if (n1 == 0 || n2 == 0) {
    return(list(sensitivity = NA_real_, PPV = NA_real_, accuracy = NA_real_))
  }

  # sizes[i]: number of subunits listed for complex i of complexList1
  sizes <- vapply(members1, length, integer(1))

  # overlap[i, j]: number of distinct subunits shared by complex i and complex j
  # (named 'overlap' rather than 'T' so that the TRUE shorthand is not masked)
  overlap <- matrix(0, nrow = n1, ncol = n2)
  for (i in seq_len(n1)) {
    for (j in seq_len(n2)) {
      overlap[i, j] <- length(intersect(members1[[i]], members2[[j]]))
    }
  }

  # Sensitivity: best coverage of each complexList1 entry, weighted by its size.
  # 'sizes' has one entry per row, so the division recycles down the rows.
  complex_sensitivity <- apply(overlap / sizes, 1, max)
  cluster_sensitivity <- sum(sizes * complex_sensitivity) / sum(sizes)

  # PPV: for each complexList2 entry, the largest share of its total overlap
  # that falls on a single complexList1 entry; columns without overlap count as 0.
  column_totals <- colSums(overlap)
  PPV <- sweep(overlap, 2, column_totals, "/")
  PPV[, column_totals == 0] <- 0
  cluster_PPV <- apply(PPV, 2, max)
  total_overlap <- sum(column_totals)
  total_PPV <- if (total_overlap > 0) sum(column_totals * cluster_PPV) / total_overlap else 0

  geometric_accuracy <- sqrt(cluster_sensitivity * total_PPV)
  list(sensitivity = cluster_sensitivity, PPV = total_PPV, accuracy = geometric_accuracy)
}

matching_statistics_gavin_raw_sin_mips_n <- validation_functions(toupper(complexList_nests_raw_sin), mips_complexes)
matching_statistics_gavin_raw_rep_mips_n <- validation_functions(toupper(complexList_nests_raw_rep), mips_complexes)
matching_statistics_gavin_lcomm_dice_mips_n <- validation_functions(toupper(complexList_nests_lcomm_dice), mips_complexes)
matching_statistics_gavin_lcomm_PE_mips_n <- validation_functions(toupper(complexList_nests_lcomm_PE), mips_complexes)
matching_statistics_gavin_lcomm_SA_mips_n <- validation_functions(toupper(complexList_nests_lcomm_SA), mips_complexes)
matching_statistics_gavin_ocomm_dice_mips_n <- validation_functions(toupper(complexList_nests_ocomm_dice), mips_complexes)
matching_statistics_gavin_ocomm_hart_mips_n <- validation_functions(toupper(complexList_nests_ocomm_hart), mips_complexes)
matching_statistics_gavin_ocomm_PE_mips_n <- validation_functions(toupper(complexList_nests_ocomm_PE), mips_complexes)

matching_statistics_gavin_raw_sin_mips_n # accuracy: .22
matching_statistics_gavin_raw_rep_mips_n # accuracy: .25
matching_statistics_gavin_lcomm_dice_mips_n # accuracy: .32
matching_statistics_gavin_lcomm_PE_mips_n # accuracy: .45 +
matching_statistics_gavin_lcomm_SA_mips_n # accuracy: .35 +
matching_statistics_gavin_ocomm_dice_mips_n # accuracy: .25
matching_statistics_gavin_ocomm_hart_mips_n # accuracy: .38 (only 10 complexes)
matching_statistics_gavin_ocomm_PE_mips_n # accuracy: .30

########
# 6. TRIBAL ("TRIad-Based ALgorithm"):
########

# 6.1. Dice score adapted for triads:

# Build one "complex" per purification: the sorted, de-duplicated union of the
# bait (column 1) and the space-separated preys (column 2), joined by commas.
# Returns a two-column character matrix (row index, member string), or NULL
# when there are no purifications.
generate_complexList_from_purifications <- function(gavin_purifications) {
  rows <- lapply(seq_len(nrow(gavin_purifications)), function(i) {
    baits <- as.character(gavin_purifications[i, 1])
    preys <- strsplit(as.character(gavin_purifications[i, 2]), " ")[[1]]
    cbind(i, paste(sort(unique(c(baits, preys))), collapse = ","))
  })
  do.call(rbind, rows)
}

# For every purification with at least two entries in column 2, enumerate all
# unordered prey pairs ("A,B", upper-cased) and attach the upper-cased bait.
# Returns a list with
#   purification_pairs_per_experiment: list of pair vectors, one element per
#                                      purification that produced pairs;
#   all_purification_pairs_table:      unique pair strings over all of them;
#   purification_triads:               two-column matrix (bait, pair).
generate_edgeList_triad_from_complexList <- function(purifications) {
  purification_pairs <- list()
  purification_triads <- list()
  n <- 0
  for (i in seq_len(nrow(purifications))) {
    preys <- sort(strsplit(as.character(purifications[i, 2]), " ")[[1]])
    if (length(preys) > 1) {
      n <- n + 1
      these_pairs <- t(combn(toupper(preys), 2))
      purification_pairs[[n]] <- paste(these_pairs[, 1], these_pairs[, 2], sep = ",")
      bait <- toupper(as.character(purifications[i, 1]))
      purification_triads[[n]] <- cbind(rep(bait, nrow(these_pairs)), purification_pairs[[n]])
    }
  }
  list(
    purification_pairs_per_experiment = purification_pairs,
    all_purification_pairs_table = unique(do.call(c, purification_pairs)),
    purification_triads = do.call(rbind, purification_triads)
  )
}

# Binary matrix: rows = unique prey pairs, columns = unique baits; a cell is 1
# when the pair is listed in purification_pairs[[j]].
# NOTE(review): columns are labelled with the unique baits, but
# purification_pairs is indexed per purification; the two only line up if every
# bait occurs in exactly one pair-producing purification -- confirm.
create_pulldown_matrix_tri <- function(purification_pairs, all_purification_pairs_table, purification_triads) {
  list_experiments <- unique(purification_triads[, 1])
  pulldown_mat <- matrix(0, nrow = length(all_purification_pairs_table), ncol = length(list_experiments))
  for (j in seq_along(list_experiments)) {
    present <- which(all_purification_pairs_table %in% purification_pairs[[j]])
    pulldown_mat[present, j] <- 1
  }
  rownames(pulldown_mat) <- all_purification_pairs_table
  colnames(pulldown_mat) <- list_experiments
  pulldown_mat
}

# Dice score of every (bait, prey pair) triad.
#
# Args:
#   purification_triads: two-column matrix (bait, "A,B" pair).
#   pulldown_matrix:     accepted for interface compatibility; not used.
#   score:               only "dice" is implemented.
#   purification_pairs:  list of pair vectors per purification. The original
#                        code read this object as a free variable; it is now an
#                        explicit, optional argument that falls back to the
#                        global object of the same name when omitted.
# Returns the numeric vector of scores; the scores are also appended, one per
# line, to "dice_triads.txt".
#
# Changes w.r.t. the original:
#   * a pair is looked up by exact membership instead of grep(); the regex
#     substring match let e.g. "DPB2,POL3" also hit "DPB2,POL31", so scores can
#     differ from earlier runs;
#   * the lookup uses an index built once, instead of coercing the whole list
#     to text for every triad.
# NOTE(review): where_i1 indexes the unique baits while where_i2 indexes the
# purifications; the set operations below assume both numberings coincide --
# confirm against the data.
compute_dissimilarity_score <- function(purification_triads, pulldown_matrix, score = "dice", purification_pairs = NULL) {
  if (!identical(score, "dice")) {
    stop("compute_dissimilarity_score: only score = \"dice\" is implemented", call. = FALSE)
  }
  if (is.null(purification_pairs)) {
    purification_pairs <- get("purification_pairs", envir = globalenv())
  }

  list_experiments <- unique(purification_triads[, 1])
  bait_index <- match(purification_triads[, 1], list_experiments)

  # pair string -> indices of the purifications that list this pair
  pairs_per_experiment <- vapply(purification_pairs, length, integer(1))
  experiments_by_pair <- split(
    rep(seq_along(purification_pairs), pairs_per_experiment),
    unlist(purification_pairs)
  )

  n_triads <- nrow(purification_triads)
  D <- numeric(n_triads)
  for (i in seq_len(n_triads)) {
    where_i1 <- bait_index[i]
    where_i2 <- experiments_by_pair[[purification_triads[i, 2]]]
    q <- length(intersect(where_i1, where_i2))
    r <- length(setdiff(where_i1, where_i2))
    s <- length(setdiff(where_i2, where_i1))
    D[i] <- 2 * q / (2 * q + r + s)
    #if (score=="jaccard") { D[i] = q / (q+r+s) }
    #if (score=="geometric") { D[i] = log(q/(q*(r+s))) }
  }
  if (n_triads > 0) {
    write(D, file = "dice_triads.txt", ncolumns = 1, append = TRUE)
  }
  D
}

# Full pipeline: purifications -> triads -> Dice score per triad.
# Returns the triad matrix (bait, pair) with the scores bound as a third column.
triad_algorithm <- function(purifications) {
  triads <- generate_edgeList_triad_from_complexList(purifications)
  purification_pairs <- triads[[1]]
  all_purification_pairs_table <- triads[[2]]
  edgeList_triads <- triads[[3]]
  pulldown_matrix <- create_pulldown_matrix_tri(purification_pairs, all_purification_pairs_table, edgeList_triads)
  dice_scores <- compute_dissimilarity_score(edgeList_triads, pulldown_matrix, "dice",
                                             purification_pairs = purification_pairs)
  cbind(edgeList_triads, dice_scores)
}

#dice_scores = read.table("dice_triads.txt", header=F, comment.char="", sep='\t', quote="")
#Sys.time()
edgeList_dice_full <- triad_algorithm(gavin_purifications) # 12hr26'
#Sys.time()

# 6.2. Selection of a template method and a cutoff value for the score:

# The functions:

# For every cutoff: keep the triads scoring above the cutoff, expand each triad
# (bait, "A,B") into the two edges bait-A and bait-B, build the graph, and
# tabulate its edges against those of the global graphs PIN_PE and PIN_Dice.
#
# Args:
#   edgeList_dice_full: three-column table (bait, pair, score).
#   vector_cutoffs:     numeric vector of score cutoffs.
#   model:              accepted for interface compatibility; not used.
# Returns an unnamed list: edge lists per cutoff, graphs per cutoff,
# vennCounts() tables per cutoff.
#
# Fix: the original filtered with x[-which(cond), ], which selects zero rows
# when no triad is at or below the cutoff.
# NOTE(review): the Venn counts compare values taken from E() of three
# different graphs; whether those values are comparable across graphs depends
# on the igraph version in use -- confirm.
TRIBAL_PIN_analysis <- function(edgeList_dice_full, vector_cutoffs, model = "spoke") {
  edgeList_dice_hq <- list()
  PIN_TRIBAL <- list()
  the_categories <- list()
  scores <- as.numeric(edgeList_dice_full[, 3])

  for (n in seq_along(vector_cutoffs)) {
    below_cutoff <- which(scores <= vector_cutoffs[n])
    if (length(below_cutoff) > 0) {
      edgeList_dice_tmp <- edgeList_dice_full[-below_cutoff, , drop = FALSE]
    } else {
      edgeList_dice_tmp <- edgeList_dice_full
    }

    # Two edges per triad, kept in triad order.
    edge_rows <- lapply(seq_len(nrow(edgeList_dice_tmp)), function(j) {
      pair <- strsplit(as.character(edgeList_dice_tmp[j, 2]), ",")[[1]]
      rbind(
        c(as.character(edgeList_dice_tmp[j, 1]), pair[1], edgeList_dice_tmp[j, 3]),
        c(as.character(edgeList_dice_tmp[j, 1]), pair[2], edgeList_dice_tmp[j, 3])
      )
    })
    edgeList_dice_2 <- do.call(rbind, edge_rows)
    if (is.null(edgeList_dice_2)) {
      # Assigning NULL into a list would drop the slot and shift later cutoffs.
      edgeList_dice_2 <- matrix(character(0), nrow = 0, ncol = 3)
    }

    edgeList_dice_hq[[n]] <- edgeList_dice_2
    PIN_TRIBAL[[n]] <- graph.data.frame(as.data.frame(edgeList_dice_2), directed = FALSE)

    universe <- sort(union(E(PIN_PE), union(E(PIN_Dice), E(PIN_TRIBAL[[n]]))))
    Counts <- matrix(0, nrow = length(universe), ncol = 3)
    colnames(Counts) <- c("PE", "Dice", "TRIBAL")
    for (j in seq_along(universe)) {
      Counts[j, 1] <- universe[j] %in% E(PIN_PE)
      Counts[j, 2] <- universe[j] %in% E(PIN_Dice)
      Counts[j, 3] <- universe[j] %in% E(PIN_TRIBAL[[n]])
    }
    the_categories[[n]] <- vennCounts(Counts)
  }
  list(edgeList_dice_hq, PIN_TRIBAL, the_categories)
}

# For every cutoff: keep the edges that occur more than once, then collect, per
# template complex, the endpoints of edges whose meet-min overlap with the
# complex exceeds mm; each template with at least one hit yields one predicted
# subcomplex (sorted, comma-separated).
#
# Args:
#   edgeList_dice_hq: list of edge tables (one per cutoff), columns 1-2 = endpoints.
#   vector_cutoffs:   cutoffs; only its length is used here.
#   complexList:      two-column table, column 2 = comma-separated members.
#   mm:               meet-min threshold (strictly greater than).
# Returns a list with one character vector of predicted subcomplexes per cutoff.
#
# Fixes: x[-which(cond), ] selected zero rows when every edge was repeated, and
# dropped to a vector when one row was left; a descending k sequence (complex
# index beyond the number of edges) is no longer generated.
# NOTE(review): complex j is only compared with edges j+1..n and the last
# complex is never used. This looks like a pairwise-loop pattern applied to two
# unrelated index spaces; it is preserved here because the published counts
# depend on it -- confirm intent.
# NOTE(review): when nothing is predicted the result is a single empty string
# (counted as one prediction by length()), kept because downstream code indexes
# these lists -- confirm.
TRIBAL_complex_prediction <- function(edgeList_dice_hq, vector_cutoffs, complexList = complexList_lcomm_PE, mm = 0.5) {
  complexList_triad <- list()
  for (i in seq_along(vector_cutoffs)) {
    # Generate edgeList only including multiple edges:
    edge_table <- edgeList_dice_hq[[i]]
    edge_keys <- paste(edge_table[, 1], edge_table[, 2], sep = ".")
    key_counts <- table(edge_keys)
    single_keys <- names(key_counts[which(key_counts == 1)])
    final_edgeList <- edge_table[!(edge_keys %in% single_keys), , drop = FALSE]
    n_edges <- nrow(final_edgeList)

    # Generate list of all predicted subcomplexes and all nested pairs:
    predicted_maximal_complex <- character(0)
    predicted_subcomplex_interaction <- list()
    for (j in seq_len(max(nrow(complexList) - 1, 0))) {
      if (j + 1 > n_edges) break
      vector_j <- strsplit(as.character(complexList[j, 2]), ",")[[1]]
      for (k in (j + 1):n_edges) {
        vector_k <- final_edgeList[k, 1:2]
        if (length(vector_k) < length(vector_j)) {
          meetmin <- length(intersect(vector_j, vector_k)) / min(length(vector_j), length(vector_k))
          if (meetmin > mm) {
            hit <- length(predicted_maximal_complex) + 1
            predicted_maximal_complex[hit] <- as.character(complexList[j, 2])
            predicted_subcomplex_interaction[[hit]] <- vector_k
          }
        }
      }
    }

    unique_maximal_complexes <- unique(predicted_maximal_complex)
    predicted_subcomplexes <- vapply(unique_maximal_complexes, function(maximal) {
      supporting <- predicted_subcomplex_interaction[predicted_maximal_complex == maximal]
      paste(sort(unique(unlist(supporting))), collapse = ",")
    }, character(1), USE.NAMES = FALSE)
    if (length(predicted_subcomplexes) == 0) {
      predicted_subcomplexes <- ""
    }
    complexList_triad[[i]] <- unique(predicted_subcomplexes)
  }
  complexList_triad
}

vector_cutoffs <- c(.0001, .001, .0125, .025, .05, .075, .1, .125, .15, .175, .2)

# Using linkcomm:
tmp <- TRIBAL_PIN_analysis(edgeList_dice_full, vector_cutoffs)
edgeList_dice_hq <- tmp[[1]]
PIN_TRIBAL <- tmp[[2]]
the_categories <- tmp[[3]]
detach(package:limma)

complexList_lcomm_TRIBAL_cutoffs <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_lcomm_PE, .99)
a <- unlist(lapply(complexList_lcomm_TRIBAL_cutoffs, length))
b <- max(which(a == max(a)))
c(a[b], b) # mm=0.5 & 0.85 & 1 -> Chosen: 5 (18 predictions)

complexList_lcomm_TRIBAL_cutoffs2 <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_lcomm_dice, .99)
a <- unlist(lapply(complexList_lcomm_TRIBAL_cutoffs2, length))
b <- max(which(a == max(a)))
c(a[b], b) # mm= 1 -> Chosen: 5 (17 predictions)

complexList_lcomm_TRIBAL_cutoffs3 <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_lcomm_SA, .99)
a <- unlist(lapply(complexList_lcomm_TRIBAL_cutoffs3, length))
b <- max(which(a == max(a)))
c(a[b], b) #
# mm= 1 -> Chosen: 7 (21 predictions)

# Using OCG:
# After each prediction run, 'a' holds the number of predictions per cutoff and
# 'b' the last cutoff index that reaches the maximum of 'a'.
complexList_OCG_TRIBAL_cutoffs <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_ocomm_PE, .99)
a <- vapply(complexList_OCG_TRIBAL_cutoffs, length, integer(1))
b <- max(which(a == max(a)))
c(a[b], b) # mm=0.5 & 0.85 & 1 -> Chosen: 3 (58 predictions)

complexList_OCG_TRIBAL_cutoffs2 <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_ocomm_dice, .99)
a <- vapply(complexList_OCG_TRIBAL_cutoffs2, length, integer(1))
b <- max(which(a == max(a)))
c(a[b], b) # mm= 1 -> Chosen: 5 (33 predictions)

complexList_OCG_TRIBAL_cutoffs3 <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_ocomm_hart, .99)
a <- vapply(complexList_OCG_TRIBAL_cutoffs3, length, integer(1))
b <- max(which(a == max(a)))
c(a[b], b) # mm= 1 -> Chosen: 10 (2 predictions)

# Using MIPS:
complexList_MIPS <- cbind(1:length(mips_complexes), mips_complexes)
complexList_MIPS_TRIBAL_cutoffs <- TRIBAL_complex_prediction(edgeList_dice_hq, vector_cutoffs, complexList_MIPS, .99)
a <- vapply(complexList_MIPS_TRIBAL_cutoffs, length, integer(1))
b <- max(which(a == max(a)))
c(a[b], b) # mm=0.5 & 0.85 & 1 -> Chosen: 9 (8 predictions)

# Number of predictions per reliability cutoff, one column per template method.
predictions_per_cutoff <- cbind(
  vector_cutoffs,
  vapply(complexList_lcomm_TRIBAL_cutoffs, length, integer(1)),
  vapply(complexList_lcomm_TRIBAL_cutoffs2, length, integer(1)),
  vapply(complexList_lcomm_TRIBAL_cutoffs3, length, integer(1)),
  vapply(complexList_OCG_TRIBAL_cutoffs, length, integer(1)),
  vapply(complexList_OCG_TRIBAL_cutoffs2, length, integer(1)),
  vapply(complexList_OCG_TRIBAL_cutoffs3, length, integer(1))
)
colnames(predictions_per_cutoff) <- c("reliab_cutoff", "lcomm-PE", "lcomm-Dice", "lcomm-SA", "OCG-PE", "OCG-Dice", "OCG-Hart")
predictions_per_cutoff

############
# 7.
# TRIBAL evaluation using hyper-geometric comparisons:
############

# Evaluate the TRIBAL predictions obtained at every cutoff against the global
# reference 'mips_complexes'.
#
# Args:
#   complexList_triad: list with one character vector of predicted complexes
#                      per cutoff.
#   vector_cutoffs:    the cutoffs; used for iteration and as row names.
# Returns a named list:
#   method_performance_triad, table_nested_complexList_triad,
#   triad_nested_groups, complexList_nests_lcomm_triad,
#   method_performance_triad_nest, matching_statistics_gavin_lcomm_triad_mips.
# Relies on similarity_complexList_to_reference(), from_complexList_to_nestedgroup()
# and validation_functions(), which are defined elsewhere in this file.
TRIBAL_evaluation <- function(complexList_triad, vector_cutoffs) {
  table_nested_complexList_triad <- list()
  triad_nested_groups <- list()
  complexList_nests_lcomm_triad <- list()

  # Detect all predicted complexes found in all mips complexes
  method_performance_triad <- similarity_complexList_to_reference(complexList_triad, mips_complexes)
  rownames(method_performance_triad) <- vector_cutoffs
  # Compare method_performance_triad vs method_performance_all

  for (i in seq_along(vector_cutoffs)) {
    # Detect nested predicted complexes found in nested mips complexes
    predicted <- complexList_triad[[i]]
    nested <- from_complexList_to_nestedgroup(cbind(seq_along(predicted), predicted))
    table_nested_complexList_triad[[i]] <- nested[[1]]
    triad_nested_groups[[i]] <- nested[[2]]
    # Columns 3 and 4 of the nested table together give the nest members.
    complexList_nests_lcomm_triad[[i]] <- unique(c(
      as.character(table_nested_complexList_triad[[i]][, 3]),
      as.character(table_nested_complexList_triad[[i]][, 4])
    ))
  }

  method_performance_triad_nest <- similarity_complexList_to_reference(complexList_nests_lcomm_triad, mips_complexes)
  rownames(method_performance_triad_nest) <- vector_cutoffs
  # Compare method_performance_triad_nest vs method_performance_n

  matching_statistics_gavin_lcomm_triad_mips <- lapply(seq_along(vector_cutoffs), function(i) {
    validation_functions(complexList_nests_lcomm_triad[[i]], mips_complexes)
  })

  list(
    method_performance_triad = method_performance_triad,
    table_nested_complexList_triad = table_nested_complexList_triad,
    triad_nested_groups = triad_nested_groups,
    complexList_nests_lcomm_triad = complexList_nests_lcomm_triad,
    method_performance_triad_nest = method_performance_triad_nest,
    matching_statistics_gavin_lcomm_triad_mips = matching_statistics_gavin_lcomm_triad_mips
  )
}

# Using linkcomm-PE:
tmp <- TRIBAL_evaluation(complexList_lcomm_TRIBAL_cutoffs, vector_cutoffs)
method_performance_triad <- tmp[[1]]
table_nested_complexList_triad <- tmp[[2]]
triad_nested_groups <- tmp[[3]]
complexList_nests_lcomm_triad <- tmp[[4]]
method_performance_triad_nest <- tmp[[5]] # Supplementary Table 1 in paper
matching_statistics_gavin_lcomm_triad_mips <- tmp[[6]] # Supplementary Table 2 in paper

# Chosen cutoff: 0.05 (100%comp mapped to ref, 6%ref mapped to comp, 79.3%sensit, 55.5%accurac)
prediction_PIN <- PIN_TRIBAL[[5]] # 271x1430 (Approx.1/10th of other nets)
# Fix: this line read 'complexList_triad[[5]]', a name that only exists inside
# functions; the linkcomm-PE predictions evaluated above are the matching object.
prediction_subcomplexes <- complexList_lcomm_TRIBAL_cutoffs[[5]] # 18
prediction_nested_pairs_lcomm <- table_nested_complexList_triad[[5]] # 13
prediction_nested_groups_lcomm <- triad_nested_groups[[5]] # 5
prediction_subcomplexes_nested_groups_lcomm <- complexList_nests_lcomm_triad[[5]] # 14 (more)

# Venn diagram of the nested groups:

# Build the limma vennCounts() table for two or three subunit sets.
#
# Args:
#   list_vector: list of 2 or 3 character vectors; the first is labelled
#                "Maximal", the others "SubComp" (2 sets) or "SC1"/"SC2" (3 sets).
# Returns the vennCounts object. Stops for any other number of sets.
# limma is called through its namespace because the script detaches the package
# before this point.
make_Venn <- function(list_vector) {
  n_sets <- length(list_vector)
  if (!(n_sets %in% c(2, 3))) {
    stop("make_Venn: expected 2 or 3 sets, got ", n_sets, call. = FALSE)
  }
  set_labels <- if (n_sets == 3) c("Maximal", "SC1", "SC2") else c("Maximal", "SubComp")

  universe <- sort(Reduce(union, list_vector))
  Counts <- matrix(0, nrow = length(universe), ncol = n_sets)
  colnames(Counts) <- set_labels
  for (s in seq_len(n_sets)) {
    Counts[, s] <- universe %in% list_vector[[s]]
  }
  limma::vennCounts(Counts)
}

# NOTE(review): index 7 is used here although the cutoff chosen above is index 5 -- confirm.
nested_groups <- triad_nested_groups[[7]][, 4]
nested_groups <- nested_groups[-c(2, 3)] # remove cases with more than 3 sets
Venn_nested_group <- list()
for (i in seq_along(nested_groups)) {
  complexes <- strsplit(nested_groups[i], "|", fixed = TRUE)[[1]]
  vector_subunits <- lapply(complexes, function(members) strsplit(members, ",", fixed = TRUE)[[1]])
  Venn_nested_group[[i]] <- make_Venn(vector_subunits)
}

# Fix: this figure was written to "fig2.eps", the file the Fig.2 plot further
# down also writes to, so it was overwritten. postscript() sizes are in inches;
# 10 x 5 keeps the 2:1 shape of the former 1024 x 512.
postscript("venn_nested_groups.eps", width = 10, height = 5)
par(mfrow = c(2, 2))
for (i in seq_len(min(3, length(Venn_nested_group)))) {
  limma::vennDiagram(Venn_nested_group[[i]]) # Overlap behavior of three nested groups. Not in paper
}
dev.off()

# Using OCG-PE:
tmp <- TRIBAL_evaluation(complexList_OCG_TRIBAL_cutoffs, vector_cutoffs)
method_performance_triad_OCG <- tmp[[1]]
table_nested_complexList_triad_OCG <- tmp[[2]]
triad_nested_groups_OCG <- tmp[[3]]
complexList_nests_OCG_triad <- tmp[[4]]
method_performance_triad_nest_OCG <- tmp[[5]] # Supplementary Table 3 in paper
matching_statistics_gavin_OCG_triad_mips <- tmp[[6]] # Supplementary Table 4 in paper

# Chosen cutoff: 0.175 (95%comp mapped to ref, 13%ref mapped to comp, 67% sensit, 45%accurac)
prediction_nested_pairs_OCG <- table_nested_complexList_triad_OCG[[10]] # 21
prediction_subcomplexes_OCG <- unique(prediction_nested_pairs_OCG[, 4]) # 9
prediction_nested_groups_OCG <- triad_nested_groups_OCG[[10]] # 7
prediction_subcomplexes_nested_groups_OCG <- complexList_nests_OCG_triad[[10]] # 16 (more)

# Using MIPS:
tmp <- TRIBAL_evaluation(complexList_MIPS_TRIBAL_cutoffs, vector_cutoffs)
method_performance_triad_MIPS <- tmp[[1]] # 100% comp mapped to reference; 5% reference mapped to complexes
table_nested_complexList_triad_MIPS <- tmp[[2]] # 2
triad_nested_groups_MIPS <- tmp[[3]] # 2 nested groups
complexList_nests_MIPS_triad <- tmp[[4]] # 5 (using [[9]])
method_performance_triad_nest_MIPS <- tmp[[5]] # cutoff=.15: 100% comp mapped to ref and 3.7%ref mapped to comp
matching_statistics_gavin_MIPS_triad_mips <- tmp[[6]] # cutoff=.15: sens=1, accur: .72

# Chosen cutoff: 0.15
# NOTE(review): 0.15 is element 9 of vector_cutoffs, but element 11 (0.2) is
# selected below -- confirm which one is intended.
prediction_nested_pairs_MIPS <- table_nested_complexList_triad_MIPS[[11]] # 2
prediction_subcomplexes_MIPS <- unique(prediction_nested_pairs_MIPS[, 4]) # 1
prediction_nested_groups_MIPS <- triad_nested_groups_MIPS[[11]] # 2
prediction_subcomplexes_nested_groups_MIPS <- complexList_nests_MIPS_triad[[11]] # 3

# Summary table -Precision and predictions per cutoff:
a <- similarity_complexList_to_reference(complexList_lcomm_TRIBAL_cutoffs, mips_complexes)
b <- similarity_complexList_to_reference(complexList_lcomm_TRIBAL_cutoffs2, mips_complexes)
c <- similarity_complexList_to_reference(complexList_lcomm_TRIBAL_cutoffs3, mips_complexes)
d <- similarity_complexList_to_reference(complexList_OCG_TRIBAL_cutoffs, mips_complexes)
e <- similarity_complexList_to_reference(complexList_OCG_TRIBAL_cutoffs2, mips_complexes)
f <- similarity_complexList_to_reference(complexList_OCG_TRIBAL_cutoffs3, mips_complexes)
predictions_per_cutoff <- cbind(vector_cutoffs, a[, 1], b[, 1], c[, 1], d[, 1], e[, 1], f[, 1])
precision_per_cutoff <- cbind(vector_cutoffs, a[, 3], b[, 3], c[, 3], d[, 3], e[, 3], f[, 3])
colnames(predictions_per_cutoff) <- c("reliab_cutoff", "lcomm-PE", "lcomm-Dice", "lcomm-SA", "OCG-PE", "OCG-Dice", "OCG-Hart")
colnames(precision_per_cutoff) <- c("reliab_cutoff", "lcomm-PE", "lcomm-Dice", "lcomm-SA", "OCG-PE", "OCG-Dice", "OCG-Hart")
predictions_per_cutoff # Table 4 in paper
precision_per_cutoff # Table 5 in paper

##############
# 8. Analysis of communities/predicted complexes --meetmin method:
##############

# Meet-min validation of several complex lists against a template list.
#
# Args:
#   list_complexList: list of character vectors (comma-separated complexes).
#   list_names:       list of labels, one per complex list.
#   template_mips:    character vector of template complexes.
#   cutoff:           meet-min threshold (greater than or equal).
# Returns a character matrix with one row per complex list: label, number of
# complexes, number able to fit in a template, number validated, percentage
# validated, and the validated complexes joined by "|". Returns NULL for an
# empty list_complexList.
#
# NOTE(review): in the source this function was damaged between
# 'length(complex_i)' and '= cutoff)' (text between '<' and '>' was lost). The
# inner test below is a RECONSTRUCTION: a complex "fits" a template when it has
# fewer members, and is validated when overlap / min(size_i, size_j) >= cutoff,
# the meet-min definition used elsewhere in this file. What exactly was
# accumulated in 'n' is assumed -- verify against the original script.
# NOTE(review): complex i is only compared with templates i+1..n and the last
# complex is never evaluated (pairwise-loop pattern over two different lists);
# preserved as found -- confirm intent.
meetmin_complexList_to_reference <- function(list_complexList, list_names, template_mips, cutoff = 0.5) {
  table_rows <- list()
  for (h in seq_along(list_complexList)) {
    validated <- NULL
    able_to_fit <- NULL
    complexList <- list_complexList[[h]]
    n_templates <- length(template_mips)

    for (i in seq_len(max(length(complexList) - 1, 0))) {
      if (i + 1 > n_templates) break
      complex_i <- strsplit(as.character(complexList[i]), ",", fixed = TRUE)[[1]]
      if (length(complex_i) > 2) {
        key_i <- paste(sort(complex_i), collapse = ",")
        for (j in (i + 1):n_templates) {
          complex_j <- strsplit(as.character(template_mips[j]), ",", fixed = TRUE)[[1]]
          if (length(complex_i) < length(complex_j)) {
            able_to_fit <- c(able_to_fit, key_i)
            meetmin <- length(intersect(complex_i, complex_j)) / min(length(complex_i), length(complex_j))
            if (meetmin >= cutoff) {
              validated <- c(validated, key_i)
            }
          }
        }
      }
    }

    validated_subcomplexes <- if (length(validated) > 0) unique(validated) else NULL
    n_able <- length(unique(able_to_fit))
    # The percentage is NaN when no complex is able to fit (0/0), as before.
    table_rows[[h]] <- c(
      list_names[[h]],
      length(complexList),
      n_able,
      length(validated_subcomplexes),
      length(validated_subcomplexes) * 100 / n_able,
      paste(validated_subcomplexes, collapse = "|")
    )
  }
  validation_table <- do.call(rbind, table_rows)
  if (!is.null(validation_table)) {
    rownames(validation_table) <- NULL
  }
  validation_table
}

# Run the meet-min validation for the ten global nested-complex tables (their
# column 4) plus one TRIBAL prediction set, and label the columns.
#
# Args:
#   TRIBAL_template_set: character vector of TRIBAL predictions (row "TRIBAL").
#   reference_set:       template complexes.
#   cutoff_value:        meet-min threshold passed through.
# Returns the labelled validation matrix.
do_meetmin_validation_TRIBAL <- function(TRIBAL_template_set, reference_set, cutoff_value = 0.5) {
  candidate_sets <- list(
    unique(table_nested_complexList_Dice_h[, 4]),
    unique(table_nested_complexList_Hart[, 4]),
    unique(table_nested_complexList_PE[, 4]),
    unique(table_nested_complexList_SA[, 4]),
    unique(table_nested_complexList_lcomm_dice[, 4]),
    unique(table_nested_complexList_lcomm_PE[, 4]),
    unique(table_nested_complexList_lcomm_SA[, 4]),
    unique(table_nested_complexList_ocomm_dice[, 4]),
    unique(table_nested_complexList_ocomm_hart[, 4]),
    unique(table_nested_complexList_ocomm_PE[, 4]),
    TRIBAL_template_set
  )
  candidate_names <- list("Dice-H", "Hart-H", "PE-H", "SA-H", "Dice-lcomm", "PE-lcomm", "SA-lcomm",
                          "Dice-ocomm", "Hart-ocomm", "PE-ocomm", "TRIBAL")
  meetmin_validation <- meetmin_complexList_to_reference(candidate_sets, candidate_names, reference_set, cutoff_value)
  colnames(meetmin_validation) <- c("complex_list", "# subcomplexes", "# able-to-fit-in-template",
                                    "# validated subcomplexes", "% validated subcomplexes", "validated subcomplexes")
  meetmin_validation
}

# Type I validation:
# Superseded by the next assignment to the same name, so no longer evaluated:
#meetmin_validation_lcomm_PE = do_meetmin_validation_TRIBAL(unique(table_nested_complexList_triad[[7]][,4]), mips_complexes, 0.4)
meetmin_validation_lcomm_PE <- do_meetmin_validation_TRIBAL(complexList_lcomm_TRIBAL_cutoffs[[5]], mips_complexes, 0.4) # 60% (9/15)
meetmin_validation_lcomm_dice <- do_meetmin_validation_TRIBAL(complexList_lcomm_TRIBAL_cutoffs2[[5]], mips_complexes, 0.4) # 27% (3/11)
meetmin_validation_lcomm_SA <- do_meetmin_validation_TRIBAL(complexList_lcomm_TRIBAL_cutoffs3[[7]], mips_complexes, 0.4) # 40% (6/15)
meetmin_validation_ocomm_PE <- do_meetmin_validation_TRIBAL(complexList_OCG_TRIBAL_cutoffs[[3]], mips_complexes, 0.4) # 27% (11/41)
meetmin_validation_ocomm_dice <- do_meetmin_validation_TRIBAL(complexList_OCG_TRIBAL_cutoffs2[[5]], mips_complexes, 0.4) # 28% (6/21)
meetmin_validation_ocomm_hart <- do_meetmin_validation_TRIBAL(complexList_OCG_TRIBAL_cutoffs3[[10]], mips_complexes, 0.4) # 0% (0/0)
meetmin_validation_MIPS <- do_meetmin_validation_TRIBAL(complexList_MIPS_TRIBAL_cutoffs[[9]], mips_complexes, 0.4) # 80% (4/5)

# Type II validation:
mm_dice_h <- unique(c(as.character(table_nested_complexList_Dice_h[, 3]), as.character(table_nested_complexList_Dice_h[, 4])))
mm_hart_h <- unique(c(as.character(table_nested_complexList_Hart[, 3]), as.character(table_nested_complexList_Hart[, 4])))
mm_PE_h <- unique(c(as.character(table_nested_complexList_PE[, 3]), as.character(table_nested_complexList_PE[, 4])))
mm_SA_h <- unique(c(as.character(table_nested_complexList_SA[, 3]), as.character(table_nested_complexList_SA[, 4])))
mm_dice_lcomm <- unique(c(as.character(table_nested_complexList_lcomm_dice[, 3]), as.character(table_nested_complexList_lcomm_dice[, 4])))
mm_PE_lcomm <- unique(c(as.character(table_nested_complexList_lcomm_PE[, 3]), as.character(table_nested_complexList_lcomm_PE[, 4])))
mm_SA_lcomm <- unique(c(as.character(table_nested_complexList_lcomm_SA[, 3]), as.character(table_nested_complexList_lcomm_SA[, 4])))
mm_dice_ocomm <- unique(c(as.character(table_nested_complexList_ocomm_dice[, 3]), as.character(table_nested_complexList_ocomm_dice[, 4])))
mm_hart_ocomm <- unique(c(as.character(table_nested_complexList_ocomm_hart[, 3]), as.character(table_nested_complexList_ocomm_hart[, 4])))
mm_PE_ocomm <- unique(c(as.character(table_nested_complexList_ocomm_PE[, 3]), as.character(table_nested_complexList_ocomm_PE[, 4])))
mm_TRIBAL <- unique(c(as.character(table_nested_complexList_triad[[7]][, 3]), as.character(table_nested_complexList_triad[[7]][, 4])))
#meetmin_validation2 = meetmin_complexList_to_reference(list(mm_dice_h, mm_hart_h, mm_PE_h, mm_SA_h, mm_dice_lcomm, mm_PE_lcomm, mm_SA_lcomm, mm_dice_ocomm, mm_hart_ocomm, mm_PE_ocomm, mm_TRIBAL), list("Dice-H", "Hart-H", "PE-H", "SA-H", "Dice-lcomm", "PE-lcomm", "SA-lcomm", "Dice-ocomm", "Hart-ocomm", "PE-ocomm", "TRIBAL"), mips_complexes, 0.4)
#colnames(meetmin_validation2) = c("complex_list", "# subcomplexes", "# able-to-fit-in-template", "# validated subcomplexes", "% validated subcomplexes", "validated subcomplexes")
#meetmin_validation2

# Final plot:
# Accumulators, one value appended per meet-min cutoff. The *_1 series take
# column 4 of the validation table (number validated), the *_2 series take
# column 5 (percentage validated); rows 5-11 are the seven methods plotted.
percentage_dice_lcomm_1 <- NULL
percentage_PE_lcomm_1 <- NULL
percentage_SA_lcomm_1 <- NULL
percentage_dice_ocomm_1 <- NULL
percentage_hart_ocomm_1 <- NULL
percentage_PE_ocomm_1 <- NULL
percentage_TRIBAL_1 <- NULL
percentage_dice_lcomm_2 <- NULL
percentage_PE_lcomm_2 <- NULL
percentage_SA_lcomm_2 <- NULL
percentage_dice_ocomm_2 <- NULL
percentage_hart_ocomm_2 <- NULL
percentage_PE_ocomm_2 <- NULL
percentage_TRIBAL_2 <- NULL

for (cutoff in c(0.01, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1)) {
  meetmin_validation <- do_meetmin_validation_TRIBAL(complexList_lcomm_TRIBAL_cutoffs[[5]], mips_complexes, cutoff)

  percentage_dice_lcomm_1 <- c(percentage_dice_lcomm_1, meetmin_validation[5, 4])
  percentage_PE_lcomm_1 <- c(percentage_PE_lcomm_1, meetmin_validation[6, 4])
  percentage_SA_lcomm_1 <- c(percentage_SA_lcomm_1, meetmin_validation[7, 4])
  percentage_dice_ocomm_1 <- c(percentage_dice_ocomm_1, meetmin_validation[8, 4])
  percentage_hart_ocomm_1 <- c(percentage_hart_ocomm_1, meetmin_validation[9, 4])
  percentage_PE_ocomm_1 <- c(percentage_PE_ocomm_1, meetmin_validation[10, 4])
  percentage_TRIBAL_1 <- c(percentage_TRIBAL_1, meetmin_validation[11, 4])

  percentage_dice_lcomm_2 <- c(percentage_dice_lcomm_2, meetmin_validation[5, 5])
  percentage_PE_lcomm_2 <- c(percentage_PE_lcomm_2, meetmin_validation[6, 5])
  percentage_SA_lcomm_2 <- c(percentage_SA_lcomm_2, meetmin_validation[7, 5])
  percentage_dice_ocomm_2 <- c(percentage_dice_ocomm_2, meetmin_validation[8, 5])
  percentage_hart_ocomm_2 <- c(percentage_hart_ocomm_2, meetmin_validation[9, 5])
  percentage_PE_ocomm_2 <- c(percentage_PE_ocomm_2, meetmin_validation[10, 5])
  percentage_TRIBAL_2 <- c(percentage_TRIBAL_2, meetmin_validation[11, 5])
}

# Two panels: number (left) and percentage (right) of validated subcomplexes
# against the meet-min cutoff. local() keeps the plotting helpers out of the
# workspace.
local({
  meetmin_grid <- c(0.01, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1)
  method_labels <- c("linkcomm-Dice", "linkcomm-PE", "linkcomm-SA", "OCG-Dice", "OCG-Hart", "OCG-PE", "TRIBAL")
  method_colours <- c("blue", "black", "purple", "orange", "green", "lightblue", "red")

  # The first series opens the panel with plot(); the others are overlaid in legend order.
  draw_panel <- function(series, y_label, y_limits) {
    plot(meetmin_grid, series[[1]], type = "o", col = method_colours[1],
         xlab = "meet-min index", ylab = y_label, ylim = y_limits)
    legend("topright", method_labels, fill = method_colours)
    for (k in seq_along(series)[-1]) {
      lines(meetmin_grid, series[[k]], type = "o", col = method_colours[k])
    }
  }

  postscript("fig2.eps") # Fig.2 in paper
  par(mfrow = c(1, 2))
  draw_panel(
    list(percentage_dice_lcomm_1, percentage_PE_lcomm_1, percentage_SA_lcomm_1, percentage_dice_ocomm_1,
         percentage_hart_ocomm_1, percentage_PE_ocomm_1, percentage_TRIBAL_1),
    "# validated subcomplexes", c(0, 35)
  )
  draw_panel(
    list(percentage_dice_lcomm_2, percentage_PE_lcomm_2, percentage_SA_lcomm_2, percentage_dice_ocomm_2,
         percentage_hart_ocomm_2, percentage_PE_ocomm_2, percentage_TRIBAL_2),
    "% validated subcomplexes", c(0, 100)
  )
  dev.off()
})

##############
# 9. Analysis of examples:
##############

# 9.1.
# List of some complexes curated from literature (extracted from nested MIPS and predicted nested):
# Each complex is stored as a single comma-separated string; the trailing
# comments give the cell of the nested-complex table it corresponds to.

# TIM22 complex
tim22_complex <- "TIM10, TIM12, TIM18, TIM22, TIM54, TIM9"
tim9_tim10_complex <- "TIM10, TIM9" # table_nested_complexList_mips[1,4]
tim22_complex_MIPS <- "TIM10,TIM12,TIM22,TIM54,TIM9" # table_nested_complexList_mips[1,3]

# CCR4-NOT complex
CCR4_NOT_complex <- "CAF130, CAF16, CAF4, CAF40, CCR4, CDC36, CDC39, MOT2, NOT3, NOT5, POP2"
CCR4_complex_MIPS <- "CAF16,CAF4,CCR4,CDC36,CDC39,DBF2,DHH1,IBA57,MOB1,MOT2,NOT3,NOT5,POP2" # table_nested_complexList_mips[2,3]
NOT_complex_MIPS <- "CDC36, CDC39, MOT2, NOT3, NOT5" # table_nested_complexList_mips[2,4]

# TFIIH / TFIIK
holo_TFIIH_complex <- "CCL1, KIN28, RAD3, SSL1, SSL2, TFB1, TFB2, TFB3, TFB4"
TFIIK_complex <- "CCL1, KIN28, TFB3" # CTK1,CTK2,CTK3
TFIIH_complex_MIPS <- "CCL1,KIN28,RAD3,SSL1,SSL2,TFB1,TFB2,TFB3,TFB4" # table_nested_complexList_mips[8,3]
TFIIK_complex_MIPS <- "CCL1,KIN28" # table_nested_complexList_mips[4,4] & [8,4]

# SAGA and related complexes
SAGA_complex <- "ADA2, ADA3, CHD1, GCN5, HFI1, SGF11, SGF29, SGF73, SPT20, SPT3, SPT7, SPT8, SUS1, TAF10, TAF12, TAF5, TAF6, TAF9, TRA1, UBP8" # AT LEAST... SPT20=ADA5, HFI1=ADA1
SALSA_complex <- "ADA2, ADA3, GCN5, HFI1, SPT20, SPT3, SPT7, TAF12, TAF5, TAF6, TRA1" # AT LEAST... SPT7 (C-terminal truncated form)
SLIK_complex <- "ADA2, ADA3, CHD1, GCN5, HFI1, RTG2, SGF29, SPT20, SPT3, SPT7, TAF10, TAF12, TAF5, TAF6, TAF9, TRA1, UBP8" # AT LEAST
ADA_complex <- "ADA2, ADA3, AHC1, GCN5" # AT LEAST
ADA_GCN5_complex <- "ADA2, ADA3, GCN5, HFI1, SPT20" # annotated as subcomplex of SAGA (http://www.uniprot.org/uniprot/P32494)
#SAGA_complex_MIPS = c("ADA2,GCN5,HFI1,NGG1,SGF29,SGF73,SPT20,SPT3,SPT7,SPT8,TAF10,TAF12,TAF5,TAF6,TAF9,TRA1") # table_nested_complexList_mips[6,3] & [9,3] & [12,3]
#SAGA_like_complex_MIPS = c("ADA2, SPT3, SPT7, TAF6, TRA1") # table_nested_complexList_mips[12,4]
#ADA_complex_MIPS = c("ADA2, GCN5, HFI1, NGG1, SPT20, TRA1") table_nested_complexList_mips[5,3] & [9,4]
#ADA_like_complex_MIPS = c("GCN5,SPT20") # table_nested_complexList_mips[5,4] & [6,4]
######### (can detect linkcomm or OCG any of these nests inside nests??)

# DNA replication
origin_recognition_complex <- "ORC1, ORC2, ORC3, ORC4, ORC5, ORC6" #also: post-replication complex?
polymerase_delta <- "POL3, POL31, POL32" # table_nested_complexList_mips[7,4]
polymerase_epsilon <- "DPB2, DPB3, DPB4, POL2"
mini_chromosome_maintenance_complex <- "MCM2, MCM3, MCM4, MCM5, MCM6, MCM7"
pre_replication_complex <- "CDC6, CDT1, MCM2, MCM3, MCM4, MCM5, MCM6, MCM7, ORC1, ORC2, ORC3, ORC4, ORC5, ORC6"
#pre_replication_complex_MIPS = c("CDC45, CDC46, CDC47, CDC54, CDC6, MCM2, MCM3, MCM6, ORC1, ORC2, ORC3, ORC4, ORC5, ORC6")
#replication_complex_MIPS = c("CDC2, CDC45, CDC46, CDC47, CDC54, DPB2, DPB3, HYS2, MCM2, MCM3, MCM6, ORC1, ORC2, ORC3, ORC4, ORC5, ORC6, POL2, POL32")
#replication_initiation_complex_MIPS = c("CDC7, DBF4, ORC1, ORC2, ORC3, ORC4, ORC5, ORC6") # table_nested_complexList_mips[13,3]
#post_replication_complex_MIPS = c("ORC1, ORC2, ORC3, ORC4, ORC5, ORC6") # table_nested_complexList_mips[13,4] & [15,4] & [16,4]
#DNA_polymerase_deltaIII_MIPS = c("CDC2, HYS2, POL32")
#DNA_polymerase_epsilonII_MIPS = c("DPB2, DPB3, POL2") # table_nested_complexList_mips[14,4]
#DNA_polymerase_zeta = c("REV3, REV7")
###replication_complexes_MIPS = c("EST3, PRI1, RFA3, RFC2, PRI2, EST2, CDC46, EST1, CDC45, ORC3, RAD27, POL32, HYS2, CDC6, REV7, ORC1, TOP2, POL1, RFC1, RFC4, TOP1, RFA2, RFC3, POL2, ORC5, RNH1, MCM3, MCM6, ORC6, DNA2, ECM32, RFA1, RFC5, POL4, DBF4, CDC9, CDC2, CDC7, DPB3, CDC47, POL30, ORC2, MCM2, POL12, MIP1, DPB2, ORC4, CDC54, REV3")
#CDC45,DPB11,DPB2,DPB3,MCM2,MCM3,MCM4,MCM5,MCM6,MCM7,ORC1,ORC2,ORC3,ORC4,ORC5,ORC6,POL2,POL3,POL31,POL32 # table_nested_complexList_mips[7,3] & [14,3]& [15,3]
#CDC45,CDC6,DPB11,MCM2,MCM3,MCM4,MCM5,MCM6,MCM7,ORC1,ORC2,ORC3,ORC4,ORC5,ORC6,SLD3 # table_nested_complexList_mips[16,3]

# Each complex below is stored as ONE comma-separated string of subunit names.
# Trailing "[r,c]" / "r,c" notes point at a cell of the named nested-complex
# table (the short "r,c" form presumably refers to
# table_nested_complexList_lcomm_PE -- TODO confirm).
# Member lists that had no variable name were bare text (a parse error in R);
# they are kept here as comments, marked "(unassigned)".

# Ribonuclease P / MRP
ribonuclease_P = c("POP1, POP3, POP4, POP5, POP6, POP7, POP8, RPP1, RPR2") # table_nested_complexList_mips[10,3]
ribonuclease_MRP = c("POP1, POP3, POP4, POP5, POP6, POP7, POP8, RMP1, RPP1, SNM1")
#ribonuclease_MRP_MIPS = c("POP1, POP3, POP4, POP5, POP6, POP7, POP8, RPP1") # table_nested_complexList_mips[10,4]

# Tubulin-associated proteins
tubulin_associated_proteins_MIPS = c("ASE1, ATG4, ATG8, BIK1, BIM1, CBF5, CIN2, CIN4, MHP1, RBL2, SPC98, STU1, STU2, YTM1") # table_nested_complexList_mips[11,3]
TAP_like_MIPS = c("ATG4,ATG8") # table_nested_complexList_mips[11,4]
#microtubules_altMIPS = c("ARP1,ASE1,ATG4,ATG8,BIK1,BIM1,CBF5,CIK1,CIN2,CIN4,CIN8,COG7,DYN1,DYN2,JNM1,KAR3,KIP1,KIP2,KIP3,LIN1,MHP1,NIP100,PAC11,RBL2,SPC98,STU1,STU2,TUB1,TUB2,TUB3,TUB4,YTM1")
#cytoskeleton_altMIPS = c("ABP1,ACT1,AIP1,ARC40,ARP1,ARP2,ASE1,ATG4,ATG8,BEM1,BIK1,BIM1,BUD6,CAP1,CAP2,CBF5,CDC10,CDC11,CDC12,CDC3,CIK1,CIN2,CIN4,CIN8,COF1,COG7,CRN1,DLD2,DYN1,DYN2,GLK1,JNM1,KAR3,KIP1,KIP2,KIP3,LAS17,LIN1,MDM1,MHP1,MYO1,MYO2,MYO3,MYO4,MYO5,NIP100,OYE2,PAC11,PAN1,PFY1,RBL2,RVS167,SAC6,SHS1,SLA1,SLA2,SMY1,SMY2,SPC98,SPR28,SPR3,SRV2,STU1,STU2,TPM1,TPM2,TUB1,TUB2,TUB3,TUB4,TWF1,VRP1,YTM1")

# Signalosome
signalosome = c("CSI1,CSN9,PCI8,RPN5,RRI1,RRI2,YJR084W") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=8180
signalosome_lcomm_PE = c("CSI1,CSN9,PCI8,RPN5,RRI1,RRI2") # table_nested_complexList_lcomm_PE[1,3]
signalosome_like_lcomm_PE = c("CSI1,CSN9,PCI8,RRI1,RRI2") # table_nested_complexList_lcomm_PE[1,4] (just incomplete)

# Origin recognition complex
origin_recognition_complex_lcomm_PE = c("ORC1,ORC2,ORC3,ORC4,ORC5,ORC6") # table_nested_complexList_lcomm_PE[2,3]
# (unassigned) ORC2,ORC3,ORC4,ORC5,ORC6 # table_nested_complexList_lcomm_PE[2,3] (just incomplete)

# TFIIIC
TFIIIC_complex = c("TFC1,TFC3,TFC4,TFC6,TFC7,TFC8") # http://www.uniprot.org/uniprot/P34111
TFIIIC_complex_lcomm_PE = c("TFC1,TFC3,TFC4,TFC6,TFC7,TFC8") # table_nested_complexList_lcomm_PE[3,3]
# (unassigned) TFC1,TFC3,TFC6,TFC7,TFC8 # table_nested_complexList_lcomm_PE[3,4] (just incomplete)

# HD1 / HDAC
HD1_complex = c("HDA1,HDA2,HDA3") # http://www.ebi.ac.uk/QuickGO/GTerm?id=GO:0070823
HD1_complex_lcomm_PE = c("HDA1,HDA2,HDA3,MSH2") # table_nested_complexList_lcomm_PE[4,3]
# (unassigned) HDA1,HDA2,HDA3 # table_nested_complexList_lcomm_PE[4,4] (just incomplete)
#core_HDAC_complex = c("HDAC1, HDAC2, RBBP4, RBBP7") #
#SIN3_HDAC_complex = c("HDAC1, HDAC2, RBBP4, RBBP7, SIN3A, ARID4B/SAP180, SAP18, SAP30, SAP130, SUDS3/SAP45, ARID4A/RBP1, ING1") #
#NuRD_complex = c("HDAC1, HDAC2, RBBP4, RBBP7, MTA2, MBD3, CHD3, CHD4")

# GARP/VFT
# The variable names use "_" instead of "/": "GARP/VFT_complex = ..." is not a
# syntactic R name and fails when evaluated.
GARP_VFT_complex = c("VPS51, VPS52, VPS53, VPS54")
GARP_VFT_complex_lcomm_PE = c("KAR2,VPS52,VPS53,VPS54") # table_nested_complexList_lcomm_PE[5,3]
# (unassigned) VPS52,VPS53,VPS54 # table_nested_complexList_lcomm_PE[5,4] (just incomplete)

# Nuclear pore subcomplexes
NUP84_NPC_subcomplex = c("NUP120,NUP133,NUP145C,NUP84,NUP85,SEC13,SEH1")
NUP84_NPC_subcomplex_like_linkcomm_PE1 = c("NUP120,NUP145,NUP84,NUP85,SEH1") # table_nested_complexList_lcomm_PE[6,3]
NUP84_NPC_subcomplex_like_linkcomm_PE2 = c("NUP120,NUP145,NUP84,NUP85,SEC13") # table_nested_complexList_lcomm_PE[7,3]
NUP84_NPC_subcomplex_like_linkcomm_PE3 = c("NUP120,NUP145,NUP84,NUP85") # table_nested_complexList_lcomm_PE[6,4] & [7,4] (just incomplete)
#nuclear_pore_complex = c("ASM4, CDC31, GLE1, GLE2, NDC1, NIC96, NSP1, NUP1, NUP2, NUP100, NUP116, NUP120, NUP133, NUP145, NUP157, NUP159, NUP170, NUP188, NUP192, NUP42, NUP49, NUP53, NUP57, NUP60, NUP82, NUP84, NUP85, POM152, POM34, SEH1, SEC1")
GLE2_NUP116_NUP82_complex = c("GLE2,NSP1,NUP116,NUP159,NUP82") # http://www.uniprot.org/uniprot/P40066 (not as a complex)
GLE2_NUP116_NUP82_complex_lcomm_PE = c("GLE2,NSP1,NUP116,NUP159,NUP82") # 11,3
GLE2_NUP116_NUP82_complex_like_lcomm_PE = c("GLE2,NUP116,NUP159,NUP82") # 11,4
#NUP82_subcomplex = c("NSP1,NUP159,NUP82") # http://www.uniprot.org/uniprot/P40066 (undetected)
#subcomplex of the NPC (undetected too)
NUP57_subcomplex = c("NIC96,NSP1,NUP49,NUP57") # http://www.uniprot.org/uniprot/Q02199
NUP57_subcomplex_lcomm_PE = c("NIC96,NSP1,NUP49,NUP57") # 12,3
NUP57_subcomplex_like_lcomm_PE = c("NIC96,NUP49,NUP57") # 12,4
#subcomplex of the NPC (undetected too)

# Polymerase alpha-primase
polymerase_alpha_primase_complex = c("POL1,POL12,PRI1,PRI2") # http://books.google.no/books?id=nXJ1Ea7IpTUC&pg=PA146&lpg=PA146&dq=POL1+PRI2+PRI1+complex&source=bl&ots=mLUqiTvux_&sig=-hlgw2Ija0JaNdjT7oMRYftsWyg&hl=en&sa=X&ei=5l1VUcbsDeGq4AS77YC4CQ&ved=0CEgQ6AEwBQ#v=onepage&q=POL1%20PRI2%20PRI1%20complex&f=false
polymerase_alpha_primase_complex_lcomm_PE = c("POL1,POL12,PRI1,PRI2") # 9,3
polymerase_alpha_primase_complex_like_lcomm_PE1 = c("POL1,POP2,PRI1,PRI2") # 8,3
polymerase_alpha_primase_complex_like_lcomm_PE2 = c("POL1,PRI1,PRI2") # 8,3 & 9,3
#DNA_primase = c("PRI1,PRI2") # http://www.uniprot.org/uniprot/P10363 (undetected)
#DNA_polymerase_alpha = c("POL1,POL12") # http://www.uniprot.org/uniprot/P09884 http://cat.inist.fr/?aModele=afficheN&cpsidt=15743352 (undetected)

# SET1 (COMPASS)
SET1_complex = c("BRE2,SDC1,SET1,SHG1,SPP1,SWD1,SWD2,SWD3") # also COMPASS complex http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=48188
SET1_complex_lcomm_PE = c("BRE2,SDC1,SET1,SPP1,SWD1,SWD2") # 10,3
SET1_complex_like_lcomm_PE = c("BRE2,SDC1,SET1,SPP1,SWD1") # 10,4

# THO / TREX
THO_complex = c("HPR1,MFT1,THO2,THP2") # http://www.ncbi.nlm.nih.gov/pubmed/22314234
TREX_complex = c("HPR1,MFT1,SUB2,TEX1,THO2,THP2,YRA1") # http://www.ncbi.nlm.nih.gov/pubmed/22314234
extended_TREX_complex = c("GBP2,HPR1,HRB1,MFT1,SUB2,TEX1,THO2,THP2,YRA1") # http://www.straesser.genzentrum.lmu.de/the-function-of-trex-in-gene-expression/ http://www.pnas.org/content/101/7/1858.full (called complex and not)
TREX_like_complex_lcomm_PE1 = c("HPR1,THO2,THP2,SUB2,YRA1") # 13,3
TREX_like_complex_lcomm_PE2 = c("HPR1,THO2,THP2,SUB2,HRB1") # 14,3
TREX_like_complex_lcomm_PE3 = c("HPR1,THO2,THP2,SUB2") # 13,4 & 14,4

# Trehalose-phosphate synthase
alpha_alpha_trehalose_phosphate_synthase_complex = c("TPS1,TPS2,TPS3,TSL1") # http://www.biocyc.org/YEAST/NEW-IMAGE?type=POLYPEPTIDE&object=CPLX3O-4031
aa_trehalose_phosphate_synthase_complex_lcomm_PE = c("TPS1,TPS2,TPS3,TSL1") # 15,3
aa_trehalose_phosphate_synthase_complex_like_lcomm_PE = c("TPS2,TPS3,TSL1") # 15,4 # TSL1 has a paralog, TPS3, that arose from the whole genome duplication
# TPS1_TPS2_complex = c("TPS1,TPS2") # also called trehalose biosynthetic pathway (undetected) http://www.yeastgenome.org/cgi-bin/locus.fpl?locus=YML100w

# Signal recognition particle
signal_recognition_particle = c("SEC65,SRP14,SRP21,SRP54,SRP68,SRP72") # http://www.uniprot.org/uniprot/P38985
signal_recognition_particle_extend_lcomm_PE = c("LHP1,SEC65,SRP14,SRP21,SRP54,SRP68,SRP72") # 16,3 # LHP1 interacts with all components of SRP (68,14,72,54,21,sec65) http://thebiogrid.org/32008/summary/saccharomyces-cerevisiae/lhp1.html
signal_recognition_particle_lcomm_PE = c("SEC65,SRP14,SRP21,SRP54,SRP68,SRP72") # 16,4

# AP-1 adaptor
AP1_adaptor_complex = c("APL2,APL4,APM1,APM2,APS1") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=30121
AP1_adaptor_complex_lcomm_PE = c("APL2,APL4,APM2,APS1") # 17,3
AP1_adaptor_complex_like_lcomm_PE = c("APL4,APM2,APS1") # 17,4
#CDC28,CKS1,CLB3,CLN1,CLN2,SIC1 # 18,3 (cell cycle, dynamic case, pathway-style, quite complex)
#CKS1,CLB3,CLN1,CLN2,SIC1 # 18,4

# NuA4 HAT
NuA4_HAT_complex = c("ACT1,ARP4,EAF1,EAF3,EAF5,EAF6,EAF7,EPL1,ESA1,SWC4,TRA1,YAF9,YNG2") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=35267
NuA4_HAT_complex_lcomm_PE = c("ARP4,EPL1,ESA1,EAF1,EAF3,EAF5,TRA1,YAF9") # 19,3
# Each complex below is stored as ONE comma-separated string of subunit names.
# Trailing "r,c" notes presumably point at cell [r,c] of
# table_nested_complexList_lcomm_PE -- TODO confirm.

# NuA4 HAT (nested set)
NuA4_HAT_complex_like_lcomm_PE = c("ARP4,EPL1,ESA1,EAF1,EAF3,EAF5,YAF9") # 19,4
#EAF3_EAF5_EAF7_subcomplex = c("EAF3,EAF5,EAF7") # http://www.plosone.org/article/info:doi/10.1371/journal.pone.0025336 (undetected)
#ACT1_ARP4_SWC4_YAF9_subcomplex = c("ACT1,ARP4,SWC4,YAF9") # http://www.plosone.org/article/info:doi/10.1371/journal.pone.0025336 (undetected)
#ESA1_EPL1_YNG2_EAF6_subcomplex = c("ESA1,EPL1,YNG2,EAF6") # http://www.plosone.org/article/info:doi/10.1371/journal.pone.0025336 (undetected)

# HAT-B
HATB_HAT_complex = c("HAT1,HAT2,HIF1") # http://www.yeastrc.org/pdr/viewProtein.do?id=532148
HATB_complex_lcomm_PE = c("HAT1,HAT2,HIF1") # 20,4 & 21,4
HATB_complex_like_lcomm_PE1 = c("HAT1,HAT2,HHF2,HIF1") # 20,3
HATB_complex_like_lcomm_PE2 = c("HAT1,HAT2,HIF1,HSM3,PSH1") # 21,3
#HAT1_HAT2_HAT_complex = c("HAT1,HAT2") # http://www.yeastrc.org/pdr/viewProtein.do?id=534147
# about HATs: http://younglab.wi.mit.edu/pub/HATcomplexes.html http://mcb.asm.org/content/28/7/2244.long

# INO80
INO80_chromatin_remodeling_complex = c("ACT1,ARP4,ARP5,ARP8,IES1,IES3,IES4,INO80,NHP10,RVB1,RVB2,TAF14") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=31011
INO80_complex_like_lcomm_PE1 = c("ARP5,ARP8,IES1,IES5,INO80,NHP10,RVB2") # 22,3
INO80_complex_like_lcomm_PE2 = c("ARP5,ARP8,IES1,IES5,INO80,NHP10,RVB1") # 23,3
INO80_complex_like_lcomm_PE3 = c("ARP5,ARP8,IES1,IES5,INO80,NHP10,IES3") # 24,3
INO80_complex_like_lcomm_PE4 = c("ARP5,ARP8,IES1,IES5,INO80,NHP10") # 22,4 & 23,4 & 24,4

# SET3
set3_complex = c("CPR1,HOS2,HOS4,HST1,SET3,SIF2,SNT1") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=34967
set3_complex_like_lcomm_PE1 = c("KAP95,HOS2,HOS4,SET3,SIF2,SNT1,ZDS1") # 25,3
set3_complex_like_lcomm_PE2 = c("HOS2,HOS4,SET3,SIF2,SNT1,ZDS1") # 25,4

# CCT
CCT_complex = c("CCT2,CCT3,CCT4,CCT5,CCT6,CCT7,CCT8,TCP1") # also: chaperonin_containing_TCP1 complex, TRiC complex, TCP1 Ring Complex http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=5832
extended_CCT_complex = c("CCT2,CCT3,CCT4,CCT5,CCT6,CCT7,CCT8,PLP2,TCP1") # http://www.yeastgenome.org/cgi-bin/locus.fpl?locus=YOR281c (not referenced as a complex but interaction)
CCT_complex_like_lcomm_PE1 = c("CCT2,CCT6,CCT7,PLP2") # 26,3
CCT_complex_like_lcomm_PE2 = c("CCT2,CCT7,PLP2,CCT4") # 31,3
CCT_complex_like_lcomm_PE3 = c("CCT2,CCT7,PLP2") # 26,4 & 31,4

# CCR4-NOT
# Same value as the CCR4_NOT_complex assignment earlier in this list; repeated
# here next to the sets it is compared with.
CCR4_NOT_complex = c("CAF130, CAF16, CAF4, CAF40, CCR4, CDC36, CDC39, MOT2, NOT3, NOT5, POP2")
CCR4_NOT_like_complex_lcomm_PE1 = c("CAF130,CAF40,CCR4,CDC39") # 27,3
CCR4_NOT_like_complex_lcomm_PE2 = c("CAF130,CAF40,CCR4,POP2") # 28,3
CCR4_NOT_like_complex_lcomm_PE3 = c("CAF130,CAF40,CCR4") # 27,4 & 28,4

# RPD3C HDAC
RPD3C_HDAC = c("ASH1,CTI6,DEP1,PHO23,RPD3,RXT2,RXT3,SAP30,SDS3,SIN3,UME1,UME6") # http://www.uniprot.org/uniprot/Q08923
RPD3C_HDAC_lcomm_PE = c("CTI6,DEP1,PHO23,RXT2,RXT3,SAP30,SDS3,SIN3,UME1,UME6") # 29,3
RPD3C_HDAC_short_lcomm_PE = c("CTI6,DEP1,PHO23,RXT2,RXT3,SAP30,SDS3,UME1,UME6") # 29,4

# COPI vesicle coat
COPI_vesicle_coat = c("COP1,GLO3,RET2,RET3,SEC21,SEC26,SEC27,SEC28") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=30126
COPI_vesicle_coat_lcomm_PE = c("COP1,ERV41,GLO3,RET2,RET3,SEC21,SEC26,SEC27,SEC28") # 30,3
COPI_vesicle_coat_short_lcomm_PE = c("COP1,ERV41,GLO3,RET2,RET3,SEC21,SEC26,SEC28") # 30,4
#ERV41-ERV46 COMPLEX, SEC13-SEC31 COMPLEX

# SWI/SNF
SWI_SNF_chromatin_remodeling_complex = c("ARP7,ARP9,RTT102,SNF11,SNF12,SNF2,SNF5,SNF6,SWI1,SWI3,SWP82,TAF14") # http://www.yeastgenome.org/cgi-bin/GO/goTerm.pl?goid=16514
SWI_SNF_chromatin_remodeling_complex_lcomm_PE = c("SNF11,SNF12,SNF2,SNF5,SNF6,SWI1,SWI3,SWP82,TAF14") # 51,3
SWI_SNF_chromatin_remodeling_complex_short_lcomm_PE = c("SNF11,SNF12,SNF2,SNF5,SNF6,SWI1,SWI3,SWP82") # 51,4

# Save the whole workspace (every object defined so far) to TRIBAL.RData in the
# current working directory.
save.image("TRIBAL.RData")