-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathiterate_graphs.R
119 lines (112 loc) · 5.51 KB
/
iterate_graphs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
source("id_network_analysis.R")
#' Collect (or compute) graph metrics for every sample of an argument file.
#'
#' For each row of `argument_file` the function looks inside
#' `<save_path>/<sample_id>/` and, in this order:
#'   1. reads `graph_info_<sample_id>.txt` if it exists;
#'   2. otherwise loads `id_and_error_type.Rda` if it exists;
#'   3. otherwise, if the sample folder does not exist at all, calls
#'      `doGraph()` (defined outside this block) for that sample;
#'   4. otherwise records a placeholder "no result and no error file" entry.
#' It also gathers the per-sample `aa_muts_weight_*.txt` and
#' `aa_muts_weight_main_variant_*.txt` tables when present.
#'
#' Four tab-separated files are written to `<save_path>/Output`:
#' `metric_table_all.txt`, `aa_muts_weight.txt`,
#' `aa_muts_weight_main_variant.txt` and `discarded_samples_table.txt`
#' (sample ids whose metric row contains a missing value in any column other
#' than `error_type`).
#'
#' @param argument_file Path to a tab-separated file with a header. The
#'   columns `sample_id`, `highly_sim_clonos_file` and
#'   `grouped_alignment_file` are read from it.
#' @param save_path Directory holding one sub-folder per sample; the `Output`
#'   sub-folder is created inside it.
#' @param include_jump,col_start,col_end,min_reads,highly_sim_clonos,nodes_size_scaling,include_aa_muts,separate_graphs
#'   Forwarded unchanged to `doGraph()`.
#' @return The metric table (a `data.frame` with one row per sample).
iterate_do_graph <- function(argument_file, save_path, include_jump = TRUE,
                             col_start = 5, col_end = 313, min_reads = 10,
                             highly_sim_clonos = c(1),
                             nodes_size_scaling = TRUE,
                             include_aa_muts = TRUE,
                             separate_graphs = FALSE) {
  # Read a tab-separated table with a header line.
  read_tsv <- function(path) {
    data.table::fread(path,
                      header = TRUE,
                      sep = "\t",
                      stringsAsFactors = FALSE)
  }

  # Write a table as unquoted tab-separated text with a header line.
  write_tsv <- function(x, path) {
    utils::write.table(x, path, sep = "\t", dec = ".",
                       row.names = FALSE, col.names = TRUE, quote = FALSE)
  }

  # Read an optional per-sample table and tag it with the sample id;
  # returns NULL when the file is absent.
  read_sample_table <- function(path, sample_id) {
    if (!file.exists(path)) {
      return(NULL)
    }
    tab <- read_tsv(path)
    tab$id <- rep(sample_id, nrow(tab))
    tab
  }

  # recursive = TRUE so a missing save_path is created up front instead of
  # failing silently here and erroring only at the final writes.
  output_path <- file.path(save_path, "Output")
  dir.create(output_path, showWarnings = FALSE, recursive = TRUE)

  args <- read_tsv(argument_file)
  n <- nrow(args)

  # Per-sample tables are collected in lists and bound once after the loop
  # rather than grown with rbind() on every iteration.
  aa_muts_list <- vector("list", n)
  aa_muts_main_variant_list <- vector("list", n)

  metric_table <- data.frame(
    sample_id = character(n),
    main_nt_var_identity = double(n),
    convergence_score = double(n),
    nb_reads_most_relevant_pathway = integer(n),
    nb_reads_main_nt_var = integer(n),
    most_relevant_pathway_score = double(n),
    nb_nodes_most_relevant_pathway = integer(n),
    max_path_length = integer(n),
    max_muts_length = integer(n),
    end_nodes_density = double(n),
    nb_end_nodes = integer(n),
    nb_extra_nodes = integer(n),
    nb_reads_tot = integer(n),
    avg_degree = double(n),
    avg_distance = integer(n),
    error_type = character(n),
    stringsAsFactors = FALSE
  )
  n_col <- ncol(metric_table)
  # Every column except the trailing error_type.
  metric_cols <- seq_len(n_col - 1L)

  # seq_len() (not 1:n) so an empty argument file yields zero iterations.
  for (i in seq_len(n)) {
    sample_id <- args$sample_id[i]
    sample_dir <- file.path(save_path, sample_id)
    graph_info_file <- file.path(
      sample_dir, paste0("graph_info_", sample_id, ".txt")
    )
    metric_table[i, 1] <- sample_id

    if (file.exists(graph_info_file)) {
      metric_table[i, metric_cols] <- read_tsv(graph_info_file)
    } else {
      error_file <- file.path(sample_dir, "id_and_error_type.Rda")
      if (file.exists(error_file)) {
        # Load into a scratch environment so objects stored in the file
        # cannot overwrite this function's local variables.
        loaded <- new.env(parent = emptyenv())
        load(error_file, envir = loaded)
        if (!exists("id_and_error_type", envir = loaded, inherits = FALSE)) {
          stop("'", error_file,
               "' does not contain an object named 'id_and_error_type'",
               call. = FALSE)
        }
        graph_info <- get("id_and_error_type", envir = loaded,
                          inherits = FALSE)
      } else if (!file.exists(sample_dir)) {
        graph_info <- doGraph(args$highly_sim_clonos_file[i],
                              args$grouped_alignment_file[i],
                              args$sample_id[i],
                              save_path = save_path,
                              include_jump = include_jump,
                              col_start = col_start,
                              col_end = col_end,
                              min_reads = min_reads,
                              highly_sim_clonos = highly_sim_clonos,
                              nodes_size_scaling = nodes_size_scaling,
                              include_aa_muts = include_aa_muts,
                              separate_graphs = separate_graphs)
      } else {
        # Sample folder exists but holds neither a result nor an error file.
        graph_info <- c(NaN, FALSE, FALSE, FALSE)
      }

      if (typeof(graph_info[1]) == "double") {
        # A double vector is treated as an error record: its first element
        # goes into column 2 and the remaining elements are error flags.
        metric_table[i, 3:(n_col - 1)] <- NaN
        metric_table[i, 2] <- graph_info[1]
        type_error <- as.logical(graph_info[-1])
        names(type_error) <- names(graph_info)[-1]

        # Either of the first two flags marks max_muts_length as NA
        # (instead of NaN); absent or NA flags count as not set.
        if (any(unname(type_error)[1:2], na.rm = TRUE)) {
          metric_table$max_muts_length[i] <- NA
        }

        if (any(type_error, na.rm = TRUE)) {
          # Keep the first flagged name (what the single-cell assignment
          # effectively stored before), without a length-mismatch warning
          # and without failing when the flags are unnamed.
          flagged <- names(type_error)[which(type_error)]
          metric_table$error_type[i] <-
            if (length(flagged) > 0) flagged[1] else NA_character_
        } else {
          metric_table$error_type[i] <-
            "sample_output_folder_exits_without_result_or_error_files"
        }
      } else {
        metric_table[i, metric_cols] <- graph_info
      }
    }

    # `lst[i] <- list(x)` keeps the slot even when x is NULL.
    aa_muts_list[i] <- list(read_sample_table(
      file.path(sample_dir, paste0("aa_muts_weight_", sample_id, ".txt")),
      sample_id
    ))
    aa_muts_main_variant_list[i] <- list(read_sample_table(
      file.path(sample_dir,
                paste0("aa_muts_weight_main_variant_", sample_id, ".txt")),
      sample_id
    ))
  }

  # NULL entries (samples without the file) are skipped by rbindlist();
  # use.names = TRUE matches columns by name as rbind() on data.tables does.
  aa_muts_all <- data.table::rbindlist(aa_muts_list, use.names = TRUE)
  aa_muts_main_variant_all <- data.table::rbindlist(
    aa_muts_main_variant_list, use.names = TRUE
  )

  metric_file <- file.path(output_path, "metric_table_all.txt")
  write_tsv(metric_table, metric_file)
  write_tsv(aa_muts_all, file.path(output_path, "aa_muts_weight.txt"))
  write_tsv(aa_muts_main_variant_all,
            file.path(output_path, "aa_muts_weight_main_variant.txt"))

  # The metric table is re-read from disk (as before) so that missing values
  # are detected on the parsed file content; rows with a missing value in
  # any column other than error_type are reported as discarded.
  reread <- read_tsv(metric_file)
  with_missing <- stats::na.omit(
    reread,
    cols = setdiff(colnames(reread), "error_type"),
    invert = TRUE
  )
  discarded_samples <- data.frame(sample_id = with_missing$sample_id,
                                  stringsAsFactors = FALSE)
  write_tsv(discarded_samples,
            file.path(output_path, "discarded_samples_table.txt"))

  metric_table
}