Skip to content

Commit dc23d09

Browse files
author
Danielle Gruber
committed
Added files
1 parent aff9fb6 commit dc23d09

13 files changed

+982
-0
lines changed

clean_no_tumor.m

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
%% Read the spreadsheet into a table
no_tumor = readtable('danielle_no_tumor.xlsx');

%% Keep only the rows whose 254th column is not equal to 0
no_tumor = no_tumor(~(no_tumor{:,254} == 0), :);

%% Write the filtered table to a csv file
writetable(no_tumor, 'no_tumor.csv');

compare_strauss_otu.m

+160
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
%% Compare strauss and otu measures for each taxonomic group
% For every entry of names this script reads
%   taxa_measures_<name>.csv         (strauss)
%   otu_overlap_measures_<name>.csv  (otu, stored in the 'Knight' columns)
% looks up the column-3 value of every class label found in either file,
% and writes comparison, presence/absence, concordant and discordant
% tables to csv files.
%
% This script does not define names or n_names; both must already exist
% in the workspace. names is indexed as a cell array (names{n}).

%% Iterate over taxa types
% For each taxonomic group
for n = 1:n_names

    % Lower-case group name used in every input and output file name
    group_lower = lower(names{n});

    %% Create tables containing discordant and concordant taxa
    % concord_tbl  : value > 0 in both strauss and otu
    % discord_tbl  : every class that is not concordant
    % discord_tbl1 : discordant classes with a strauss value > 0
    % discord_tbl2 : all remaining discordant classes
    discord_tbl = table('Size',[0,3],'VariableTypes',{'string', 'double', 'double'},...
        'VariableNames',{'Name','Straussman','Knight'});
    discord_tbl1 = table('Size',[0,3],'VariableTypes',{'string', 'double', 'double'},...
        'VariableNames',{'Name','Straussman','Knight'});
    discord_tbl2 = table('Size',[0,3],'VariableTypes',{'string', 'double', 'double'},...
        'VariableNames',{'Name','Straussman','Knight'});
    concord_tbl = table('Size',[0,3],'VariableTypes',{'string', 'double', 'double'},...
        'VariableNames',{'Name','Straussman','Knight'});

    %% Extract measures from existing csv files
    strauss_measures = readcell(['taxa_measures_', group_lower, '.csv']);
    otu_measures = readcell(['otu_overlap_measures_', group_lower, '.csv']);

    %% Extract data
    % Make sure all instances of unknown values are identified the same way
    % so separate categories aren't created
    strauss_measures(strcmp(strauss_measures(:,1),'Unknown')) = {['Unknown ', names{n}]};
    otu_measures(strcmp(otu_measures(:,1),'Unknown')) = {['Unknown ', names{n}]};

    % Index all the classes (row 1 is skipped)
    strauss_class = strauss_measures(2:end,1);
    otu_class = otu_measures(2:end,1);

    % Find the union of the strauss and otu classes
    % (the unique() function ensures there are no repeats)
    all_class = [strauss_class; otu_class];
    unique_class = unique(all_class);
    unique_class(ismissing(unique_class)) = [];

    num_unique = numel(unique_class);

    %% Create table comparing values of strauss and otu
    % Preallocated only now, because the row count depends on num_unique
    % for THIS group. Allocating before num_unique is computed would use an
    % undefined value on the first pass and the previous group's count on
    % later passes.
    comp_tbl = table('Size',[num_unique,3],'VariableTypes', ...
        {'string', 'double', 'double'}, 'VariableNames', {'Class','Strauss','Knight'});

    %% Create table comparing whether class is present or absent in strauss and otu
    presence_absence_tbl = table('Size',[num_unique,3],'VariableTypes', ...
        {'string', 'double', 'double'}, 'VariableNames', {'Class','Strauss','Knight'});

    %% Iterate over classes
    % For each class in the union set
    for u = 1:num_unique

        % See whether class occurs in strauss and/or otu
        strauss_idx = strcmp(unique_class(u), strauss_measures(:,1));
        otu_idx = strcmp(unique_class(u), otu_measures(:,1));

        %% Look up values
        % NOTE(review): only column 3 of the matching row is read. If a
        % label matches several rows, the first match is used (same result
        % as the previous brace-indexing form) - confirm whether the
        % matching rows should be summed instead.

        % Strauss value; 0 if the class doesn't exist in strauss
        if any(strauss_idx)
            strauss_sum = strauss_measures{find(strauss_idx,1),3};
        else
            strauss_sum = 0;
        end

        % Otu value; 0 if the class doesn't exist in otu
        if any(otu_idx)
            otu_sum = otu_measures{find(otu_idx,1),3};
        else
            otu_sum = 0;
        end

        %% Add entries to comparison and presence/absence tables
        comp_tbl{u,1} = unique_class(u);
        comp_tbl{u,2} = strauss_sum;
        comp_tbl{u,3} = otu_sum;

        % Presence is recorded as 1 when the value is > 0, otherwise 0
        presence_absence_tbl{u,1} = unique_class(u);
        if strauss_sum > 0
            presence_absence_tbl{u,2} = 1;
        else
            presence_absence_tbl{u,2} = 0;
        end
        if otu_sum > 0
            presence_absence_tbl{u,3} = 1;
        else
            presence_absence_tbl{u,3} = 0;
        end

        %% Add entries to concordant/discordant tables

        % If class has a value > 0 in strauss and otu, add to concordant table
        if (strauss_sum > 0) && (otu_sum > 0)
            idx = height(concord_tbl) + 1;
            concord_tbl{idx, 1} = unique_class(u);
            concord_tbl{idx, 2} = strauss_sum;
            concord_tbl{idx, 3} = otu_sum;
        else
            % Class is not present in both strauss and otu, so add to
            % general discordant table
            idx = height(discord_tbl) + 1;
            discord_tbl{idx, 1} = unique_class(u);
            discord_tbl{idx, 2} = strauss_sum;
            discord_tbl{idx, 3} = otu_sum;

            if strauss_sum > 0
                % Present in strauss but not otu: discord table 1
                idx = height(discord_tbl1) + 1;
                discord_tbl1{idx, 1} = unique_class(u);
                discord_tbl1{idx, 2} = strauss_sum;
                discord_tbl1{idx, 3} = otu_sum;
            else
                % Everything else that is discordant (strauss value not
                % > 0): discord table 2
                idx = height(discord_tbl2) + 1;
                discord_tbl2{idx, 1} = unique_class(u);
                discord_tbl2{idx, 2} = strauss_sum;
                discord_tbl2{idx, 3} = otu_sum;
            end
        end

    end

    %% Save tables to files

    % Presence/absence versions of the discordant tables (values > 0
    % replaced by 1); written only when the table has rows
    if ~isempty(discord_tbl)
        temp = discord_tbl{:,2:3};
        temp(temp>0) = 1;
        discord_tbl_pa = table(discord_tbl.Name, temp(:,1),temp(:,2));
        discord_tbl_pa.Properties.VariableNames = discord_tbl.Properties.VariableNames;
        writetable(discord_tbl_pa,['new_discord_pa_', group_lower, '.csv'])
    end

    if ~isempty(discord_tbl1)
        temp = discord_tbl1{:,2:3};
        temp(temp>0) = 1;
        discord_tbl1_pa = table(discord_tbl1.Name, temp(:,1),temp(:,2));
        discord_tbl1_pa.Properties.VariableNames = discord_tbl1.Properties.VariableNames;
        writetable(discord_tbl1_pa,['new_discord1_pa_', group_lower, '.csv'])
    end

    if ~isempty(discord_tbl2)
        temp = discord_tbl2{:,2:3};
        temp(temp>0) = 1;
        discord_tbl2_pa = table(discord_tbl2.Name, temp(:,1),temp(:,2));
        discord_tbl2_pa.Properties.VariableNames = discord_tbl2.Properties.VariableNames;
        writetable(discord_tbl2_pa,['new_discord2_pa_', group_lower, '.csv'])
    end

    % Value tables are always written, even when empty
    writetable(discord_tbl,['new_discord_', group_lower, '.csv'])
    writetable(discord_tbl1,['new_discord1_', group_lower, '.csv'])
    writetable(discord_tbl2,['new_discord2_', group_lower, '.csv'])
    writetable(concord_tbl,['new_concord_', group_lower, '.csv'])

    writetable(comp_tbl,['compare_sums_', group_lower, '.csv'])
    writetable(presence_absence_tbl,['compare_pa_', group_lower, '.csv'])
end

0 commit comments

Comments
 (0)