Skip to content

Commit f225ba8

Browse files
committed
initial submission
1 parent 7ac64cf commit f225ba8

15 files changed

+914
-1
lines changed

ConMod.m

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
function [ modulesfinal ] = ConMod( multiNetworks, N, K, lambda, xita, maxIter )
%CONMOD Identify conserved functional modules in multiple networks.
%
%   The function chains three stages: it builds two feature matrices with
%   featureNets, factorises them jointly with multiViewNMF, and passes the
%   resulting consensus factor matrix to moduleNodesSelection.
%
% INPUT:
%   multiNetworks: a cell containing multiple networks, each represented
%                  by a sparse matrix or a full matrix with N nodes
%   N:             the number of all nodes
%   K:             the number of hidden factors
%   lambda:        a vector containing the parameters for balancing the
%                  relative weight among different views
%   xita:          the parameter for selecting nodes
%   maxIter:       the maximum number of iterations for multi-view NMF
%
% OUTPUT:
%   modulesfinal:  a cell containing the final conserved modules, exactly
%                  as returned by moduleNodesSelection
%
% Peizhuo Wang ([email protected])

%% Stage 1: feature matrices (the two views)
disp('Calculating the strengh matrix and the uniformity matrix...')
[strengthView, uniformityView] = featureNets(multiNetworks, N);

%% Stage 2: candidate modules by multi-view NMF
disp('Obtaining candidate modules by multi-view NMF...')
disp(['K=', num2str(K)])
views = {strengthView, uniformityView};
% Only the consensus factors (second output) are needed downstream; all
% three outputs are still requested so the callee sees the same nargout.
[~, consensusFactors, ~] = multiViewNMF( views, K, lambda, maxIter );

%% Stage 3: node selection from the consensus factors
modulesfinal = moduleNodesSelection( consensusFactors, xita );

end
37+

README.md

Lines changed: 0 additions & 1 deletion
This file was deleted.

README.txt

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
===============================================
2+
ConMod: Identifying conserved functional modules in multiple networks
3+
===============================================
4+
5+
#----------------------The main function---------------------------
6+
ConMod.m % The implementation of the ConMod method
7+
>> [ modulesfinal ] = ConMod( multiNetworks, N, K, lambda, xita, maxIter )
8+
% INPUT:
9+
% multiNetworks: a cell array containing multiple networks, each represented by a sparse matrix or a full matrix with N nodes
10+
% N: the number of all nodes
11+
% K: the number of hidden factors
12+
% lambda: a vector containing the parameters for balancing the relative
13+
% weight among different views
14+
% xita: the parameter for selecting nodes
15+
% maxIter: the maximum number of iterations for multi-view NMF
16+
%
17+
% OUTPUT:
18+
% modulesfinal: a cell array containing the final conserved modules
19+
20+
21+
---------------The code for generating the synthetic datasets---------
22+
syn_dataset_common.m % Conserved modules have the same size and are common to a given set of networks
23+
syn_dataset_overlap.m % Conserved modules are present only in a subset of networks and they are the overlapping parts of specific modules across different networks
24+
25+
--------------The code for evaluation measures-------------
26+
evaluation.m % Compute the performance measures (TPR, FPR, Accuracy and MCC)
27+
28+
------------------------------------------------------------------
29+
30+
If you have any questions, please contact [email protected].

SNMF.m

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
function [ H,objj ] = SNMF(X, K, H_init, maxIter, epsilon)
%SNMF Symmetric Non-Negative Matrix Factorization, X ~ H*H'.
%
% INPUT:
%   X:       the adjacency matrix of a network
%   K:       the number of hidden factors (only used when H_init is empty)
%   H_init:  initial factor matrix; pass [] to start from rand(N,K),
%            where N = size(X,1)
%   maxIter: the maximal number of iterations for alternating minimization
%   epsilon: the convergence parameter; iteration stops as soon as the
%            decrease of the objective between two iterations is below it
%
% OUTPUT:
%   H:    the factor matrix
%   objj: row vector with the objective value ||X - H*H'||_F^2 before the
%         first update followed by its value after every update performed
%
% Both X and the initial H are scaled to unit Frobenius norm before the
% updates start. An all-zero X (or H_init) is left unscaled so that the
% result stays finite instead of turning into NaN.
%
% Peizhuo Wang ([email protected])

%% Normalize the network
% norm(X,'fro') equals sqrt(trace(X'*X)) without forming the N-by-N product.
normX = norm(X, 'fro');
if normX > 0
    X = X/normX;
end

%% Initialization
N = size(X,1);
if isempty(H_init)
    H = rand(N,K);
else
    H = H_init;
end
normH = norm(H, 'fro');
if normH > 0
    H = H/normH;
end

obj_old = norm(X - H*H', 'fro')^2;
beta = 0.5;   % damping: new H is a 50/50 blend of old H and the multiplicative step
objj = obj_old;

%% Alternating update
for iter = 1:maxIter
    numerator = X*H;
    denominator = H*((H')*H);
    % eps keeps the element-wise division defined when an entry is zero
    H = H.*(1 - beta + beta*(numerator./(denominator+eps)));

    obj = norm(X - H*H', 'fro')^2;
    Delta = obj_old - obj;
    obj_old = obj;

    objj = [objj, obj]; %#ok<AGROW> at most maxIter+1 entries
    if Delta < epsilon
        break;
    end
end

end

SNMFforView.m

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
function [ H ] = SNMFforView(X, Hc, H, lambda, maxIter, epsilon)
%SNMFFORVIEW Symmetric NMF update of one view with the consensus held fixed.
%
%   Minimises  ||X - H*H'||_F^2 + lambda*||H - Hc||_F^2  over H by
%   multiplicative updates while Hc stays constant.
%
% INPUT:
%   X:       the adjacency matrix of a network
%   Hc:      the consensus factor matrix (not modified here)
%   H:       initialization for the factor matrix of this view
%   lambda:  weight of the consensus term; it is used as a plain multiplier
%            in the update rule and the objective
%            (NOTE(review): presumably the caller passes the entry of its
%            per-view weight vector that belongs to this view - confirm)
%   maxIter: the maximal number of iterations
%   epsilon: the convergence parameter; iteration stops once the objective
%            decreases by less than this amount in one step
%
% OUTPUT:
%   H: the updated factor matrix
%
% Peizhuo Wang ([email protected])

% Objective evaluated at a candidate factor matrix F
viewObjective = @(F) norm(X - F*F', 'fro')^2 + lambda*norm(F - Hc, 'fro')^2;

previousObj = viewObjective(H);

for step = 1:maxIter
    % Multiplicative update rule; eps guards against division by zero
    numerator = 2*X*H + lambda*Hc;
    denominator = 2*H*(H'*H) + lambda*H;
    H = H.*(numerator./(denominator+eps));

    % Stop when the objective no longer drops by at least epsilon
    currentObj = viewObjective(H);
    improvement = previousObj - currentObj;
    previousObj = currentObj;
    if improvement < epsilon
        break;
    end
end

end

evaluation.m

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
function [ TPR, FPR, Accuracy, MCC, I ] = evaluation(C_preticted, C_reference, N)
%EVALUATION Pair-counting performance measures (TPR, FPR, Accuracy and MCC).
%
% INPUT:
%   C_preticted: a cell containing the predicted clusters (one vector of
%                item ids per cluster)
%   C_reference: a cell containing the ground-truth clusters
%   N:           total number of items
%
% OUTPUT:
%   TPR:      the True Positive Rate,  TP/(TP+FN)
%   FPR:      the False Positive Rate, FP/(FP+TN)
%   Accuracy: (TP+TN)/(TP+FP+TN+FN)
%   MCC:      the Matthews Correlation Coefficient
%   I:        (optional fifth output) confusion matrix [TP, FP; FN, TN]
%             counted over item pairs
%
% A measure is NaN when its denominator is zero (for example when there
% are no predicted clusters at all).
%
% Peizhuo Wang ([email protected])

%% Contingency table
% c_num(i,j) = |predicted i  intersect  reference j|.
% Extra last column: items of predicted cluster i found in no reference
% cluster. Extra last row: items of reference cluster j found in no
% predicted cluster.
K_preticted = length(C_preticted);
K_reference = length(C_reference);
backgroundCol = K_reference + 1;
lostRow = K_preticted + 1;

c_num = zeros(lostRow, backgroundCol);
C = cell(K_preticted, K_reference);
for i = 1:K_preticted
    coveredByReference = [];
    for j = 1:K_reference
        C{i, j} = intersect(C_preticted{i}, C_reference{j});
        c_num(i, j) = length(C{i, j});
        coveredByReference = union(coveredByReference, C{i, j});
    end
    % Background noise nodes
    c_num(i, backgroundCol) = length(C_preticted{i}) - length(coveredByReference);
end
for j = 1:K_reference
    coveredByPrediction = [];
    for i = 1:K_preticted
        coveredByPrediction = union(coveredByPrediction, C{i, j});
    end
    % Lost reference nodes
    c_num(lostRow, j) = length(C_reference{j}) - length(coveredByPrediction);
end

%% TP, FP, FN, TN for node pairs
overlap = c_num(1:K_preticted, 1:K_reference);
TP = sum(sum(overlap.*(overlap-1)/2));

% Pairs sharing a predicted cluster but lying in different reference
% columns (the background column included).
FP = 0;
for j = 1:K_reference
    laterCols = c_num(1:K_preticted, (j+1):backgroundCol);
    FP = FP + sum(c_num(1:K_preticted, j).*sum(laterCols, 2));
end

% Pairs sharing a reference cluster but lying in different predicted rows
% (the lost row included).
FN = 0;
for i = 1:K_preticted
    laterRows = c_num((i+1):lostRow, 1:K_reference);
    FN = FN + sum(c_num(i, 1:K_reference).*sum(laterRows, 1));
end

TN = N*(N-1)/2 - (TP+FN+FP);
I = [TP, FP; FN, TN];

%% TPR, FPR, Accuracy, MCC
TPR = TP/(TP+FN);
FPR = FP/(FP+TN);
Accuracy = (TP+TN)/(TP+FP+TN+FN);
MCC = (TP*TN-FP*FN)/sqrt((TP+FP)*(TP+FN)*(TN+FP)*(TN+FN));

end

featureNets.m

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
function [ Strength, Participation ] = featureNets( multiNetworks, N )
%FEATURENETS Compute two feature matrices from multiple networks.
%
% INPUT:
%   multiNetworks : a cell containing multiple networks. The format is
%                   decided from the FIRST network:
%                     * at most 3 columns -> edge list, one edge per row
%                       as [i, j] (unweighted) or [i, j, weight];
%                     * square matrix     -> full adjacency matrix.
%                   Consequently an adjacency matrix with 3 or fewer
%                   nodes is always read as an edge list.
%   N             : the number of all nodes (edge-list input only; for
%                   adjacency-matrix input the size of the first matrix
%                   is used instead)
%
% OUTPUT:
%   Strength      : N x N matrix for Connection Strength, the mean over
%                   networks of the transformed edge weights
%   Participation : N x N matrix for Participation Coefficient
%   Both matrices have a zero diagonal.
%
% Edge weights are divided by the largest (absolute) weight of their
% network and passed through a logistic curve that maps 0 to 1e-4, 0.5 to
% 0.5 and 1 to 0.9999; scaled weights <= 0.3 are then set to exactly 0.
% A network whose largest weight is not positive contributes nothing.
%
% Peizhuo Wang ([email protected])

network_count = length(multiNetworks);
if network_count == 0
    error('featureNets:emptyInput', ...
        'multiNetworks must contain at least one network.');
end
[n, m] = size(multiNetworks{1});

isEdgeList = (m <= 3);
if ~isEdgeList
    if m ~= n
        error('featureNets:unsupportedFormat', ...
            'Networks must be edge lists (<= 3 columns) or square matrices.');
    end
    % Size the accumulators from the data so a mismatching N cannot
    % cause a dimension error when the matrices are added up.
    N = n;
end

A = zeros(N);      % sum over networks of transformed weights
temp = zeros(N);   % sum over networks of squared transformed weights
weight_min = 0;

for k = 1:network_count
    theMatrix = multiNetworks{k};
    if isEdgeList
        [edge_count, col_count] = size(theMatrix);
        if (col_count == 3)     % weighted network
            weights = abs(theMatrix(:, 3));
            % Scale by the largest ABSOLUTE weight, matching the abs()
            % applied to every individual weight.
            weight_max = max(weights);
        else                    % unweighted network
            weights = ones(edge_count, 1);
            weight_max = 1;
        end
        if isempty(weight_max) || weight_max <= weight_min
            continue;   % no usable weights: avoid 0/0 -> NaN
        end
        weight_2 = squashWeights((weights - weight_min) / (weight_max - weight_min));

        for e = 1:edge_count
            ii = theMatrix(e, 1);
            jj = theMatrix(e, 2);

            A(ii, jj) = A(ii, jj) + weight_2(e);
            A(jj, ii) = A(ii, jj);
            temp(ii, jj) = temp(ii, jj) + weight_2(e)^2;
            temp(jj, ii) = temp(ii, jj);
        end
    else
        weight_max = max(max(theMatrix));
        if isempty(weight_max) || weight_max <= weight_min
            continue;   % no usable weights: avoid 0/0 -> NaN
        end
        matrix_weight_2 = squashWeights( ...
            (full(theMatrix) - weight_min) ./ (weight_max - weight_min));

        A = A + matrix_weight_2;
        temp = temp + matrix_weight_2.^2;
    end
end

Participation = (network_count/(network_count-1)) * (1-(temp./(A.^2)));
% Non-finite entries (e.g. node pairs without any retained edge, where
% A is 0) are reported as 0.
Participation(isinf(Participation)) = 0;
Participation(isnan(Participation)) = 0;
Participation = Participation - diag(diag(Participation));

Strength = A./network_count;
Strength = Strength - diag(diag(Strength));

end

function w = squashWeights(x)
% Logistic transform of scaled weights; scaled weights <= 0.3 become 0.
w = 1./(1+exp(log(9999)-2*log(9999)*x));
w(x <= 0.3) = 0;
end

main_run.m

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
% Driver script: build (or load) a multi-network data set, run ConMod on
% it and score the resulting modules against the reference labels.
clear
clc

%% Load multi-network data sets
disp('Processing the networks...')

% Option I: load data from files
% nets = importdata('./networklist.txt');
% T = length(nets);
% multiNetworks = cell(T, 1);
% for i = 1:T
%     disp(['network: ', num2str(i)])
%     multiNetworks{i} = load(['./', nets{i}]);
% end
% realLabels = importdata('./labels.txt');

% Option II: automatically generated synthetic data sets
% [multiNetworks, realLabels] = syn_dataset_common(0.5, false); % common type
[multiNetworks, realLabels, lables_specific] = syn_dataset_overlap(0.3, false); % overlap type
% NOTE(review): presumably has to equal the node count produced by the
% generator above - confirm before switching data sets.
num_Nodes = 500;

%% One-step finding conserved functional modules
tic
K = 5;                   % number of hidden factors
lambda = [0.01, 0.05];   % balancing weights handed to ConMod
xita = 2;                % node-selection parameter
maxIter = 50;            % iteration cap for multi-view NMF
modules = ConMod(multiNetworks, num_Nodes, K, lambda, xita, maxIter);
runtime = toc;
disp(['Done. Running time: ' num2str(runtime) ' sec.'])

%% Step-by-step finding conserved functional modules (disabled alternative)
% Calculating the feature networks
% tic
% disp('Calculating the strengh matrix and the uniformity matrix...')
% [Strength, Participation] = featureNets(multiNetworks, num_Nodes);
%
% % Obtaining the candidate modules by multi-view NMF
% disp('Obtaining candidate modules by multi-view NMF...')
% K = 5;
% disp(['K=', num2str(K)])
% X = {Strength, Participation};
% lambda = [0.01, 0.05];
% [ H, Hc, objValue ] = multiViewNMF( X, K, lambda, 50 );
%
% % Selecting nodes from the consensus factors
% xita = 1.5;
% modules_final = modulesTruing( Hc, xita );
% runtime = toc;
% disp(['Running time: ', num2str(runtime), ' sec.'])

%% Module validation (disabled)
% disp('Validation...')
% [ pvalues_modulePerNet, FDR2 ] = significantModules(modules_Merged, multiNetworks, num_Nodes);
% disp('Done.')

%% Clustering performance
[TPR, FPR, Accuracy, MCC] = evaluation(modules, realLabels, num_Nodes);

0 commit comments

Comments
 (0)