Skip to content

Commit 73903de

Browse files
committed
Update scripts
1 parent d8f92b0 commit 73903de

File tree

2 files changed

+258
-233
lines changed

2 files changed

+258
-233
lines changed

import-db-neo4j

Lines changed: 117 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -10,120 +10,95 @@ DROP INDEX organizationsByID IF EXISTS;
1010
DROP INDEX initiativesByID IF EXISTS;
1111
DROP INDEX personsByName IF EXISTS;
1212
DROP INDEX papersByName IF EXISTS;
13+
DROP INDEX instByName IF EXISTS;
1314

1415
//import nodes
1516

16-
/////////////////////////////////////////////IMPORT Projects
17-
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vSRC8OGwpbeRPYsPLaHPRCFX1-RSfhs8SW1kCfPPXyQovZEd7hstZt_ihj0lsUg9QuL2MnYJ90dttq7/pub?gid=797434240&single=true&output=csv" AS row
18-
19-
CALL apoc.create.node([row.Label], { //Project
20-
ProjectID: row.ProjectID,
21-
Description: row.Description,
22-
//License
23-
Language: split(row.Language, ' | '),
24-
Dependency: split(row.Dependency, ' | '),
25-
Complementary: split(row.Complementary, ' | '),
26-
Enhancement: split(row.Enhancement, ' | '),
27-
Downstream: split(row.Downstream, ' | '),
28-
Alternatives: split(row.Alternatives, ' | '),
29-
`Fiscal Sponsor`: row.`Fiscal Sponsor`,
30-
`Granting Organization A | B | C`: row.`Granting Organization A | B | C`,
31-
Domain: row.Domain,
32-
Subdomain: row.Subdomain,
33-
Subfield: split(row.Subfield, ' | ')
34-
})
35-
YIELD node
36-
RETURN node;
37-
38-
/////////////////////////////////////////////IMPORT Packages
39-
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vSRC8OGwpbeRPYsPLaHPRCFX1-RSfhs8SW1kCfPPXyQovZEd7hstZt_ihj0lsUg9QuL2MnYJ90dttq7/pub?gid=815410892&single=true&output=csv" AS row
40-
41-
CALL apoc.create.node([row.Label], { //Package
42-
PackageID: row.PackageID,
43-
`Project/Package's Affiliated Projects`: split(row.`Project/Package's Affiliated Projects`, ' | '),
44-
`Sustaining/Parent`: row.`Sustaining/Parent`,
45-
Subfield: split(row.Subfield, ' | '),
46-
`Project Tags`: split(row.`Project Tags`, ' | '),
47-
Dependency: split(row.Dependency, ' | ')
48-
})
49-
YIELD node
50-
RETURN node;
51-
52-
53-
54-
/////////////////////////////////////////////IMPORT Organizations
55-
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vSRC8OGwpbeRPYsPLaHPRCFX1-RSfhs8SW1kCfPPXyQovZEd7hstZt_ihj0lsUg9QuL2MnYJ90dttq7/pub?gid=2131543771&single=true&output=csv" AS row
56-
57-
CALL apoc.create.node([row.Label], { //Organization
58-
OrganizationID: row.OrganizationID,
59-
`Consortium Affiliation`: row.`Consortium Affiliation`,
60-
`Org Type`: row.`Org Type`,
61-
`Focus Area`: split(row.`Focus Area`, ' | '),
62-
Location: row.Location,
63-
Website: row.Website,
64-
`Contact Information`: row.`Contact Information`,
65-
`Year Established`: row.`Year Established`,
66-
Status: row.Status,
67-
`Fiscal Sponsor`: row.`Fiscal Sponsor`
68-
})
69-
YIELD node
70-
RETURN node;
71-
72-
73-
74-
/////////////////////////////////////////////IMPORT Initiatives
75-
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vSRC8OGwpbeRPYsPLaHPRCFX1-RSfhs8SW1kCfPPXyQovZEd7hstZt_ihj0lsUg9QuL2MnYJ90dttq7/pub?gid=708905383&single=true&output=csv" AS row
76-
77-
CALL apoc.create.node([row.Label], { //Initiative
78-
InitiativeID: row.InitiativeID,
79-
`Affiliated Org`: row.`Affiliated Org`,
80-
`Focus Area`: split(row.`Focus Area`, ' | '),
81-
`Initiative Description`: row.`Initiative Description`,
82-
Tag: row.Tag
83-
})
84-
YIELD node
85-
RETURN node;
86-
87-
/////////////////////////////////////////////IMPORT Papers
88-
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vSRC8OGwpbeRPYsPLaHPRCFX1-RSfhs8SW1kCfPPXyQovZEd7hstZt_ihj0lsUg9QuL2MnYJ90dttq7/pub?gid=0&single=true&output=csv" AS row
89-
90-
CALL apoc.create.node([row.Label], { //Paper
91-
PaperName: row.PaperName,
92-
Authors: split(row.Authors, ' | '),
93-
//ORCID
94-
`Publication Date`: row.`Publication Date`,
95-
Journal: row.Journal,
96-
Abstract: row.Abstract,
97-
DOI: row.DOI,
98-
`Projects/Packages Cited`: split(row.`Projects/Packages Cited`, ' | '),
99-
`Granting Organization A | B | C`: split(row.`Granting Organization A | B | C`, ' | ')
100-
})
101-
YIELD node
102-
RETURN node;
103-
104-
/////////////////////////////////////////////IMPORT People
105-
LOAD CSV WITH HEADERS FROM "https://docs.google.com/spreadsheets/d/e/2PACX-1vSRC8OGwpbeRPYsPLaHPRCFX1-RSfhs8SW1kCfPPXyQovZEd7hstZt_ihj0lsUg9QuL2MnYJ90dttq7/pub?gid=324622033&single=true&output=csv" AS row
106-
107-
CALL apoc.create.node([row.Label], { //Person
108-
PersonName: row.PersonName,
109-
`Person's Associated Projects`: split(row.`Person's Associated Projects`, ' | '),
110-
`Person's Associated Packages`: split(row.`Person's Associated Packages`, ' | '),
111-
URL: row.URL
112-
})
113-
YIELD node
17+
/////////////////////////////////////////////IMPORT ALL ELEMENTS
18+
LOAD CSV WITH HEADERS FROM "" AS row
19+
20+
CALL apoc.create.node([row.Type],
21+
CASE row.Type
22+
WHEN 'Project' THEN {
23+
Name: row.Label,
24+
Description: row.Description,
25+
//License
26+
Language: split(row.Language, ' | '),
27+
Dependency: split(row.Dependency, ' | '),
28+
Complementary: split(row.Complementary, ' | '),
29+
Enhancement: split(row.Enhancement, ' | '),
30+
Downstream: split(row.Downstream, ' | '),
31+
Alternatives: split(row.Alternatives, ' | '),
32+
`Fiscal Sponsor`: row.`Fiscal Sponsor`,
33+
`Granting Organization A | B | C`: row.`Granting Organization A | B | C`,
34+
Domain: row.Domain,
35+
Subdomain: row.Subdomain,
36+
Subfield: split(row.Subfield, ' | ')
37+
}
38+
WHEN 'Package' THEN {
39+
Name: row.Label,
40+
`Project/Package's Affiliated Projects`: split(row.`Project/Package's Affiliated Projects`, ' | '),
41+
`Sustaining/Parent`: row.`Sustaining/Parent`,
42+
Subfield: split(row.Subfield, ' | '),
43+
`Project Tags`: split(row.`Project Tags`, ' | '),
44+
Dependency: split(row.Dependency, ' | ')
45+
}
46+
WHEN 'Organization' THEN {
47+
Name: row.Label,
48+
`Consortium Affiliation`: row.`Consortium Affiliation`,
49+
`Org Type`: row.`Org Type`,
50+
`Focus Area`: split(row.`Focus Area`, ' | '),
51+
Location: row.Location,
52+
Website: row.Website,
53+
`Contact Information`: row.`Contact Information`,
54+
`Year Established`: row.`Year Established`,
55+
Status: row.Status,
56+
`Fiscal Sponsor`: row.`Fiscal Sponsor`
57+
}
58+
WHEN 'Initiative' THEN {
59+
Name: row.Label,
60+
`Affiliated Org`: row.`Affiliated Org`,
61+
`Focus Area`: split(row.`Focus Area`, ' | '),
62+
`Initiative Description`: row.`Initiative Description`,
63+
Tag: row.Tag
64+
}
65+
WHEN 'Paper' THEN {
66+
Name: row.Label,
67+
Authors: split(row.Authors, ' | '),
68+
//ORCID
69+
`Publication Date`: row.`Publication Date`,
70+
Journal: row.Journal,
71+
Abstract: row.Abstract,
72+
DOI: row.DOI,
73+
`Projects/Packages Cited`: split(row.`Projects/Packages Cited`, ' | '),
74+
`Granting Organization A | B | C`: split(row.`Granting Organization A | B | C`, ' | '),
75+
`Sustainable Development Goals`: split(row.`Sustainable Development Goals`, ' | ')
76+
}
77+
WHEN 'Person' THEN {
78+
Name: row.Label,
79+
`Person's Associated Projects`: split(row.`Person's Associated Projects`, ' | '),
80+
`Person's Associated Packages`: split(row.`Person's Associated Packages`, ' | '),
81+
`Person's Associated Organizations`: split(row.`Affiliated Org`, ' | '),
82+
URL: row.URL
83+
}
84+
ELSE {
85+
Name: row.Label
86+
}
87+
END
88+
) YIELD node
11489
RETURN node;
11590

11691

11792

11893

11994
//Create Indices
120-
CREATE INDEX projectsByID FOR (n:Project) ON (n.ProjectID);
121-
CREATE INDEX packagesByID FOR (n:Package) ON (n.PackageID);
122-
CREATE INDEX organizationsByID FOR (n:Organization) ON (n.OrganizationID);
123-
CREATE INDEX initiativesByID FOR (n:Initiative) ON (n.InitiativeID);
124-
CREATE INDEX personsByName FOR (n:Person) ON (n.PersonName);
125-
CREATE INDEX papersByName FOR (n:Paper) ON (n.PaperName);
126-
95+
CREATE INDEX projectsByID FOR (n:Project) ON (n.Name);
96+
CREATE INDEX packagesByID FOR (n:Package) ON (n.Name);
97+
CREATE INDEX organizationsByID FOR (n:Organization) ON (n.Name);
98+
CREATE INDEX initiativesByID FOR (n:Initiative) ON (n.Name);
99+
CREATE INDEX personsByName FOR (n:Person) ON (n.Name);
100+
CREATE INDEX papersByName FOR (n:Paper) ON (n.Name);
101+
CREATE INDEX instByName FOR (n:Institution) ON (n.Name);
127102
CALL db.awaitIndexes();
128103

129104

@@ -137,86 +112,86 @@ CALL db.awaitIndexes();
137112
MATCH (n0:Project)
138113
UNWIND n0.`Fiscal Sponsor` as i
139114
MATCH (n1:Organization)
140-
WHERE n1.OrganizationID = i
115+
WHERE n1.Name = i
141116
MERGE (n0)-[:FISCALLY_SPONSORED_BY]->(n1);
142117

143118
// Project -> DEPENDS_ON -> Project
144119
MATCH (n0:Project)
145120
UNWIND n0.Dependency as i
146121
MATCH (n1:Project)
147-
WHERE n1.ProjectID = i
122+
WHERE n1.Name = i
148123
MERGE (n0)-[:DEPENDS_ON]->(n1);
149124

150125
// Project -> DEPENDS_ON -> Package
151126
MATCH (n0:Project)
152127
UNWIND n0.Dependency as i
153128
MATCH (n1:Package)
154-
WHERE n1.PackageID = i
129+
WHERE n1.Name = i
155130
MERGE (n0)-[:DEPENDS_ON]->(n1);
156131

157132
// Project -> COMPLEMENTS -> Project
158133
MATCH (n0:Project)
159134
UNWIND n0.Complementary as i
160135
MATCH (n1:Project)
161-
WHERE n1.ProjectID = i
136+
WHERE n1.Name = i
162137
MERGE (n0)-[:COMPLEMENTS]->(n1);
163138

164139
// Project -> ENHANCES -> Project
165140
MATCH (n0:Project)
166141
UNWIND n0.Enhancement as i
167142
MATCH (n1:Project)
168-
WHERE n1.ProjectID = i
143+
WHERE n1.Name = i
169144
MERGE (n0)-[:ENHANCES]->(n1);
170145

171146
// Project <- DOWNSTREAM_OF <- Project
172147
MATCH (n0:Project)
173148
UNWIND n0.Downstream as i
174149
MATCH (n1:Project)
175-
WHERE n1.ProjectID = i
150+
WHERE n1.Name = i
176151
MERGE (n1)-[:DOWNSTREAM_OF]->(n0);
177152

178153
// Project <- ALTERNATIVE_TO <- Project
179154
MATCH (n0:Project)
180155
UNWIND n0.Alternatives as i
181156
MATCH (n1:Project)
182-
WHERE n1.ProjectID = i
157+
WHERE n1.Name = i
183158
MERGE (n1)-[:ALTERNATIVE_TO]->(n0); // type matches the Alternatives list unwound above (was DOWNSTREAM_OF, copied from the previous statement)
184159

185160
/////////////////////////////////////////// Package ->
186161
// Package -> AFFILIATED_PROJECT -> Project
187162
MATCH (n0:Package)
188163
UNWIND n0.`Project/Package's Affiliated Projects` as i
189164
MATCH (n1:Project)
190-
WHERE n1.ProjectID = i
165+
WHERE n1.Name = i
191166
MERGE (n0)-[:AFFILIATED_PROJECT]->(n1);
192167

193168
// Package -> DEPENDS_ON -> Package
194169
MATCH (n0:Package)
195170
UNWIND n0.Dependency as i
196171
MATCH (n1:Package)
197-
WHERE n1.PackageID = i
172+
WHERE n1.Name = i
198173
MERGE (n0)-[:DEPENDS_ON]->(n1);
199174

200175
// Package -> DEPENDS_ON -> Project
201176
MATCH (n0:Package)
202177
UNWIND n0.Dependency as i
203178
MATCH (n1:Project)
204-
WHERE n1.ProjectID = i
179+
WHERE n1.Name = i
205180
MERGE (n0)-[:DEPENDS_ON]->(n1);
206181

207182
/////////////////////////////////////////// Organization ->
208183
// Organization -> FISCALLY_SPONSORED_BY -> Organization
209184
MATCH (n0:Organization)
210185
UNWIND n0.`Fiscal Sponsor` as i
211186
MATCH (n1:Organization)
212-
WHERE n1.OrganizationID = i
187+
WHERE n1.Name = i
213188
MERGE (n0)-[:FISCALLY_SPONSORED_BY]->(n1);
214189

215190
// Organization -> AFFILIATED_WITH_CONSORTIUM -> Organization
216191
MATCH (n0:Organization)
217192
UNWIND n0.`Consortium Affiliation` as i
218193
MATCH (n1:Organization)
219-
WHERE n1.OrganizationID = i
194+
WHERE n1.Name = i
220195
MERGE (n0)-[:AFFILIATED_WITH_CONSORTIUM]->(n1);
221196

222197

@@ -225,51 +200,69 @@ MERGE (n0)-[:AFFILIATED_WITH_CONSORTIUM]->(n1);
225200
MATCH (n0:Initiative)
226201
UNWIND n0.`Affiliated Org` as i
227202
MATCH (n1:Organization)
228-
WHERE n1.OrganizationID = i
203+
WHERE n1.Name = i
229204
MERGE (n0)-[:AFFILIATED_WITH_ORGANIZATION]->(n1);
230205

231206
/////////////////////////////////////////// Person ->
232207
// Person -> ASSOCIATED_TO_PROJECT -> Project
233208
MATCH (n0:Person)
234209
UNWIND n0.`Person's Associated Projects` as i
235210
MATCH (n1:Project)
236-
WHERE n1.ProjectID = i
211+
WHERE n1.Name = i
237212
MERGE (n0)-[:ASSOCIATED_TO_PROJECT]->(n1);
238213

239214
// Person -> ASSOCIATED_TO_PACKAGE -> Package
240215
MATCH (n0:Person)
241216
UNWIND n0.`Person's Associated Packages` as i
242217
MATCH (n1:Package)
243-
WHERE n1.PackageID = i
218+
WHERE n1.Name = i
244219
MERGE (n0)-[:ASSOCIATED_TO_PACKAGE]->(n1);
245220

246221

222+
// Person -> ASSOCIATED_TO_ORGANIZATION -> Institution
223+
MATCH (n0:Person)
224+
UNWIND n0.`Person's Associated Organizations` as i
225+
MATCH (n1:Institution)
226+
WHERE n1.Name = i
227+
MERGE (n0)-[:ASSOCIATED_TO_ORGANIZATION]->(n1);
228+
229+
247230
/////////////////////////////////////////// Paper ->
248231
// Paper -> WRITTEN_BY -> Person
249232
MATCH (n0:Paper)
250233
UNWIND n0.Authors as i
251234
MATCH (n1:Person)
252-
WHERE n1.PersonName = i
235+
WHERE n1.Name = i
253236
MERGE (n0)-[:WRITTEN_BY]->(n1);
254237

255238

256239
// Paper -> CITES -> Project
257240
MATCH (n0:Paper)
258241
UNWIND n0.`Projects/Packages Cited` as i
259242
MATCH (n1:Project)
260-
WHERE n1.ProjectID = i
243+
WHERE n1.Name = i
261244
MERGE (n0)-[:CITES]->(n1);
262245

263246
// Paper -> CITES -> Package
264247
MATCH (n0:Paper)
265248
UNWIND n0.`Projects/Packages Cited` as i
266249
MATCH (n1:Package)
267-
WHERE n1.PackageID = i
250+
WHERE n1.Name = i
268251
MERGE (n0)-[:CITES]->(n1);
269252

253+
// Paper -> ADDRESSES -> SDG
254+
MATCH (n0:Paper)
255+
UNWIND n0.`Sustainable Development Goals` as i
256+
MATCH (n1:SDG)
257+
WHERE n1.Name = i
258+
MERGE (n0)-[:ADDRESSES]->(n1);
259+
260+
261+
270262
// Paper -> GRANTING_ORGANIZATION -> Organization
271263
MATCH (n0:Paper)
272264
UNWIND n0.`Granting Organization A | B | C` as i
273265
MATCH (n1:Organization)
274-
WHERE n1.OrganizationID = i
266+
WHERE n1.Name = i
275267
MERGE (n0)-[:GRANTING_ORGANIZATION]->(n1);
268+

0 commit comments

Comments
 (0)