Skip to content

Commit 99d5d0a

Browse files
committed
Add getting started
1 parent 73903de commit 99d5d0a

24 files changed

+155
-31
lines changed

.~lock.ecosystms_output.csv#

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
,mark,bear-desk,21.07.2024 19:40,file:///home/mark/.config/libreoffice/4;

README.md

+1-6
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,7 @@ MOSS is a project of [OSSci](https://www.opensource.science/), an initiative of
88
> The Map of Open Source Science is a proof of concept and as such, nothing is accurate.
99
1010

11-
## Getting Started
12-
1. Clone this repository
13-
2. Deploy neo4j locally using Docker
14-
- view [neo4j-docker/README.md](./neo4j-docker/README.md)
15-
3. load example-data
16-
- run [import-db-neo4j]() cypher script from neo4j web interface
11+
## [Getting Started](./scripts/README.md)
1712

1813

1914
## Goal

index.html

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
<head>
2+
<style> body { margin: 0; } </style>
3+
4+
<script src="//unpkg.com/3d-force-graph"></script>
5+
<script src="https://unpkg.com/neo4j-driver"></script>
6+
<!--<script src="../../dist/3d-force-graph.js"></script>-->
7+
</head>
8+
9+
<body>
10+
<div id="3d-graph"></div>
11+
12+
<script>
13+
// Loads relationships from a local Neo4j database and renders them as an
// interactive 3D force-directed graph inside the #3d-graph element.
const elem = document.getElementById('3d-graph');
// NOTE(review): the Bolt endpoint and credentials are hard-coded and visible to
// anyone who can load this page; keep this to a local demo database.
const driver = neo4j.driver("bolt://localhost:7689", neo4j.auth.basic("neo4j", "mossmossmoss"));
const session = driver.session({database:"neo4j"});
const start = new Date();

// Returns every relationship (capped by $limit) as a source map, a target map
// and a rel map carrying the relationship type.
const GRAPH_QUERY = 'MATCH (n)-[r]->(m) RETURN { id: id(n), label:head(labels(n)), caption:n.Name } as source, { id: id(m), label:head(labels(m)), caption:m.Name } as target, {type:type(r)} as rel LIMIT $limit';

/**
 * Converts query records into the { nodes, links } structure given to graphData().
 *
 * @param {Array} records - Result records exposing 'source', 'target' and 'rel' through get().
 * @returns {{nodes: Array<Object>, links: Array<Object>}} Unique nodes plus one link per record.
 */
function toGraphData(records) {
  // Keyed by numeric node id so a node that appears in several relationships is kept once.
  const nodesById = {};
  const links = [];
  for (const record of records) {
    const source = record.get('source');
    // The id values expose toNumber(); plain numbers are needed as keys and link endpoints.
    source.id = source.id.toNumber();
    nodesById[source.id] = source;
    const target = record.get('target');
    target.id = target.id.toNumber();
    nodesById[target.id] = target;
    links.push(Object.assign({ source: source.id, target: target.id }, record.get('rel')));
  }
  return { nodes: Object.values(nodesById), links: links };
}

/**
 * Runs the graph query, draws the result, and always releases the database
 * connection afterwards, whether or not the query succeeded.
 *
 * @returns {Promise<void>} Settles once the session and driver have been closed.
 */
async function loadGraph() {
  try {
    const result = await session.run(GRAPH_QUERY, { limit: neo4j.int(50000) });
    const gData = toGraphData(result.records);
    console.log(`${gData.links.length} links loaded in ${new Date() - start} ms.`);
    ForceGraph3D()(elem)
      .graphData(gData)
      .nodeAutoColorBy('label')
      .linkAutoColorBy('type')
      .nodeLabel(node => `${node.label}: ${node.caption}`)
      .onNodeHover(node => elem.style.cursor = node ? 'pointer' : null);
  } catch (error) {
    console.error(error);
  } finally {
    // All data is already in memory at this point, so the connection can be
    // released; a failure while closing is reported but must not mask the outcome.
    await session.close().catch(closeError => console.error(closeError));
    await driver.close().catch(closeError => console.error(closeError));
  }
}

loadGraph();
42+
</script>
43+
</body>

neo4j-docker/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@
99
```
1010
> Notes:
1111
> - The '-d' argument tells Docker to run the containers detached (as a daemon) and is optional.
12-
> - You may need to use sudo if you have not configured your user to be in the docker group.
12+
> - You may need to use sudo if you have not configured your user to be in the docker group.
13+
> - Also, if you find yourself doing this over and over trying to get the password right, make sure to delete the folder created by docker between runs ;)

readme-assets/bloom-setup0.png

68.5 KB
Loading

readme-assets/bloom-setup1.png

1.91 MB
Loading

readme-assets/bloom-setup2.png

54.7 KB
Loading

readme-assets/ecosystms-setup0.png

91.7 KB
Loading

readme-assets/ecosystms-setup1.png

34.7 KB
Loading

readme-assets/ecosystms-setup2.png

106 KB
Loading

readme-assets/moss-import0.png

53 KB
Loading
128 KB
Loading

readme-assets/neo4j-setup0.png

49.2 KB
Loading

readme-assets/neo4j-setup1.png

8.55 KB
Loading

readme-assets/neo4j-setup2.png

63.2 KB
Loading

readme-assets/neo4j-setup3.png

76.9 KB
Loading

readme-assets/neo4j-setup4.png

57.8 KB
Loading

readme-assets/neo4j-setup5.png

62 KB
Loading

readme-assets/neo4j-setup6.png

50.3 KB
Loading

readme-assets/neo4j-setup7.png

172 KB
Loading

scripts/README.md

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
## Getting Started
2+
### Overview
3+
1. [Install neo4j desktop](https://neo4j.com/docs/desktop-manual/current/installation/download-installation/)
4+
2. Clone this repository
5+
3. Collect data to add to your graph using 'ecosyst.ms-api.py'
6+
4. Import data into neo4j using 'import-db-neo4j'
7+
5. Query and visualize with neo4j Bloom
8+
9+
### Start data collection
10+
1. Go to [ecosyste.ms](https://papers.ecosyste.ms/) and choose a project to analyze
11+
> Try to find one under 200 mentions for the first try, then go larger
12+
2. Find the project's api url by trying out the ['/projects/{ecosystem}/{name}' query](https://papers.ecosyste.ms/docs/index.html)
13+
14+
![](../readme-assets/ecosystms-setup0.png)
15+
16+
3. Open your terminal, navigate to the repository directory (or its 'scripts' subdirectory), and run 'ecosyst.ms-api.py' with 'python3'
17+
18+
![](../readme-assets/ecosystms-setup1.png)
19+
20+
4. Paste the 'Request URL' as the URL of interest; when asked whether to continue processing more papers, choose 'y' — it's more interesting but takes a little longer
21+
22+
![](../readme-assets/ecosystms-setup2.png)
23+
24+
5. Once this is done you will have a csv file to import into neo4j
25+
26+
### Install & setup neo4j desktop
27+
1. [Install neo4j desktop](https://neo4j.com/docs/desktop-manual/current/installation/download-installation/)
28+
29+
2. Add a local DBMS to your project
30+
31+
![Add local DBMS](../readme-assets/neo4j-setup0.png)
32+
33+
3. Name it anything, remember the password, press 'create'
34+
35+
![](../readme-assets/neo4j-setup1.png)
36+
37+
4. Edit the settings in the 3 dot menu to the right of your DBMS: uncomment 'dbms.security.allow_csv_import_from_file_urls=true'
38+
39+
![](../readme-assets/neo4j-allow-file-imports1.png)
40+
41+
5. Start it and observe the 'Bolt port'
42+
43+
![](../readme-assets/neo4j-setup2.png)
44+
45+
46+
6. Open the Browser app
47+
48+
![](../readme-assets/neo4j-setup3.png)
49+
![](../readme-assets/neo4j-setup4.png)
50+
51+
### Import your data to the graph database
52+
1. Open the import folder in your local filesystem
53+
54+
![](../readme-assets/neo4j-setup5.png)
55+
56+
2. Copy the csv file created by the 'ecosyst.ms-api.py' script into the import directory
57+
58+
![](../readme-assets/moss-import0.png)
59+
60+
3. Copy the contents of 'import-db-neo4j' and paste into the shell in the browser app
61+
62+
![](../readme-assets/neo4j-setup6.png)
63+
64+
4. Press the blue play/run button and this will import the rows of the csv as nodes in the graph
65+
66+
![](../readme-assets/neo4j-setup7.png)
67+
68+
### Explore queries and visualizations
69+
1. Open neo4j Bloom the same way you opened the neo4j browser from neo4j desktop
70+
71+
![](../readme-assets/bloom-setup0.png)
72+
73+
2. Form your first query in the top left
74+
75+
![](../readme-assets/bloom-setup2.png)
76+
77+
3. Run it and see what happens! You can adjust the max node count in the settings in the bottom left
78+
79+
![](../readme-assets/bloom-setup1.png)
80+
81+
4. If you add the Graph Data Science plugin to your DBMS back in neo4j desktop, you can use its algorithms to change node size and other exciting things

python-scripts/ecosyst.ms-api.py scripts/ecosyst.ms-api.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
projectUrlSet = set()
2222
headers = {'accept': 'application/json'}
23-
myFieldnames = ['id', 'type', 'display_name', 'orcid', 'institutions', 'title', 'doi', 'software_mentions', 'authors', 'homepage', 'repository_url', 'sdgs', 'score']
23+
myFieldnames = ['ID', 'Label', 'Name', 'ORCID', 'Persons Affiliated Institutions', 'DOI', 'Projects/Packages Cited', 'Authors', 'Homepage', 'repository_url', 'Sustainable Development Goals', 'sdg_score']
2424

2525
def processPaper(paperURL):
2626
paperResponse = requests.get(paperURL, headers=headers)
@@ -47,19 +47,19 @@ def processPaper(paperURL):
4747
thisAuthorInstitutions.append(institution['display_name'])
4848
if institution["id"] not in institutionSet:
4949
institutionSet.add(institution["id"])
50-
rowList.append({'id': institution['id'], 'type': "Institution", 'display_name': institution['display_name']})
50+
rowList.append({'ID': institution['id'], 'Label': "Institution", 'Name': institution['display_name']})
5151
thisPaperSDGs = []
5252
for sdg in paperDict['openalex_data']['sustainable_development_goals']:
5353
thisPaperSDGs.append(sdg['display_name'])
5454
if sdg["id"] not in sdgSet:
5555
sdgSet.add(sdg["id"])
56-
rowList.append({'id': sdg['id'], 'type': "SDG", 'display_name': sdg['display_name'], 'score': sdg['score']})
56+
rowList.append({'ID': sdg['id'], 'Label': "SDG", 'Name': sdg['display_name'], 'sdg_score': sdg['score']})
5757

58-
rowList.append({'id': paperDict['openalex_id'], 'type': "Paper", 'title': paperDict['title'], 'doi': paperDict['doi'], 'authors': paperAuthorNames, 'software_mentions': paperMentions, 'sdgs': thisPaperSDGs})
58+
rowList.append({'ID': paperDict['openalex_id'], 'Label': "Paper", 'Name': paperDict['title'], 'DOI': paperDict['doi'], 'Authors': " | ".join(paperAuthorNames), 'Projects/Packages Cited': " | ".join(paperMentions), 'Sustainable Development Goals': " | ".join(thisPaperSDGs)})
5959
authorDict = authorship["author"]
6060
if authorDict['id'] not in peopleSet:
6161
peopleSet.add(authorDict['id'])
62-
rowList.append({'type': "Person"} | authorDict | {'institutions': thisAuthorInstitutions})
62+
rowList.append({'ID': authorDict['id'], 'Label': "Person", 'Name': authorDict['display_name'], 'ORCID': authorDict['orcid'], 'Persons Affiliated Institutions': " | ".join(thisAuthorInstitutions)})
6363
except json.decoder.JSONDecodeError:
6464
paperAuthorNames = [] #meaningless
6565

@@ -90,7 +90,7 @@ def processPaperMentions(paperMentionsURL):
9090

9191
if projDict["czi_id"] not in projectSet:
9292
projectSet.add(projDict["czi_id"])
93-
rowList.append({'id': projDict["czi_id"], 'type': "Project", 'display_name': projDict["ecosystem"] + ":" + projDict["name"], 'homepage': home, 'repository_url': repo})
93+
rowList.append({'ID': projDict["czi_id"], 'Label': "Project", 'Name': projDict["ecosystem"] + ":" + projDict["name"], 'Homepage': home, 'repository_url': repo})
9494
except json.decoder.JSONDecodeError:
9595
thisPapersMentions = []
9696
except json.decoder.JSONDecodeError:
@@ -116,7 +116,7 @@ def processProject(projectU):
116116
repo = ""
117117

118118
projectSet.add(projectDict["czi_id"])
119-
rowList.append({'id': projectDict["czi_id"], 'type': "Project", 'display_name': projectDict["ecosystem"] + ":" + projectDict["name"], 'homepage': home, 'repository_url': repo})
119+
rowList.append({'ID': projectDict["czi_id"], 'Label': "Project", 'Name': projectDict["ecosystem"] + ":" + projectDict["name"], 'Homepage': home, 'repository_url': repo})
120120

121121
projectMentionsURL = projectDict["mentions_url"] + "?page=1&per_page=1000"
122122

@@ -172,7 +172,7 @@ def checkScope():
172172
mentionsAverage = sum(mentionsCounts) / len(mentionsCounts)
173173

174174
print("With an average of: " + str(mentionsAverage) + " mentions per project")
175-
return(mentionsAverage*len(projectUrlSet))
175+
return(sum(mentionsCounts))
176176

177177

178178

@@ -185,7 +185,7 @@ def checkScope():
185185

186186
papersEstimate = checkScope()
187187

188-
continueYN = input("Would you like to continue processing roughly " + str(papersEstimate) + " more papers? y/n: ")
188+
continueYN = input("Would you like to continue processing " + str(papersEstimate) + " more papers? y/n: ")
189189

190190
if continueYN == 'y':
191191
projUrlSetCopy = projectUrlSet.copy()

scripts/ecosystms_output.csv

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ID,Label,Name,ORCID,Persons Affiliated Institutions,DOI,Projects/Packages Cited,Authors,Homepage,repository_url,Sustainable Development Goals,sdg_score

import-db-neo4j scripts/import-db-neo4j

+17-15
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@ DROP INDEX instByName IF EXISTS;
1515
//import nodes
1616

1717
/////////////////////////////////////////////IMPORT ALL ELEMENTS
18-
LOAD CSV WITH HEADERS FROM "" AS row
18+
LOAD CSV WITH HEADERS FROM "file:///ecosystms_output.csv" AS row
1919

20-
CALL apoc.create.node([row.Type],
21-
CASE row.Type
20+
CALL apoc.create.node([row.Label],
21+
CASE row.Label
2222
WHEN 'Project' THEN {
23-
Name: row.Label,
23+
Name: row.Name,
2424
Description: row.Description,
25+
Homepage: row.Homepage,
2526
//License
2627
Language: split(row.Language, ' | '),
2728
Dependency: split(row.Dependency, ' | '),
@@ -36,15 +37,15 @@ CALL apoc.create.node([row.Type],
3637
Subfield: split(row.Subfield, ' | ')
3738
}
3839
WHEN 'Package' THEN {
39-
Name: row.Label,
40+
Name: row.Name,
4041
`Project/Package's Affiliated Projects`: split(row.`Project/Package's Affiliated Projects`, ' | '),
4142
`Sustaining/Parent`: row.`Sustaining/Parent`,
4243
Subfield: split(row.Subfield, ' | '),
4344
`Project Tags`: split(row.`Project Tags`, ' | '),
4445
Dependency: split(row.Dependency, ' | ')
4546
}
4647
WHEN 'Organization' THEN {
47-
Name: row.Label,
48+
Name: row.Name,
4849
`Consortium Affiliation`: row.`Consortium Affiliation`,
4950
`Org Type`: row.`Org Type`,
5051
`Focus Area`: split(row.`Focus Area`, ' | '),
@@ -56,16 +57,16 @@ CALL apoc.create.node([row.Type],
5657
`Fiscal Sponsor`: row.`Fiscal Sponsor`
5758
}
5859
WHEN 'Initiative' THEN {
59-
Name: row.Label,
60+
Name: row.Name,
6061
`Affiliated Org`: row.`Affiliated Org`,
6162
`Focus Area`: split(row.`Focus Area`, ' | '),
6263
`Initiative Description`: row.`Initiative Description`,
6364
Tag: row.Tag
6465
}
6566
WHEN 'Paper' THEN {
66-
Name: row.Label,
67+
Name: row.Name,
6768
Authors: split(row.Authors, ' | '),
68-
//ORCID
69+
ORCID: row.ORCID,
6970
`Publication Date`: row.`Publication Date`,
7071
Journal: row.Journal,
7172
Abstract: row.Abstract,
@@ -75,14 +76,14 @@ CALL apoc.create.node([row.Type],
7576
`Sustainable Development Goals`: split(row.`Sustainable Development Goals`, ' | ')
7677
}
7778
WHEN 'Person' THEN {
78-
Name: row.Label,
79+
Name: row.Name,
7980
`Person's Associated Projects`: split(row.`Person's Associated Projects`, ' | '),
8081
`Person's Associated Packages`: split(row.`Person's Associated Packages`, ' | '),
81-
`Person's Associated Organizations`: split(row.`Affiliated Org`, ' | '),
82+
`Person's Affiliated Institutions`: split(row.`Persons Affiliated Institutions`, ' | '),
8283
URL: row.URL
8384
}
8485
ELSE {
85-
Name: row.Label
86+
Name: row.Name
8687
}
8788
END
8889
) YIELD node
@@ -219,12 +220,12 @@ WHERE n1.Name = i
219220
MERGE (n0)-[:ASSOCIATED_TO_PACKAGE]->(n1);
220221

221222

222-
// Person -> ASSOCIATED_TO_ORGANIZATION -> Package
223+
// Person -> AFFILIATED_WITH_INSTITUTION -> Institution
223224
MATCH (n0:Person)
224-
UNWIND n0.`Person's Associated Organizations` as i
225+
UNWIND n0.`Person's Affiliated Institutions` as i
225226
MATCH (n1:Institution)
226227
WHERE n1.Name = i
227-
MERGE (n0)-[:ASSOCIATED_TO_ORGANIZATION]->(n1);
228+
MERGE (n0)-[:AFFILIATED_WITH_INSTITUTION]->(n1);
228229

229230

230231
/////////////////////////////////////////// Paper ->
@@ -266,3 +267,4 @@ MATCH (n1:Organization)
266267
WHERE n1.Name = i
267268
MERGE (n0)-[:GRANTING_ORGANIZATION]->(n1);
268269

270+

0 commit comments

Comments
 (0)