Skip to content

Commit 0c90eae

Browse files
committed
Use new filenames (with PascalCase node label names)
1 parent bf0bc7f commit 0c90eae

File tree

5 files changed

+72
-71
lines changed

5 files changed

+72
-71
lines changed

.circleci/config.yml

+3-3
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ jobs:
3838
command: |
3939
mkdir data/
4040
cd data
41-
wget https://ldbc.github.io/ldbc_snb_data_converter/csv-composite-projected-fk-legacy-filenames.zip
42-
unzip csv-composite-projected-fk-legacy-filenames.zip
41+
wget -q https://ldbc.github.io/ldbc_snb_data_converter/csv-composite-projected-fk.zip
42+
unzip csv-composite-projected-fk.zip
4343
cd ..
4444
- run:
4545
name: Load
@@ -49,7 +49,7 @@ jobs:
4949
# Cypher
5050
cd cypher
5151
. scripts/environment-variables-default.sh
52-
export NEO4J_CSV_DIR=`pwd`/../data/csv-composite-projected-fk-legacy-filenames
52+
export NEO4J_CSV_DIR=`pwd`/../data/csv-composite-projected-fk
5353
export NEO4J_CSV_POSTFIX=.csv
5454
scripts/load-in-one-step.sh
5555
cd ..

cypher/README.md

+3-6
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,13 @@ This script replaces the headers in the input CSVs, load them, starts Neo4j, and
2626

2727
## Loading the example data set
2828

29-
Transform the example data set in the [data converter](https://github.com/ldbc/ldbc_snb_data_converter) repository, then rename it:
29+
Transform the example data set in the [data converter](https://github.com/ldbc/ldbc_snb_data_converter) repository:
3030

31-
```bash
32-
./rename.sh
33-
```
31+
Then, in this repository, run:
3432

35-
In this repository, run
3633
```bash
3734
. scripts/environment-variables-default.sh
38-
export NEO4J_CSV_DIR=${DATA_CONVERTER_DIR}/ldbc_snb_data_converter/data/csv-composite-projected-fk-legacy-filenames
35+
export NEO4J_CSV_DIR=${DATA_CONVERTER_DIR}/ldbc_snb_data_converter/data/csv-composite-projected-fk
3936
export NEO4J_CSV_POSTFIX=.csv
4037
scripts/load-in-one-step.sh
4138
```

cypher/scripts/convert-csvs.sh

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ while read line; do
2525

2626
echo ${FILENAME}: ${HEADER}
2727
# replace header (no point using sed to save space as it creates a temporary file as well)
28+
if [ ! -f ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} ]; then
29+
echo "${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX} does not exist"
30+
exit 1
31+
fi
2832
echo ${HEADER} | ${SNB_CAT} - <(tail -n +2 ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX}) > tmpfile.csv && mv tmpfile.csv ${NEO4J_CSV_DIR}/${FILENAME}${NEO4J_CSV_POSTFIX}
2933
done < headers.txt
3034

cypher/scripts/headers.txt

+31-31
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,31 @@
1-
static/organisation id:ID(Organisation)|:LABEL|name:STRING|url:STRING
2-
static/place id:ID(Place)|name:STRING|url:STRING|:LABEL
3-
static/tagclass id:ID(TagClass)|name:STRING|url:STRING
4-
static/tag id:ID(Tag)|name:STRING|url:STRING
5-
static/tagclass_isSubclassOf_tagclass :START_ID(TagClass)|:END_ID(TagClass)
6-
static/tag_hasType_tagclass :START_ID(Tag)|:END_ID(TagClass)
7-
static/organisation_isLocatedIn_place :START_ID(Organisation)|:END_ID(Place)
8-
static/place_isPartOf_place :START_ID(Place)|:END_ID(Place)
9-
dynamic/comment creationDate:DATETIME|id:ID(Comment)|locationIP:STRING|browserUsed:STRING|content:STRING|length:LONG
10-
dynamic/forum creationDate:DATETIME|id:ID(Forum)|title:STRING
11-
dynamic/person creationDate:DATETIME|id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:DATE|locationIP:STRING|browserUsed:STRING|speaks:STRING[]|email:STRING[]
12-
dynamic/post creationDate:DATETIME|id:ID(Post)|imageFile:STRING|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:LONG
13-
dynamic/comment_hasCreator_person creationDate:DATETIME|:START_ID(Comment)|:END_ID(Person)
14-
dynamic/comment_isLocatedIn_country creationDate:DATETIME|:START_ID(Comment)|:END_ID(Place)
15-
dynamic/comment_replyOf_comment creationDate:DATETIME|:START_ID(Comment)|:END_ID(Comment)
16-
dynamic/comment_replyOf_post creationDate:DATETIME|:START_ID(Comment)|:END_ID(Post)
17-
dynamic/forum_containerOf_post creationDate:DATETIME|:START_ID(Forum)|:END_ID(Post)
18-
dynamic/forum_hasMember_person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
19-
dynamic/forum_hasModerator_person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
20-
dynamic/forum_hasTag_tag creationDate:DATETIME|:START_ID(Forum)|:END_ID(Tag)
21-
dynamic/person_hasInterest_tag creationDate:DATETIME|:START_ID(Person)|:END_ID(Tag)
22-
dynamic/person_isLocatedIn_city creationDate:DATETIME|:START_ID(Person)|:END_ID(Place)
23-
dynamic/person_knows_person creationDate:DATETIME|:START_ID(Person)|:END_ID(Person)
24-
dynamic/person_likes_comment creationDate:DATETIME|:START_ID(Person)|:END_ID(Comment)
25-
dynamic/person_likes_post creationDate:DATETIME|:START_ID(Person)|:END_ID(Post)
26-
dynamic/person_studyAt_organisation creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|classYear:LONG
27-
dynamic/person_workAt_organisation creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|workFrom:LONG
28-
dynamic/post_hasCreator_person creationDate:DATETIME|:START_ID(Post)|:END_ID(Person)
29-
dynamic/comment_hasTag_tag creationDate:DATETIME|:START_ID(Comment)|:END_ID(Tag)
30-
dynamic/post_hasTag_tag creationDate:DATETIME|:START_ID(Post)|:END_ID(Tag)
31-
dynamic/post_isLocatedIn_country creationDate:DATETIME|:START_ID(Post)|:END_ID(Place)
1+
static/Organisation id:ID(Organisation)|:LABEL|name:STRING|url:STRING
2+
static/Place id:ID(Place)|name:STRING|url:STRING|:LABEL
3+
static/TagClass id:ID(TagClass)|name:STRING|url:STRING
4+
static/Tag id:ID(Tag)|name:STRING|url:STRING
5+
static/TagClass_isSubclassOf_TagClass :START_ID(TagClass)|:END_ID(TagClass)
6+
static/Tag_hasType_TagClass :START_ID(Tag)|:END_ID(TagClass)
7+
static/Organisation_isLocatedIn_Place :START_ID(Organisation)|:END_ID(Place)
8+
static/Place_isPartOf_Place :START_ID(Place)|:END_ID(Place)
9+
dynamic/Comment creationDate:DATETIME|id:ID(Comment)|locationIP:STRING|browserUsed:STRING|content:STRING|length:LONG
10+
dynamic/Forum creationDate:DATETIME|id:ID(Forum)|title:STRING
11+
dynamic/Person creationDate:DATETIME|id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:DATE|locationIP:STRING|browserUsed:STRING|speaks:STRING[]|email:STRING[]
12+
dynamic/Post creationDate:DATETIME|id:ID(Post)|imageFile:STRING|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:LONG
13+
dynamic/Comment_hasCreator_Person creationDate:DATETIME|:START_ID(Comment)|:END_ID(Person)
14+
dynamic/Comment_isLocatedIn_Country creationDate:DATETIME|:START_ID(Comment)|:END_ID(Place)
15+
dynamic/Comment_replyOf_Comment creationDate:DATETIME|:START_ID(Comment)|:END_ID(Comment)
16+
dynamic/Comment_replyOf_Post creationDate:DATETIME|:START_ID(Comment)|:END_ID(Post)
17+
dynamic/Forum_containerOf_Post creationDate:DATETIME|:START_ID(Forum)|:END_ID(Post)
18+
dynamic/Forum_hasMember_Person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
19+
dynamic/Forum_hasModerator_Person creationDate:DATETIME|:START_ID(Forum)|:END_ID(Person)
20+
dynamic/Forum_hasTag_Tag creationDate:DATETIME|:START_ID(Forum)|:END_ID(Tag)
21+
dynamic/Person_hasInterest_Tag creationDate:DATETIME|:START_ID(Person)|:END_ID(Tag)
22+
dynamic/Person_isLocatedIn_City creationDate:DATETIME|:START_ID(Person)|:END_ID(Place)
23+
dynamic/Person_knows_Person creationDate:DATETIME|:START_ID(Person)|:END_ID(Person)
24+
dynamic/Person_likes_Comment creationDate:DATETIME|:START_ID(Person)|:END_ID(Comment)
25+
dynamic/Person_likes_Post creationDate:DATETIME|:START_ID(Person)|:END_ID(Post)
26+
dynamic/Person_studyAt_University creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|classYear:LONG
27+
dynamic/Person_workAt_Company creationDate:DATETIME|:START_ID(Person)|:END_ID(Organisation)|workFrom:LONG
28+
dynamic/Post_hasCreator_Person creationDate:DATETIME|:START_ID(Post)|:END_ID(Person)
29+
dynamic/Comment_hasTag_Tag creationDate:DATETIME|:START_ID(Comment)|:END_ID(Tag)
30+
dynamic/Post_hasTag_Tag creationDate:DATETIME|:START_ID(Post)|:END_ID(Tag)
31+
dynamic/Post_isLocatedIn_Country creationDate:DATETIME|:START_ID(Post)|:END_ID(Place)

cypher/scripts/import-to-neo4j.sh

+31-31
Original file line numberDiff line numberDiff line change
@@ -29,35 +29,35 @@ docker run --rm \
2929
neo4j:${NEO4J_VERSION} \
3030
neo4j-admin import \
3131
--id-type=INTEGER \
32-
--nodes=Place="/import/static/place${NEO4J_CSV_POSTFIX}" \
33-
--nodes=Organisation="/import/static/organisation${NEO4J_CSV_POSTFIX}" \
34-
--nodes=TagClass="/import/static/tagclass${NEO4J_CSV_POSTFIX}" \
35-
--nodes=Tag="/import/static/tag${NEO4J_CSV_POSTFIX}" \
36-
--nodes=Forum="/import/dynamic/forum${NEO4J_CSV_POSTFIX}" \
37-
--nodes=Person="/import/dynamic/person${NEO4J_CSV_POSTFIX}" \
38-
--nodes=Message:Comment="/import/dynamic/comment${NEO4J_CSV_POSTFIX}" \
39-
--nodes=Message:Post="/import/dynamic/post${NEO4J_CSV_POSTFIX}" \
40-
--relationships=IS_PART_OF="/import/static/place_isPartOf_place${NEO4J_CSV_POSTFIX}" \
41-
--relationships=IS_SUBCLASS_OF="/import/static/tagclass_isSubclassOf_tagclass${NEO4J_CSV_POSTFIX}" \
42-
--relationships=IS_LOCATED_IN="/import/static/organisation_isLocatedIn_place${NEO4J_CSV_POSTFIX}" \
43-
--relationships=HAS_TYPE="/import/static/tag_hasType_tagclass${NEO4J_CSV_POSTFIX}" \
44-
--relationships=HAS_CREATOR="/import/dynamic/comment_hasCreator_person${NEO4J_CSV_POSTFIX}" \
45-
--relationships=IS_LOCATED_IN="/import/dynamic/comment_isLocatedIn_country${NEO4J_CSV_POSTFIX}" \
46-
--relationships=REPLY_OF="/import/dynamic/comment_replyOf_comment${NEO4J_CSV_POSTFIX}" \
47-
--relationships=REPLY_OF="/import/dynamic/comment_replyOf_post${NEO4J_CSV_POSTFIX}" \
48-
--relationships=CONTAINER_OF="/import/dynamic/forum_containerOf_post${NEO4J_CSV_POSTFIX}" \
49-
--relationships=HAS_MEMBER="/import/dynamic/forum_hasMember_person${NEO4J_CSV_POSTFIX}" \
50-
--relationships=HAS_MODERATOR="/import/dynamic/forum_hasModerator_person${NEO4J_CSV_POSTFIX}" \
51-
--relationships=HAS_TAG="/import/dynamic/forum_hasTag_tag${NEO4J_CSV_POSTFIX}" \
52-
--relationships=HAS_INTEREST="/import/dynamic/person_hasInterest_tag${NEO4J_CSV_POSTFIX}" \
53-
--relationships=IS_LOCATED_IN="/import/dynamic/person_isLocatedIn_city${NEO4J_CSV_POSTFIX}" \
54-
--relationships=KNOWS="/import/dynamic/person_knows_person${NEO4J_CSV_POSTFIX}" \
55-
--relationships=LIKES="/import/dynamic/person_likes_comment${NEO4J_CSV_POSTFIX}" \
56-
--relationships=LIKES="/import/dynamic/person_likes_post${NEO4J_CSV_POSTFIX}" \
57-
--relationships=HAS_CREATOR="/import/dynamic/post_hasCreator_person${NEO4J_CSV_POSTFIX}" \
58-
--relationships=HAS_TAG="/import/dynamic/comment_hasTag_tag${NEO4J_CSV_POSTFIX}" \
59-
--relationships=HAS_TAG="/import/dynamic/post_hasTag_tag${NEO4J_CSV_POSTFIX}" \
60-
--relationships=IS_LOCATED_IN="/import/dynamic/post_isLocatedIn_country${NEO4J_CSV_POSTFIX}" \
61-
--relationships=STUDY_AT="/import/dynamic/person_studyAt_organisation${NEO4J_CSV_POSTFIX}" \
62-
--relationships=WORK_AT="/import/dynamic/person_workAt_organisation${NEO4J_CSV_POSTFIX}" \
32+
--nodes=Place="/import/static/Place${NEO4J_CSV_POSTFIX}" \
33+
--nodes=Organisation="/import/static/Organisation${NEO4J_CSV_POSTFIX}" \
34+
--nodes=TagClass="/import/static/TagClass${NEO4J_CSV_POSTFIX}" \
35+
--nodes=Tag="/import/static/Tag${NEO4J_CSV_POSTFIX}" \
36+
--nodes=Forum="/import/dynamic/Forum${NEO4J_CSV_POSTFIX}" \
37+
--nodes=Person="/import/dynamic/Person${NEO4J_CSV_POSTFIX}" \
38+
--nodes=Message:Comment="/import/dynamic/Comment${NEO4J_CSV_POSTFIX}" \
39+
--nodes=Message:Post="/import/dynamic/Post${NEO4J_CSV_POSTFIX}" \
40+
--relationships=IS_PART_OF="/import/static/Place_isPartOf_Place${NEO4J_CSV_POSTFIX}" \
41+
--relationships=IS_SUBCLASS_OF="/import/static/TagClass_isSubclassOf_TagClass${NEO4J_CSV_POSTFIX}" \
42+
--relationships=IS_LOCATED_IN="/import/static/Organisation_isLocatedIn_Place${NEO4J_CSV_POSTFIX}" \
43+
--relationships=HAS_TYPE="/import/static/Tag_hasType_TagClass${NEO4J_CSV_POSTFIX}" \
44+
--relationships=HAS_CREATOR="/import/dynamic/Comment_hasCreator_Person${NEO4J_CSV_POSTFIX}" \
45+
--relationships=IS_LOCATED_IN="/import/dynamic/Comment_isLocatedIn_Country${NEO4J_CSV_POSTFIX}" \
46+
--relationships=REPLY_OF="/import/dynamic/Comment_replyOf_Comment${NEO4J_CSV_POSTFIX}" \
47+
--relationships=REPLY_OF="/import/dynamic/Comment_replyOf_Post${NEO4J_CSV_POSTFIX}" \
48+
--relationships=CONTAINER_OF="/import/dynamic/Forum_containerOf_Post${NEO4J_CSV_POSTFIX}" \
49+
--relationships=HAS_MEMBER="/import/dynamic/Forum_hasMember_Person${NEO4J_CSV_POSTFIX}" \
50+
--relationships=HAS_MODERATOR="/import/dynamic/Forum_hasModerator_Person${NEO4J_CSV_POSTFIX}" \
51+
--relationships=HAS_TAG="/import/dynamic/Forum_hasTag_Tag${NEO4J_CSV_POSTFIX}" \
52+
--relationships=HAS_INTEREST="/import/dynamic/Person_hasInterest_Tag${NEO4J_CSV_POSTFIX}" \
53+
--relationships=IS_LOCATED_IN="/import/dynamic/Person_isLocatedIn_City${NEO4J_CSV_POSTFIX}" \
54+
--relationships=KNOWS="/import/dynamic/Person_knows_Person${NEO4J_CSV_POSTFIX}" \
55+
--relationships=LIKES="/import/dynamic/Person_likes_Comment${NEO4J_CSV_POSTFIX}" \
56+
--relationships=LIKES="/import/dynamic/Person_likes_Post${NEO4J_CSV_POSTFIX}" \
57+
--relationships=HAS_CREATOR="/import/dynamic/Post_hasCreator_Person${NEO4J_CSV_POSTFIX}" \
58+
--relationships=HAS_TAG="/import/dynamic/Comment_hasTag_Tag${NEO4J_CSV_POSTFIX}" \
59+
--relationships=HAS_TAG="/import/dynamic/Post_hasTag_Tag${NEO4J_CSV_POSTFIX}" \
60+
--relationships=IS_LOCATED_IN="/import/dynamic/Post_isLocatedIn_Country${NEO4J_CSV_POSTFIX}" \
61+
--relationships=STUDY_AT="/import/dynamic/Person_studyAt_University${NEO4J_CSV_POSTFIX}" \
62+
--relationships=WORK_AT="/import/dynamic/Person_workAt_Company${NEO4J_CSV_POSTFIX}" \
6363
--delimiter '|'

0 commit comments

Comments
 (0)