Skip to content

Commit ac6a79f

Browse files
committed
Add wordcloud to git history
1 parent 6f67235 commit ac6a79f

File tree

1 file changed

+57
-20
lines changed

1 file changed

+57
-20
lines changed

jupyter/GitHistoryGeneral.ipynb

+57 -20
Original file line number | Diff line number | Diff line change
@@ -91,16 +91,9 @@
9191
"source": [
9292
"def get_cypher_query_from_file(cypher_file_name : str):\n",
9393
" with open(cypher_file_name) as file:\n",
94-
" return ' '.join(file.readlines())"
95-
]
96-
},
97-
{
98-
"cell_type": "code",
99-
"execution_count": null,
100-
"id": "59310f6f",
101-
"metadata": {},
102-
"outputs": [],
103-
"source": [
94+
" return ' '.join(file.readlines())\n",
95+
"\n",
96+
"\n",
10497
"def query_cypher_to_data_frame(filename : str, limit: int = -1):\n",
10598
" \"\"\"\n",
10699
" Executes the Cypher query of the given file and returns the result.\n",
@@ -111,16 +104,9 @@
111104
" if limit > 0:\n",
112105
" cypher_query = \"{query}\\nLIMIT {row_limit}\".format(query = cypher_query, row_limit = limit)\n",
113106
" records, summary, keys = driver.execute_query(cypher_query)\n",
114-
" return pd.DataFrame([r.values() for r in records], columns=keys)"
115-
]
116-
},
117-
{
118-
"cell_type": "code",
119-
"execution_count": null,
120-
"id": "c09da482",
121-
"metadata": {},
122-
"outputs": [],
123-
"source": [
107+
" return pd.DataFrame([r.values() for r in records], columns=keys)\n",
108+
"\n",
109+
"\n",
124110
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = -1):\n",
125111
" \"\"\"\n",
126112
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
@@ -986,6 +972,57 @@
986972
")\n",
987973
"figure.show(**plotly_treemap_figure_show_settings)"
988974
]
975+
},
976+
{
977+
"cell_type": "markdown",
978+
"id": "14e87aff",
979+
"metadata": {},
980+
"source": [
981+
"## WordCloud of git authors"
982+
]
983+
},
984+
{
985+
"cell_type": "code",
986+
"execution_count": null,
987+
"id": "e2f68f02",
988+
"metadata": {},
989+
"outputs": [],
990+
"source": [
991+
"# Query data from graph database\n",
992+
"git_author_words_with_frequency = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_git_author_Wordcloud_with_frequency.cypher\")\n",
993+
"# Debug \n",
994+
"# display(git_author_words_with_frequency.head(10))"
995+
]
996+
},
997+
{
998+
"cell_type": "code",
999+
"execution_count": null,
1000+
"id": "d83ce5f4",
1001+
"metadata": {},
1002+
"outputs": [],
1003+
"source": [
1004+
"from wordcloud import WordCloud\n",
1005+
"import matplotlib.pyplot as plot\n",
1006+
"\n",
1007+
"if not git_author_words_with_frequency.empty:\n",
1008+
" # Expects the first column of the DataFrame to contain the words/text and the second column to contain the count/frequency.\n",
1009+
" words_with_frequency_dict=git_author_words_with_frequency.set_index(git_author_words_with_frequency.columns[0]).to_dict()[git_author_words_with_frequency.columns[1]]\n",
1010+
" wordcloud = WordCloud(\n",
1011+
" width=800, \n",
1012+
" height=800,\n",
1013+
" max_words=600, \n",
1014+
" collocations=False,\n",
1015+
" background_color='white', \n",
1016+
" colormap='viridis'\n",
1017+
" ).generate_from_frequencies(words_with_frequency_dict)\n",
1018+
"\n",
1019+
" # Plot the word cloud\n",
1020+
" plot.figure(figsize=(15,15))\n",
1021+
" plot.imshow(wordcloud, interpolation='bilinear')\n",
1022+
" plot.axis(\"off\")\n",
1023+
" plot.title('Wordcloud of git authors')\n",
1024+
" plot.show()"
1025+
]
9891026
}
9901027
],
9911028
"metadata": {

0 commit comments

Comments
 (0)