Skip to content

Commit 7d165bc

Browse files
committed
Add wordcloud to git history
1 parent 545c39e commit 7d165bc

File tree

1 file changed

+57
-88
lines changed

1 file changed

+57
-88
lines changed

jupyter/GitHistoryGeneral.ipynb

+57-88
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,9 @@
9191
"source": [
9292
"def get_cypher_query_from_file(cypher_file_name : str):\n",
9393
" with open(cypher_file_name) as file:\n",
94-
" return ' '.join(file.readlines())"
95-
]
96-
},
97-
{
98-
"cell_type": "code",
99-
"execution_count": null,
100-
"id": "59310f6f",
101-
"metadata": {},
102-
"outputs": [],
103-
"source": [
94+
" return ' '.join(file.readlines())\n",
95+
"\n",
96+
"\n",
10497
"def query_cypher_to_data_frame(filename : str, limit: int = -1):\n",
10598
" \"\"\"\n",
10699
" Executes the Cypher query of the given file and returns the result.\n",
@@ -111,16 +104,9 @@
111104
" if limit > 0:\n",
112105
" cypher_query = \"{query}\\nLIMIT {row_limit}\".format(query = cypher_query, row_limit = limit)\n",
113106
" records, summary, keys = driver.execute_query(cypher_query)\n",
114-
" return pd.DataFrame([r.values() for r in records], columns=keys)"
115-
]
116-
},
117-
{
118-
"cell_type": "code",
119-
"execution_count": null,
120-
"id": "c09da482",
121-
"metadata": {},
122-
"outputs": [],
123-
"source": [
107+
" return pd.DataFrame([r.values() for r in records], columns=keys)\n",
108+
"\n",
109+
"\n",
124110
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = -1):\n",
125111
" \"\"\"\n",
126112
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
@@ -625,74 +611,6 @@
625611
"### Main author per directory"
626612
]
627613
},
628-
{
629-
"cell_type": "code",
630-
"execution_count": null,
631-
"id": "29069753",
632-
"metadata": {},
633-
"outputs": [],
634-
"source": [
635-
"# TODO delete unused code"
636-
]
637-
},
638-
{
639-
"cell_type": "raw",
640-
"id": "7ccca44e",
641-
"metadata": {},
642-
"source": [
643-
"# TODO experiment again with plotly express\n",
644-
"\n",
645-
"import plotly.express as plotly_express\n",
646-
"\n",
647-
"plotly_treemap_color_settings = dict(\n",
648-
" color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n",
649-
" color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n",
650-
")\n",
651-
"plotly_treemap_commit_statistics_custom_data= dict(\n",
652-
" custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n",
653-
")\n",
654-
"plotly_treemap_traces_base_settings = dict(\n",
655-
" root_color=\"lightgrey\",\n",
656-
" textinfo=\"label+value\",\n",
657-
" marker=dict(cornerradius=5),\n",
658-
")\n",
659-
"plotly_treemap_traces_commit_statistics_settings = dict(\n",
660-
" **plotly_treemap_traces_base_settings,\n",
661-
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n",
662-
")\n",
663-
"plotly_treemap_layout_base_settings = dict(\n",
664-
" margin=dict(t=50, l=15, r=15, b=15),\n",
665-
")\n",
666-
"\n",
667-
"# Extract unique authors for category orders\n",
668-
"#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n",
669-
"\n",
670-
"figure = plotly_express.treemap(\n",
671-
" git_files_with_commit_statistics,\n",
672-
" **plotly_treemap_color_settings,\n",
673-
" **plotly_treemap_commit_statistics_custom_data,\n",
674-
" ids='directoryPath',\n",
675-
" names='directoryName',\n",
676-
" parents='directoryParentPath',\n",
677-
" # Without values, many more squares are shown, which gives a much better overview\n",
678-
" # values='fileCount', \n",
679-
" color='mainAuthor',\n",
680-
" title='Directories and their main author (discrete coloring, no legend?)',\n",
681-
")\n",
682-
"figure.update_traces(\n",
683-
" **plotly_treemap_traces_commit_statistics_settings,\n",
684-
")\n",
685-
"figure.update_layout(\n",
686-
" **plotly_treemap_layout_base_settings,\n",
687-
" # coloraxis_colorbar=dict(title=\"Author\"),\n",
688-
" legend_title_text='Main Author',\n",
689-
" showlegend=True,\n",
690-
" legend_visible=True,\n",
691-
") \n",
692-
"\n",
693-
"figure.show(**plotly_treemap_figure_show_settings)"
694-
]
695-
},
696614
{
697615
"cell_type": "code",
698616
"execution_count": null,
@@ -986,6 +904,57 @@
986904
")\n",
987905
"figure.show(**plotly_treemap_figure_show_settings)"
988906
]
907+
},
908+
{
909+
"cell_type": "markdown",
910+
"id": "14e87aff",
911+
"metadata": {},
912+
"source": [
913+
"## WordCloud of git authors"
914+
]
915+
},
916+
{
917+
"cell_type": "code",
918+
"execution_count": null,
919+
"id": "e2f68f02",
920+
"metadata": {},
921+
"outputs": [],
922+
"source": [
923+
"# Query data from graph database\n",
924+
"git_author_words_with_frequency = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_git_author_Wordcloud_with_frequency.cypher\")\n",
925+
"# Debug \n",
926+
"# display(git_author_words_with_frequency.head(10))"
927+
]
928+
},
929+
{
930+
"cell_type": "code",
931+
"execution_count": null,
932+
"id": "d83ce5f4",
933+
"metadata": {},
934+
"outputs": [],
935+
"source": [
936+
"from wordcloud import WordCloud\n",
937+
"import matplotlib.pyplot as plot\n",
938+
"\n",
939+
"if not git_author_words_with_frequency.empty:\n",
940+
" # Expects the first column of the DataFrame to contain the words/text and the second column to contain the count/frequency.\n",
941+
" words_with_frequency_dict=git_author_words_with_frequency.set_index(git_author_words_with_frequency.columns[0]).to_dict()[git_author_words_with_frequency.columns[1]]\n",
942+
" wordcloud = WordCloud(\n",
943+
" width=800, \n",
944+
" height=800,\n",
945+
" max_words=600, \n",
946+
" collocations=False,\n",
947+
" background_color='white', \n",
948+
" colormap='viridis'\n",
949+
" ).generate_from_frequencies(words_with_frequency_dict)\n",
950+
"\n",
951+
" # Plot the word cloud\n",
952+
" plot.figure(figsize=(15,15))\n",
953+
" plot.imshow(wordcloud, interpolation='bilinear')\n",
954+
" plot.axis(\"off\")\n",
955+
" plot.title('Wordcloud of git authors')\n",
956+
" plot.show()"
957+
]
989958
}
990959
],
991960
"metadata": {

0 commit comments

Comments
 (0)