Skip to content

Commit 76c3111

Browse files
committed
Add wordcloud to git history
1 parent 2a4afd3 commit 76c3111

File tree

1 file changed

+57
-88
lines changed

1 file changed

+57
-88
lines changed

jupyter/GitHistoryGeneral.ipynb

Lines changed: 57 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,9 @@
9191
"source": [
9292
"def get_cypher_query_from_file(cypher_file_name : str):\n",
9393
" with open(cypher_file_name) as file:\n",
94-
" return ' '.join(file.readlines())"
95-
]
96-
},
97-
{
98-
"cell_type": "code",
99-
"execution_count": null,
100-
"id": "59310f6f",
101-
"metadata": {},
102-
"outputs": [],
103-
"source": [
94+
" return ' '.join(file.readlines())\n",
95+
"\n",
96+
"\n",
10497
"def query_cypher_to_data_frame(filename : str, limit: int = -1):\n",
10598
" \"\"\"\n",
10699
" Executes the Cypher query of the given file and returns the result.\n",
@@ -111,16 +104,9 @@
111104
" if limit > 0:\n",
112105
" cypher_query = \"{query}\\nLIMIT {row_limit}\".format(query = cypher_query, row_limit = limit)\n",
113106
" records, summary, keys = driver.execute_query(cypher_query)\n",
114-
" return pd.DataFrame([r.values() for r in records], columns=keys)"
115-
]
116-
},
117-
{
118-
"cell_type": "code",
119-
"execution_count": null,
120-
"id": "c09da482",
121-
"metadata": {},
122-
"outputs": [],
123-
"source": [
107+
" return pd.DataFrame([r.values() for r in records], columns=keys)\n",
108+
"\n",
109+
"\n",
124110
"def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = -1):\n",
125111
" \"\"\"\n",
126112
" Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
@@ -632,74 +618,6 @@
632618
"### Main author per directory"
633619
]
634620
},
635-
{
636-
"cell_type": "code",
637-
"execution_count": null,
638-
"id": "29069753",
639-
"metadata": {},
640-
"outputs": [],
641-
"source": [
642-
"# TODO delete unused code"
643-
]
644-
},
645-
{
646-
"cell_type": "raw",
647-
"id": "7ccca44e",
648-
"metadata": {},
649-
"source": [
650-
"# TODO experiment again with plotly express\n",
651-
"\n",
652-
"import plotly.express as plotly_express\n",
653-
"\n",
654-
"plotly_treemap_color_settings = dict(\n",
655-
" color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n",
656-
" color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n",
657-
")\n",
658-
"plotly_treemap_commit_statistics_custom_data= dict(\n",
659-
" custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n",
660-
")\n",
661-
"plotly_treemap_traces_base_settings = dict(\n",
662-
" root_color=\"lightgrey\",\n",
663-
" textinfo=\"label+value\",\n",
664-
" marker=dict(cornerradius=5),\n",
665-
")\n",
666-
"plotly_treemap_traces_commit_statistics_settings = dict(\n",
667-
" **plotly_treemap_traces_base_settings,\n",
668-
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n",
669-
")\n",
670-
"plotly_treemap_layout_base_settings = dict(\n",
671-
" margin=dict(t=50, l=15, r=15, b=15),\n",
672-
")\n",
673-
"\n",
674-
"# Extract unique authors for category orders\n",
675-
"#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n",
676-
"\n",
677-
"figure = plotly_express.treemap(\n",
678-
" git_files_with_commit_statistics,\n",
679-
" **plotly_treemap_color_settings,\n",
680-
" **plotly_treemap_commit_statistics_custom_data,\n",
681-
" ids='directoryPath',\n",
682-
" names='directoryName',\n",
683-
" parents='directoryParentPath',\n",
684-
" # Without values, many more squares are shown, which gives a much better overview\n",
685-
" # values='fileCount', \n",
686-
" color='mainAuthor',\n",
687-
" title='Directories and their main author (discrete coloring, no legend?)',\n",
688-
")\n",
689-
"figure.update_traces(\n",
690-
" **plotly_treemap_traces_commit_statistics_settings,\n",
691-
")\n",
692-
"figure.update_layout(\n",
693-
" **plotly_treemap_layout_base_settings,\n",
694-
" # coloraxis_colorbar=dict(title=\"Author\"),\n",
695-
" legend_title_text='Main Author',\n",
696-
" showlegend=True,\n",
697-
" legend_visible=True,\n",
698-
") \n",
699-
"\n",
700-
"figure.show(**plotly_treemap_figure_show_settings)"
701-
]
702-
},
703621
{
704622
"cell_type": "code",
705623
"execution_count": null,
@@ -994,6 +912,57 @@
994912
")\n",
995913
"figure.show(**plotly_treemap_figure_show_settings)"
996914
]
915+
},
916+
{
917+
"cell_type": "markdown",
918+
"id": "14e87aff",
919+
"metadata": {},
920+
"source": [
921+
"## WordCloud of git authors"
922+
]
923+
},
924+
{
925+
"cell_type": "code",
926+
"execution_count": null,
927+
"id": "e2f68f02",
928+
"metadata": {},
929+
"outputs": [],
930+
"source": [
931+
"# Query data from graph database\n",
932+
"git_author_words_with_frequency = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_git_author_Wordcloud_with_frequency.cypher\")\n",
933+
"# Debug \n",
934+
"# display(git_author_words_with_frequency.head(10))"
935+
]
936+
},
937+
{
938+
"cell_type": "code",
939+
"execution_count": null,
940+
"id": "d83ce5f4",
941+
"metadata": {},
942+
"outputs": [],
943+
"source": [
944+
"from wordcloud import WordCloud\n",
945+
"import matplotlib.pyplot as plot\n",
946+
"\n",
947+
"if not git_author_words_with_frequency.empty:\n",
948+
" # Expects the first column of the DataFrame to contain the words/text and the second column to contain the count/frequency.\n",
949+
" words_with_frequency_dict=git_author_words_with_frequency.set_index(git_author_words_with_frequency.columns[0]).to_dict()[git_author_words_with_frequency.columns[1]]\n",
950+
" wordcloud = WordCloud(\n",
951+
" width=800, \n",
952+
" height=800,\n",
953+
" max_words=600, \n",
954+
" collocations=False,\n",
955+
" background_color='white', \n",
956+
" colormap='viridis'\n",
957+
" ).generate_from_frequencies(words_with_frequency_dict)\n",
958+
"\n",
959+
" # Plot the word cloud\n",
960+
" plot.figure(figsize=(15,15))\n",
961+
" plot.imshow(wordcloud, interpolation='bilinear')\n",
962+
" plot.axis(\"off\")\n",
963+
" plot.title('Wordcloud of git authors')\n",
964+
" plot.show()"
965+
]
997966
}
998967
],
999968
"metadata": {

0 commit comments

Comments
 (0)