Skip to content

Commit 7a3e131

Browse files
committed
Add main and second git author treemap plots
1 parent bd3159d commit 7a3e131

File tree

1 file changed

+206
-26
lines changed

1 file changed

+206
-26
lines changed

jupyter/GitHistoryGeneral.ipynb

Lines changed: 206 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
"outputs": [],
3737
"source": [
3838
"from neo4j import GraphDatabase\n",
39-
"from plotly import graph_objects as plotly_graph_objects"
39+
"from plotly import graph_objects as plotly_graph_objects\n",
40+
"from plotly.express import colors as plotly_colors"
4041
]
4142
},
4243
{
@@ -246,8 +247,8 @@
246247
" labels=data_frame['directoryName'],\n",
247248
" parents=data_frame['directoryParentPath'],\n",
248249
" ids=data_frame['directoryPath'],\n",
249-
" customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n",
250-
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Last Commit: %{customdata[3]} (%{customdata[4]} days ago)<br>Last Created: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Modified: %{customdata[7]} (%{customdata[8]} days ago)<br>Path: %{customdata[9]}',\n",
250+
" customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'secondAuthor','lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n",
251+
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[3]}, %{customdata[4]},.. (%{customdata[2]})<br>Last Commit: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Created: %{customdata[7]} (%{customdata[8]} days ago)<br>Last Modified: %{customdata[9]} (%{customdata[10]} days ago)<br>Path: %{customdata[11]}',\n",
251252
" maxdepth=-1,\n",
252253
" root_color=\"lightgrey\",\n",
253254
" marker=dict(**plotly_treemap_marker_base_style),\n",
@@ -421,6 +422,16 @@
421422
"source": [
422423
"git_files_with_commit_statistics = query_cypher_to_data_frame(\"../cypher/GitLog/List_git_files_with_commit_statistics_by_author.cypher\")\n",
423424
"\n",
425+
"# Collect all authors, their total commit count and their rank based on that count in a separate dataframe.\n",
426+
"# This is needed later to visualize the main and second author of each directory.\n",
427+
"git_file_authors=git_files_with_commit_statistics[['author', 'commitCount']].groupby('author').aggregate(\n",
428+
" authorCommitCount=pd.NamedAgg(column=\"commitCount\", aggfunc=\"sum\"),\n",
429+
" ).sort_values(by='authorCommitCount', ascending=False).reset_index()\n",
430+
"git_file_authors['authorCommitCountRank'] = git_file_authors['authorCommitCount'].rank(ascending=True, method='dense').astype(int)\n",
431+
"\n",
432+
"# Debug\n",
433+
"# display(git_file_authors)\n",
434+
"\n",
424435
"# Debug\n",
425436
"# display(\"1. query result ---------------------\")\n",
426437
"# display(git_files_with_commit_statistics)\n",
@@ -524,7 +535,7 @@
524535
"id": "ccc11f52",
525536
"metadata": {},
526537
"source": [
527-
"### Directories by file count"
538+
"### Number of files per directory"
528539
]
529540
},
530541
{
@@ -545,16 +556,6 @@
545556
"figure.show(**plotly_treemap_figure_show_settings)"
546557
]
547558
},
548-
{
549-
"cell_type": "code",
550-
"execution_count": null,
551-
"id": "fb399f44",
552-
"metadata": {},
553-
"outputs": [],
554-
"source": [
555-
"# TODO Directories by main author"
556-
]
557-
},
558559
{
559560
"cell_type": "markdown",
560561
"id": "e98ca7b1",
@@ -574,7 +575,8 @@
574575
"\n",
575576
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
576577
" create_treemap_commit_statistics_settings(git_commit_count_per_directory),\n",
577-
" values = git_commit_count_per_directory['fileCount'],\n",
578+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
579+
" # values = git_commit_count_per_directory['fileCount'],\n",
578580
" marker=dict(\n",
579581
" **plotly_treemap_marker_base_colorscale,\n",
580582
" colors=git_commit_count_per_directory['commitCount_limited'], \n",
@@ -603,11 +605,12 @@
603605
"metadata": {},
604606
"outputs": [],
605607
"source": [
606-
"git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.96)\n",
608+
"git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.98)\n",
607609
"\n",
608610
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
609611
" create_treemap_commit_statistics_settings(git_commit_authors_per_directory),\n",
610-
" values = git_commit_authors_per_directory['fileCount'],\n",
612+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
613+
" # values = git_commit_authors_per_directory['fileCount'],\n",
611614
" marker=dict(\n",
612615
" **plotly_treemap_marker_base_colorscale,\n",
613616
" colors=git_commit_authors_per_directory['authorCount_limited'], \n",
@@ -621,6 +624,171 @@
621624
"figure.show(**plotly_treemap_figure_show_settings)"
622625
]
623626
},
627+
{
628+
"cell_type": "markdown",
629+
"id": "5dbceaef",
630+
"metadata": {},
631+
"source": [
632+
"### Main author per directory"
633+
]
634+
},
635+
{
636+
"cell_type": "code",
637+
"execution_count": null,
638+
"id": "29069753",
639+
"metadata": {},
640+
"outputs": [],
641+
"source": [
642+
"# TODO delete unused code"
643+
]
644+
},
645+
{
646+
"cell_type": "raw",
647+
"id": "7ccca44e",
648+
"metadata": {},
649+
"source": [
650+
"# TODO experiment again with plotly express\n",
651+
"\n",
652+
"import plotly.express as plotly_express\n",
653+
"\n",
654+
"plotly_treemap_color_settings = dict(\n",
655+
" color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n",
656+
" color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n",
657+
")\n",
658+
"plotly_treemap_commit_statistics_custom_data= dict(\n",
659+
" custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n",
660+
")\n",
661+
"plotly_treemap_traces_base_settings = dict(\n",
662+
" root_color=\"lightgrey\",\n",
663+
" textinfo=\"label+value\",\n",
664+
" marker=dict(cornerradius=5),\n",
665+
")\n",
666+
"plotly_treemap_traces_commit_statistics_settings = dict(\n",
667+
" **plotly_treemap_traces_base_settings,\n",
668+
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n",
669+
")\n",
670+
"plotly_treemap_layout_base_settings = dict(\n",
671+
" margin=dict(t=50, l=15, r=15, b=15),\n",
672+
")\n",
673+
"\n",
674+
"# Extract unique authors for category orders\n",
675+
"#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n",
676+
"\n",
677+
"figure = plotly_express.treemap(\n",
678+
" git_files_with_commit_statistics,\n",
679+
" **plotly_treemap_color_settings,\n",
680+
" **plotly_treemap_commit_statistics_custom_data,\n",
681+
" ids='directoryPath',\n",
682+
" names='directoryName',\n",
683+
" parents='directoryParentPath',\n",
684+
"    # Without values, many more squares are shown, which gives a much better overview\n",
685+
" # values='fileCount', \n",
686+
" color='mainAuthor',\n",
687+
" title='Directories and their main author (discrete coloring, no legend?)',\n",
688+
")\n",
689+
"figure.update_traces(\n",
690+
" **plotly_treemap_traces_commit_statistics_settings,\n",
691+
")\n",
692+
"figure.update_layout(\n",
693+
" **plotly_treemap_layout_base_settings,\n",
694+
" # coloraxis_colorbar=dict(title=\"Author\"),\n",
695+
" legend_title_text='Main Author',\n",
696+
" showlegend=True,\n",
697+
" legend_visible=True,\n",
698+
") \n",
699+
"\n",
700+
"figure.show(**plotly_treemap_figure_show_settings)"
701+
]
702+
},
703+
{
704+
"cell_type": "code",
705+
"execution_count": null,
706+
"id": "259f7278",
707+
"metadata": {},
708+
"outputs": [],
709+
"source": [
710+
"def create_git_authors_graph_objects_treemap_marker(main_data_frame: pd.DataFrame, author_rank_data_frame: pd.DataFrame, author_column_name: str):\n",
711+
" \"\"\"\n",
712+
" Creates a plotly graph_objects.Treemap marker object for git author plots.\n",
713+
" main_data_frame : pd.DataFrame : The DataFrame that contains the git directories and their commit statistics\n",
714+
"    author_rank_data_frame : pd.DataFrame : The DataFrame that contains the git authors, their commit count and their rank derived from that count.\n",
715+
" author_column_name : str : The name of the (aggregated) author column for coloring the plot\n",
716+
"    return : dict : The marker settings as a dictionary (passed as the marker argument of plotly_graph_objects.Treemap)\n",
717+
" \"\"\"\n",
718+
" data_frame_with_authors=pd.merge(\n",
719+
" main_data_frame, \n",
720+
" author_rank_data_frame, \n",
721+
" left_on=author_column_name, \n",
722+
" right_on=\"author\",\n",
723+
" how=\"left\",\n",
724+
" validate=\"m:1\"\n",
725+
" )\n",
726+
"    # display(data_frame_with_authors)\n",
727+
"\n",
728+
" data_frame_with_author_ranks=data_frame_with_authors['authorCommitCountRank']\n",
729+
"\n",
730+
" return dict(\n",
731+
" cornerradius=5, \n",
732+
" colors=data_frame_with_author_ranks,\n",
733+
" colorscale=plotly_colors.qualitative.G10, #favorites: plotly_colors.qualitative.G10, Blackbody, ice, haline, hot\n",
734+
" colorbar=dict(\n",
735+
" title=\"Rank\",\n",
736+
" tickmode=\"array\",\n",
737+
" ticktext=data_frame_with_authors[author_column_name],\n",
738+
" tickvals=data_frame_with_author_ranks,\n",
739+
" tickfont_size=8\n",
740+
" ),\n",
741+
" )\n"
742+
]
743+
},
744+
{
745+
"cell_type": "code",
746+
"execution_count": null,
747+
"id": "e97c0d87",
748+
"metadata": {},
749+
"outputs": [],
750+
"source": [
751+
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
752+
" create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n",
753+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
754+
" # values = git_files_with_commit_statistics['fileCount'],\n",
755+
" marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"mainAuthor\")\n",
756+
"))\n",
757+
"figure.update_layout(\n",
758+
" **plotly_treemap_layout_base_settings,\n",
759+
" title='Main author (highest number of commits)'\n",
760+
")\n",
761+
"figure.show(**plotly_treemap_figure_show_settings)"
762+
]
763+
},
764+
{
765+
"cell_type": "markdown",
766+
"id": "349a1d03",
767+
"metadata": {},
768+
"source": [
769+
"### Second author per directory"
770+
]
771+
},
772+
{
773+
"cell_type": "code",
774+
"execution_count": null,
775+
"id": "29484f84",
776+
"metadata": {},
777+
"outputs": [],
778+
"source": [
779+
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
780+
" create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n",
781+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
782+
" # values = git_files_with_commit_statistics['fileCount'],\n",
783+
" marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"secondAuthor\")\n",
784+
"))\n",
785+
"figure.update_layout(\n",
786+
" **plotly_treemap_layout_base_settings,\n",
787+
" title='Second author (second highest number of commits)'\n",
788+
")\n",
789+
"figure.show(**plotly_treemap_figure_show_settings)"
790+
]
791+
},
624792
{
625793
"cell_type": "markdown",
626794
"id": "0ed919b0",
@@ -636,11 +804,12 @@
636804
"metadata": {},
637805
"outputs": [],
638806
"source": [
639-
"git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n",
807+
"git_commit_days_since_last_commit_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCommit\", 0.98)\n",
640808
"\n",
641809
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
642810
" create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
643-
" values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
811+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
812+
" #values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
644813
" marker=dict(\n",
645814
" **plotly_treemap_marker_base_colorscale,\n",
646815
" colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_limited'], \n",
@@ -670,9 +839,12 @@
670839
"metadata": {},
671840
"outputs": [],
672841
"source": [
842+
"git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n",
843+
"\n",
673844
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
674845
" create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
675-
" values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
846+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
847+
" # values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
676848
" marker=dict(\n",
677849
" **plotly_treemap_marker_base_colorscale,\n",
678850
" colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_rank'], \n",
@@ -702,11 +874,12 @@
702874
"metadata": {},
703875
"outputs": [],
704876
"source": [
705-
"git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n",
877+
"git_commit_days_since_last_file_creation_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCreation\", 0.98)\n",
706878
"\n",
707879
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
708880
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
709-
" values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
881+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
882+
" # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
710883
" marker=dict(\n",
711884
" **plotly_treemap_marker_base_colorscale,\n",
712885
" colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_limited'], \n",
@@ -735,9 +908,12 @@
735908
"metadata": {},
736909
"outputs": [],
737910
"source": [
911+
"git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n",
912+
"\n",
738913
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
739914
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
740-
" values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
915+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
916+
" # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
741917
" marker=dict(\n",
742918
" **plotly_treemap_marker_base_colorscale,\n",
743919
" colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_rank'], \n",
@@ -766,11 +942,12 @@
766942
"metadata": {},
767943
"outputs": [],
768944
"source": [
769-
"git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n",
945+
"git_commit_days_since_last_file_modification_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastModification\", 0.98)\n",
770946
"\n",
771947
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
772948
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
773-
" values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
949+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
950+
" # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
774951
" marker=dict(\n",
775952
" **plotly_treemap_marker_base_colorscale,\n",
776953
" colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_limited'], \n",
@@ -799,9 +976,12 @@
799976
"metadata": {},
800977
"outputs": [],
801978
"source": [
979+
"git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n",
980+
"\n",
802981
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
803982
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
804-
" values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
983+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
984+
" # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
805985
" marker=dict(\n",
806986
" **plotly_treemap_marker_base_colorscale,\n",
807987
" colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_rank'], \n",

0 commit comments

Comments
 (0)