Skip to content

Commit 545c39e

Browse files
committed
Add main and second git author treemap plots
1 parent ce14759 commit 545c39e

File tree

1 file changed

+203
-30
lines changed

1 file changed

+203
-30
lines changed

jupyter/GitHistoryGeneral.ipynb

+203-30
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
"outputs": [],
3737
"source": [
3838
"from neo4j import GraphDatabase\n",
39-
"from plotly import graph_objects as plotly_graph_objects"
39+
"from plotly import graph_objects as plotly_graph_objects\n",
40+
"from plotly.express import colors as plotly_colors"
4041
]
4142
},
4243
{
@@ -216,7 +217,7 @@
216217
"plotly_treemap_figure_show_settings = dict(\n",
217218
" renderer=\"svg\" if is_command_line_execution() else None,\n",
218219
" width=1000,\n",
219-
" height=550\n",
220+
" height=800\n",
220221
")"
221222
]
222223
},
@@ -237,8 +238,8 @@
237238
" labels=data_frame['directoryName'],\n",
238239
" parents=data_frame['directoryParentPath'],\n",
239240
" ids=data_frame['directoryPath'],\n",
240-
" customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n",
241-
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Last Commit: %{customdata[3]} (%{customdata[4]} days ago)<br>Last Created: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Modified: %{customdata[7]} (%{customdata[8]} days ago)<br>Path: %{customdata[9]}',\n",
241+
" customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n",
242+
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n",
242243
" maxdepth=-1,\n",
243244
" root_color=\"lightgrey\",\n",
244245
" marker=dict(cornerradius=5),\n",
@@ -412,6 +413,16 @@
412413
"source": [
413414
"git_files_with_commit_statistics = query_cypher_to_data_frame(\"../cypher/GitLog/List_git_files_with_commit_statistics_by_author.cypher\")\n",
414415
"\n",
416+
"# Collect all authors together with their total commit count and the rank derived from it in a separate dataframe.\n",
417+
"# This will then be needed to visualize the (main) author for each directory.\n",
418+
"git_file_authors=git_files_with_commit_statistics[['author', 'commitCount']].groupby('author').aggregate(\n",
419+
" authorCommitCount=pd.NamedAgg(column=\"commitCount\", aggfunc=\"sum\"),\n",
420+
" ).sort_values(by='authorCommitCount', ascending=False).reset_index()\n",
421+
"git_file_authors['authorCommitCountRank'] = git_file_authors['authorCommitCount'].rank(ascending=True, method='dense').astype(int)\n",
422+
"\n",
423+
"# Debug\n",
424+
"# display(git_file_authors)\n",
425+
"\n",
415426
"# Debug\n",
416427
"# display(\"1. query result ---------------------\")\n",
417428
"# display(git_files_with_commit_statistics)\n",
@@ -515,7 +526,7 @@
515526
"id": "ccc11f52",
516527
"metadata": {},
517528
"source": [
518-
"### Directories by file count"
529+
"### Number of files per directory"
519530
]
520531
},
521532
{
@@ -536,16 +547,6 @@
536547
"figure.show(**plotly_treemap_figure_show_settings)"
537548
]
538549
},
539-
{
540-
"cell_type": "code",
541-
"execution_count": null,
542-
"id": "fb399f44",
543-
"metadata": {},
544-
"outputs": [],
545-
"source": [
546-
"# TODO Directories by main author"
547-
]
548-
},
549550
{
550551
"cell_type": "markdown",
551552
"id": "e98ca7b1",
@@ -565,7 +566,8 @@
565566
"\n",
566567
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
567568
" create_treemap_commit_statistics_settings(git_commit_count_per_directory),\n",
568-
" values = git_commit_count_per_directory['fileCount'],\n",
569+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
570+
" # values = git_commit_count_per_directory['fileCount'],\n",
569571
" marker=dict(\n",
570572
" cornerradius=5, \n",
571573
" colors=git_commit_count_per_directory['commitCount_limited'], \n",
@@ -595,11 +597,12 @@
595597
"metadata": {},
596598
"outputs": [],
597599
"source": [
598-
"git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.96)\n",
600+
"git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.98)\n",
599601
"\n",
600602
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
601603
" create_treemap_commit_statistics_settings(git_commit_authors_per_directory),\n",
602-
" values = git_commit_authors_per_directory['fileCount'],\n",
604+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
605+
" # values = git_commit_authors_per_directory['fileCount'],\n",
603606
" marker=dict(\n",
604607
" cornerradius=5, \n",
605608
" colors=git_commit_authors_per_directory['authorCount_limited'], \n",
@@ -614,6 +617,164 @@
614617
"figure.show(**plotly_treemap_figure_show_settings)"
615618
]
616619
},
620+
{
621+
"cell_type": "markdown",
622+
"id": "5dbceaef",
623+
"metadata": {},
624+
"source": [
625+
"### Main author per directory"
626+
]
627+
},
628+
{
629+
"cell_type": "code",
630+
"execution_count": null,
631+
"id": "29069753",
632+
"metadata": {},
633+
"outputs": [],
634+
"source": [
635+
"# TODO delete unused code"
636+
]
637+
},
638+
{
639+
"cell_type": "raw",
640+
"id": "7ccca44e",
641+
"metadata": {},
642+
"source": [
643+
"# TODO experiment again with plotly express\n",
644+
"\n",
645+
"import plotly.express as plotly_express\n",
646+
"\n",
647+
"plotly_treemap_color_settings = dict(\n",
648+
" color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n",
649+
" color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n",
650+
")\n",
651+
"plotly_treemap_commit_statistics_custom_data= dict(\n",
652+
" custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n",
653+
")\n",
654+
"plotly_treemap_traces_base_settings = dict(\n",
655+
" root_color=\"lightgrey\",\n",
656+
" textinfo=\"label+value\",\n",
657+
" marker=dict(cornerradius=5),\n",
658+
")\n",
659+
"plotly_treemap_traces_commit_statistics_settings = dict(\n",
660+
" **plotly_treemap_traces_base_settings,\n",
661+
" hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n",
662+
")\n",
663+
"plotly_treemap_layout_base_settings = dict(\n",
664+
" margin=dict(t=50, l=15, r=15, b=15),\n",
665+
")\n",
666+
"\n",
667+
"# Extract unique authors for category orders\n",
668+
"#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n",
669+
"\n",
670+
"figure = plotly_express.treemap(\n",
671+
" git_files_with_commit_statistics,\n",
672+
" **plotly_treemap_color_settings,\n",
673+
" **plotly_treemap_commit_statistics_custom_data,\n",
674+
" ids='directoryPath',\n",
675+
" names='directoryName',\n",
676+
" parents='directoryParentPath',\n",
677+
"    # Without values, many more squares are shown, which gives a much better overview\n",
678+
" # values='fileCount', \n",
679+
" color='mainAuthor',\n",
680+
" title='Directories and their main author (discrete coloring, no legend?)',\n",
681+
")\n",
682+
"figure.update_traces(\n",
683+
" **plotly_treemap_traces_commit_statistics_settings,\n",
684+
")\n",
685+
"figure.update_layout(\n",
686+
" **plotly_treemap_layout_base_settings,\n",
687+
" # coloraxis_colorbar=dict(title=\"Author\"),\n",
688+
" legend_title_text='Main Author',\n",
689+
" showlegend=True,\n",
690+
" legend_visible=True,\n",
691+
") \n",
692+
"\n",
693+
"figure.show(**plotly_treemap_figure_show_settings)"
694+
]
695+
},
696+
{
697+
"cell_type": "code",
698+
"execution_count": null,
699+
"id": "259f7278",
700+
"metadata": {},
701+
"outputs": [],
702+
"source": [
703+
"def create_git_authors_graph_objects_treemap_marker(main_data_frame: pd.DataFrame, author_rank_data_frame: pd.DataFrame, author_column_name: str):\n",
704+
"    \"\"\"\n",
705+
"    Creates the marker settings of a plotly graph_objects.Treemap for git author plots, colored by the commit count rank of the author.\n",
706+
"    main_data_frame : pd.DataFrame : The DataFrame that contains the git directories and their commit statistics. Its rows and their order are kept so that the colors line up with the treemap entries.\n",
707+
"    author_rank_data_frame : pd.DataFrame : The DataFrame that contains the git authors (column 'author'), their commit count and based on that their rank (column 'authorCommitCountRank').\n",
708+
"    author_column_name : str : The name of the (aggregated) author column in main_data_frame for coloring the plot\n",
709+
"    return : dict : The marker settings that can be passed as 'marker' to plotly_graph_objects.Treemap\n",
710+
"    \"\"\"\n",
711+
"    data_frame_with_authors=pd.merge(main_data_frame, author_rank_data_frame, left_on=author_column_name, right_on=\"author\", how=\"left\")\n",
712+
"    # Debug: display(data_frame_with_authors)  # left join: rows without a matching author keep their position and get no rank\n",
713+
"\n",
714+
"    data_frame_with_author_ranks=data_frame_with_authors['authorCommitCountRank']\n",
715+
"\n",
716+
"    return dict(\n",
717+
"        cornerradius=5, \n",
718+
"        colors=data_frame_with_author_ranks,\n",
719+
"        colorscale=plotly_colors.qualitative.G10, #favorites: plotly_colors.qualitative.G10, Blackbody, ice, haline, hot\n",
720+
"        colorbar=dict(\n",
721+
"            title=\"Rank\",\n",
722+
"            tickmode=\"array\",\n",
723+
"            ticktext=data_frame_with_authors[author_column_name],\n",
724+
"            tickvals=data_frame_with_author_ranks,\n",
725+
"            tickfont_size=8\n",
726+
"        ),\n",
727+
"    )\n"
728+
]
729+
},
730+
{
731+
"cell_type": "code",
732+
"execution_count": null,
733+
"id": "e97c0d87",
734+
"metadata": {},
735+
"outputs": [],
736+
"source": [
737+
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
738+
" create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n",
739+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
740+
" # values = git_files_with_commit_statistics['fileCount'],\n",
741+
" marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"mainAuthor\")\n",
742+
"))\n",
743+
"figure.update_layout(\n",
744+
" **plotly_treemap_layout_base_settings,\n",
745+
" title='Main author (highest number of commits)'\n",
746+
")\n",
747+
"figure.show(**plotly_treemap_figure_show_settings)"
748+
]
749+
},
750+
{
751+
"cell_type": "markdown",
752+
"id": "349a1d03",
753+
"metadata": {},
754+
"source": [
755+
"### Second author per directory"
756+
]
757+
},
758+
{
759+
"cell_type": "code",
760+
"execution_count": null,
761+
"id": "29484f84",
762+
"metadata": {},
763+
"outputs": [],
764+
"source": [
765+
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
766+
" create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n",
767+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
768+
" # values = git_files_with_commit_statistics['fileCount'],\n",
769+
" marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"secondAuthor\")\n",
770+
"))\n",
771+
"figure.update_layout(\n",
772+
" **plotly_treemap_layout_base_settings,\n",
773+
" title='Second author (second highest number of commits)'\n",
774+
")\n",
775+
"figure.show(**plotly_treemap_figure_show_settings)"
776+
]
777+
},
617778
{
618779
"cell_type": "markdown",
619780
"id": "0ed919b0",
@@ -629,14 +790,15 @@
629790
"metadata": {},
630791
"outputs": [],
631792
"source": [
632-
"git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n",
793+
"git_commit_days_since_last_commit_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCommit\", 0.98)\n",
633794
"\n",
634795
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
635796
" create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
636-
" values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
797+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
798+
" #values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
637799
" marker=dict(\n",
638800
" cornerradius=5, \n",
639-
" colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit'], \n",
801+
" colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_limited'], \n",
640802
" colorscale='Hot_r',\n",
641803
" colorbar=dict(title=\"Days\"),\n",
642804
" ),\n",
@@ -664,9 +826,12 @@
664826
"metadata": {},
665827
"outputs": [],
666828
"source": [
829+
"git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n",
830+
"\n",
667831
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
668832
" create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
669-
" values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
833+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
834+
" # values = git_commit_days_since_last_commit_per_directory['fileCount'],\n",
670835
" marker=dict(\n",
671836
" cornerradius=5, \n",
672837
" colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_rank'], \n",
@@ -697,14 +862,15 @@
697862
"metadata": {},
698863
"outputs": [],
699864
"source": [
700-
"git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n",
865+
"git_commit_days_since_last_file_creation_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCreation\", 0.98)\n",
701866
"\n",
702867
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
703868
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
704-
" values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
869+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
870+
" # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
705871
" marker=dict(\n",
706872
" cornerradius=5, \n",
707-
" colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation'], \n",
873+
" colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_limited'], \n",
708874
" colorscale='Hot_r',\n",
709875
" colorbar=dict(title=\"Days\"),\n",
710876
" ),\n",
@@ -731,9 +897,12 @@
731897
"metadata": {},
732898
"outputs": [],
733899
"source": [
900+
"git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n",
901+
"\n",
734902
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
735903
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
736-
" values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
904+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
905+
" # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n",
737906
" marker=dict(\n",
738907
" cornerradius=5, \n",
739908
" colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_rank'], \n",
@@ -763,14 +932,15 @@
763932
"metadata": {},
764933
"outputs": [],
765934
"source": [
766-
"git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n",
935+
"git_commit_days_since_last_file_modification_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastModification\", 0.98)\n",
767936
"\n",
768937
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
769938
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
770-
" values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
939+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
940+
" # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
771941
" marker=dict(\n",
772942
" cornerradius=5, \n",
773-
" colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification'], \n",
943+
" colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_limited'], \n",
774944
" colorscale='Hot_r',\n",
775945
" colorbar=dict(title=\"Days\"),\n",
776946
" ),\n",
@@ -797,9 +967,12 @@
797967
"metadata": {},
798968
"outputs": [],
799969
"source": [
970+
"git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n",
971+
"\n",
800972
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
801973
" create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
802-
" values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
974+
"    # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
975+
" # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n",
803976
" marker=dict(\n",
804977
" cornerradius=5, \n",
805978
" colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_rank'], \n",

0 commit comments

Comments
 (0)