Skip to content

Commit 30349a7

Browse files
committed
Add treemap plot that shows commit counts of pairwise changed files
1 parent 46290ac commit 30349a7

3 files changed

+183
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// List pairs of git files that were changed together frequently
2+
3+
MATCH (global_git_commit:Git:Commit)
4+
WITH count(global_git_commit) AS globalCommitCount
5+
MATCH (git_commit:Git:Commit)-[:CONTAINS_CHANGE]->(git_change:Git:Change:Update)-[:UPDATES]->(git_file:Git:File)
6+
MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file)
7+
WHERE git_file.deletedAt IS NULL
8+
WITH *, git_repository.name + '/' + git_file.relativePath AS filePath
9+
WITH globalCommitCount
10+
,git_commit.sha AS commitHash
11+
,collect(DISTINCT filePath) AS filesInCommit
12+
WHERE size(filesInCommit) >= 2
13+
AND size(filesInCommit) <= 50
14+
WITH globalCommitCount
15+
,commitHash
16+
,apoc.coll.combinations(filesInCommit, 2, 2) AS fileCombinations
17+
UNWIND fileCombinations AS fileCombination
18+
WITH globalCommitCount
19+
,apoc.coll.sort(fileCombination) AS fileCombination
20+
,count(DISTINCT commitHash) AS commitCount
21+
WHERE commitCount > globalCommitCount * 0.001 // Filter out combinations that are too rare
22+
RETURN fileCombination[0] AS firstFile
23+
,fileCombination[1] AS secondFile
24+
,commitCount
25+
ORDER BY commitCount DESC
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// List git files that were frequently changed together with other files
2+
3+
MATCH (global_git_commit:Git:Commit)
4+
WITH count(global_git_commit) AS globalCommitCount
5+
MATCH (git_commit:Git:Commit)-[:CONTAINS_CHANGE]->(git_change:Git:Change:Update)-[:UPDATES]->(git_file:Git:File)
6+
MATCH (git_repository:Git&Repository)-[:HAS_FILE]->(git_file)
7+
WHERE git_file.deletedAt IS NULL
8+
WITH *, git_repository.name + '/' + git_file.relativePath AS filePath
9+
WITH globalCommitCount
10+
,git_commit.sha AS commitHash
11+
,collect(DISTINCT filePath) AS filesInCommit
12+
WHERE size(filesInCommit) >= 2
13+
AND size(filesInCommit) <= 50
14+
WITH globalCommitCount
15+
,commitHash
16+
,apoc.coll.combinations(filesInCommit, 2, 2) AS fileCombinations
17+
UNWIND fileCombinations AS fileCombination
18+
UNWIND fileCombination AS filePath
19+
WITH globalCommitCount
20+
,filePath
21+
,count(DISTINCT commitHash) AS commitCount
22+
WHERE commitCount > globalCommitCount * 0.001 // Filter out combinations that are too rare
23+
RETURN filePath
24+
,commitCount
25+
ORDER BY commitCount DESC

jupyter/GitHistoryGeneral.ipynb

+133-1
Original file line numberDiff line numberDiff line change
@@ -793,17 +793,62 @@
793793
" marker=dict(\n",
794794
" **plotly_treemap_marker_base_colorscale,\n",
795795
" colors=git_commit_authors_per_directory_low_focus['authorCount_limited'], \n",
796-
" colorbar=dict(title=\"Authors\", labelalias=author_count_top_limit_label_alias),\n",
796+
" colorbar=dict(title=\"Authors\",\n",
797+
" tickmode=\"auto\",\n",
798+
" labelalias=author_count_top_limit_label_alias\n",
799+
" ),\n",
797800
" reversescale=True\n",
798801
" ),\n",
799802
"))\n",
800803
"figure.update_layout(\n",
801804
" **plotly_treemap_layout_base_settings,\n",
802805
" title='Number of distinct commit authors (red/black = ony one or very few authors)',\n",
803806
")\n",
807+
"\n",
804808
"figure.show(**plotly_treemap_figure_show_settings)"
805809
]
806810
},
811+
{
812+
"cell_type": "code",
813+
"execution_count": null,
814+
"id": "e11947c5",
815+
"metadata": {},
816+
"outputs": [],
817+
"source": [
818+
"import plotly.graph_objects as go\n",
819+
"\n",
820+
"# Example data\n",
821+
"labels = [\"A\", \"B\", \"C\", \"D\", \"E\"]\n",
822+
"parents = [\"\", \"A\", \"A\", \"B\", \"B\"]\n",
823+
"values = [10, 20, 30, 40, 50] # Color scale values\n",
824+
"max_value = max(values)\n",
825+
"\n",
826+
"# Create treemap\n",
827+
"fig = go.Figure(go.Treemap(\n",
828+
" labels=labels,\n",
829+
" parents=parents,\n",
830+
" values=values,\n",
831+
" marker=dict(\n",
832+
" colors=values,\n",
833+
" colorscale=\"Blues\",\n",
834+
" colorbar=dict(\n",
835+
" title=\"Value\",\n",
836+
" tickmode=\"auto\", # Let Plotly auto-select ticks\n",
837+
" ticklabelposition=\"outside top\",\n",
838+
" tickformat=\",\", # Use default formatting\n",
839+
" ticklabeloverflow=\"allow\", # Ensure long labels are displayed\n",
840+
" ticklabelstep=1 # Show all labels\n",
841+
" )\n",
842+
" )\n",
843+
"))\n",
844+
"\n",
845+
"# Add an alias for the highest tick value dynamically\n",
846+
"fig.update_layout(coloraxis_colorbar_tickvals=[max_value])\n",
847+
"fig.update_layout(coloraxis_colorbar_ticktext=[f\"{max_value} or more\"])\n",
848+
"\n",
849+
"fig.show()\n"
850+
]
851+
},
807852
{
808853
"cell_type": "markdown",
809854
"id": "5dbceaef",
@@ -1083,6 +1128,93 @@
10831128
"figure.show(**plotly_treemap_figure_show_settings)"
10841129
]
10851130
},
1131+
{
1132+
"cell_type": "markdown",
1133+
"id": "80bd7c28",
1134+
"metadata": {},
1135+
"source": [
1136+
"### Files changed frequently with other files"
1137+
]
1138+
},
1139+
{
1140+
"cell_type": "code",
1141+
"execution_count": null,
1142+
"id": "24055998",
1143+
"metadata": {},
1144+
"outputs": [],
1145+
"source": [
1146+
"pairwise_changed_git_files = query_cypher_to_data_frame(\"../cypher/GitLog/List_git_files_that_were_changed_together_unwinded.cypher\")\n",
1147+
"\n",
1148+
"# Debug\n",
1149+
"# display(\"1. pairwise changed files --------------\")\n",
1150+
"# display(pairwise_changed_git_files)\n",
1151+
"\n",
1152+
"# Add multiple rows for each file path containing all its directories paths in the new column 'directoryPath'\n",
1153+
"pairwise_changed_git_files = add_directory_column(pairwise_changed_git_files, 'filePath', 'directoryPath')\n",
1154+
"\n",
1155+
"# Debug\n",
1156+
"# display(\"2. added directories --------------\")\n",
1157+
"# display(pairwise_changed_git_files)\n",
1158+
"\n",
1159+
"# Group the git files by their directory, sum up their commit counts and count the files of each directory (across all levels).\n",
1160+
"pairwise_changed_git_files = pairwise_changed_git_files.groupby(['directoryPath']).aggregate(\n",
1161+
" pairwiseChangeCommitCount=pd.NamedAgg(column=\"commitCount\", aggfunc=\"sum\"),\n",
1162+
" pairwiseChangeFileCount=pd.NamedAgg(column=\"filePath\", aggfunc=\"count\"),\n",
1163+
")\n",
1164+
"pairwise_changed_git_files.reset_index(inplace=True)\n",
1165+
"\n",
1166+
"# Debug\n",
1167+
"# display(\"3. after grouping --------------\")\n",
1168+
"# display(pairwise_changed_git_files)\n",
1169+
"\n",
1170+
"pairwise_changed_git_files = pd.merge(\n",
1171+
" git_files_with_commit_statistics, \n",
1172+
" pairwise_changed_git_files, \n",
1173+
" left_on='directoryPath', \n",
1174+
" right_on=\"directoryPath\",\n",
1175+
" how=\"left\",\n",
1176+
" validate=\"m:1\"\n",
1177+
")\n",
1178+
"\n",
1179+
"# Debug\n",
1180+
"# display(\"4. after merging --------------\")\n",
1181+
"# display(pairwise_changed_git_files)\n",
1182+
"\n",
1183+
"pairwise_changed_git_files['pairwiseChangeCommitCount'] = pairwise_changed_git_files['pairwiseChangeCommitCount'].fillna(0).astype(int)\n",
1184+
"pairwise_changed_git_files['pairwiseChangeFileCount'] = pairwise_changed_git_files['pairwiseChangeFileCount'].fillna(0).astype(int)\n",
1185+
"pairwise_changed_git_files.reset_index(inplace=True)\n",
1186+
"\n",
1187+
"# Debug\n",
1188+
"# display(\"5. after NaN fill --------------\")\n",
1189+
"# display(pairwise_changed_git_files)"
1190+
]
1191+
},
1192+
{
1193+
"cell_type": "code",
1194+
"execution_count": null,
1195+
"id": "19b5a98a",
1196+
"metadata": {},
1197+
"outputs": [],
1198+
"source": [
1199+
"pairwise_changed_git_files = add_quantile_limited_column(pairwise_changed_git_files, \"pairwiseChangeCommitCount\", 0.98)\n",
1200+
"\n",
1201+
"figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
1202+
" create_treemap_commit_statistics_settings(pairwise_changed_git_files),\n",
1203+
" # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n",
1204+
" # values = pairwise_changed_git_files['fileCount'],\n",
1205+
" marker=dict(\n",
1206+
" **plotly_treemap_marker_base_colorscale,\n",
1207+
" colors=pairwise_changed_git_files['pairwiseChangeCommitCount_limited'], \n",
1208+
" colorbar=dict(title=\"Changes\"),\n",
1209+
" ),\n",
1210+
"))\n",
1211+
"figure.update_layout(\n",
1212+
" **plotly_treemap_layout_base_settings,\n",
1213+
" title='Pairwise file changes',\n",
1214+
")\n",
1215+
"figure.show(**plotly_treemap_figure_show_settings)"
1216+
]
1217+
},
10861218
{
10871219
"cell_type": "markdown",
10881220
"id": "d8c6ccee",

0 commit comments

Comments
 (0)