|
36 | 36 | "outputs": [],
|
37 | 37 | "source": [
|
38 | 38 | "from neo4j import GraphDatabase\n",
|
39 |
| - "from plotly import graph_objects as plotly_graph_objects" |
| 39 | + "from plotly import graph_objects as plotly_graph_objects\n", |
| 40 | + "from plotly.express import colors as plotly_colors" |
40 | 41 | ]
|
41 | 42 | },
|
42 | 43 | {
|
|
246 | 247 | " labels=data_frame['directoryName'],\n",
|
247 | 248 | " parents=data_frame['directoryParentPath'],\n",
|
248 | 249 | " ids=data_frame['directoryPath'],\n",
|
249 |
| - " customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n", |
250 |
| - " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Last Commit: %{customdata[3]} (%{customdata[4]} days ago)<br>Last Created: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Modified: %{customdata[7]} (%{customdata[8]} days ago)<br>Path: %{customdata[9]}',\n", |
| 250 | + " customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'secondAuthor','lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n", |
| 251 | + " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[3]}, %{customdata[4]},.. (%{customdata[2]})<br>Last Commit: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Created: %{customdata[7]} (%{customdata[8]} days ago)<br>Last Modified: %{customdata[9]} (%{customdata[10]} days ago)<br>Path: %{customdata[11]}',\n", |
251 | 252 | " maxdepth=-1,\n",
|
252 | 253 | " root_color=\"lightgrey\",\n",
|
253 | 254 | " marker=dict(**plotly_treemap_marker_base_style),\n",
|
|
421 | 422 | "source": [
|
422 | 423 | "git_files_with_commit_statistics = query_cypher_to_data_frame(\"../cypher/GitLog/List_git_files_with_commit_statistics_by_author.cypher\")\n",
|
423 | 424 | "\n",
|
| 425 | + "# Collect all authors with their total commit count and a dense rank derived from it (rank 1 = fewest commits) in a separate dataframe.\n", |
| 426 | + "# The rank is needed later to color each directory by its (main) author.\n", |
| 427 | + "git_file_authors=git_files_with_commit_statistics[['author', 'commitCount']].groupby('author').aggregate(\n", |
| 428 | + " authorCommitCount=pd.NamedAgg(column=\"commitCount\", aggfunc=\"sum\"),\n", |
| 429 | + " ).sort_values(by='authorCommitCount', ascending=False).reset_index()\n", |
| 430 | + "git_file_authors['authorCommitCountRank'] = git_file_authors['authorCommitCount'].rank(ascending=True, method='dense').astype(int)\n", |
| 431 | + "\n", |
| 432 | + "# Debug\n", |
| 433 | + "# display(git_file_authors)\n", |
| 434 | + "\n", |
424 | 435 | "# Debug\n",
|
425 | 436 | "# display(\"1. query result ---------------------\")\n",
|
426 | 437 | "# display(git_files_with_commit_statistics)\n",
|
|
524 | 535 | "id": "ccc11f52",
|
525 | 536 | "metadata": {},
|
526 | 537 | "source": [
|
527 |
| - "### Directories by file count" |
| 538 | + "### Number of files per directory" |
528 | 539 | ]
|
529 | 540 | },
|
530 | 541 | {
|
|
545 | 556 | "figure.show(**plotly_treemap_figure_show_settings)"
|
546 | 557 | ]
|
547 | 558 | },
|
548 |
| - { |
549 |
| - "cell_type": "code", |
550 |
| - "execution_count": null, |
551 |
| - "id": "fb399f44", |
552 |
| - "metadata": {}, |
553 |
| - "outputs": [], |
554 |
| - "source": [ |
555 |
| - "# TODO Directories by main author" |
556 |
| - ] |
557 |
| - }, |
558 | 559 | {
|
559 | 560 | "cell_type": "markdown",
|
560 | 561 | "id": "e98ca7b1",
|
|
574 | 575 | "\n",
|
575 | 576 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
576 | 577 | " create_treemap_commit_statistics_settings(git_commit_count_per_directory),\n",
|
577 |
| - " values = git_commit_count_per_directory['fileCount'],\n", |
| 578 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 579 | + " # values = git_commit_count_per_directory['fileCount'],\n", |
578 | 580 | " marker=dict(\n",
|
579 | 581 | " **plotly_treemap_marker_base_colorscale,\n",
|
580 | 582 | " colors=git_commit_count_per_directory['commitCount_limited'], \n",
|
|
603 | 605 | "metadata": {},
|
604 | 606 | "outputs": [],
|
605 | 607 | "source": [
|
606 |
| - "git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.96)\n", |
| 608 | + "git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.98)\n", |
607 | 609 | "\n",
|
608 | 610 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
609 | 611 | " create_treemap_commit_statistics_settings(git_commit_authors_per_directory),\n",
|
610 |
| - " values = git_commit_authors_per_directory['fileCount'],\n", |
| 612 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 613 | + " # values = git_commit_authors_per_directory['fileCount'],\n", |
611 | 614 | " marker=dict(\n",
|
612 | 615 | " **plotly_treemap_marker_base_colorscale,\n",
|
613 | 616 | " colors=git_commit_authors_per_directory['authorCount_limited'], \n",
|
|
621 | 624 | "figure.show(**plotly_treemap_figure_show_settings)"
|
622 | 625 | ]
|
623 | 626 | },
|
| 627 | + { |
| 628 | + "cell_type": "markdown", |
| 629 | + "id": "5dbceaef", |
| 630 | + "metadata": {}, |
| 631 | + "source": [ |
| 632 | + "### Main author per directory" |
| 633 | + ] |
| 634 | + }, |
| 635 | + { |
| 636 | + "cell_type": "code", |
| 637 | + "execution_count": null, |
| 638 | + "id": "29069753", |
| 639 | + "metadata": {}, |
| 640 | + "outputs": [], |
| 641 | + "source": [ |
| 642 | + "# TODO delete unused code" |
| 643 | + ] |
| 644 | + }, |
| 645 | + { |
| 646 | + "cell_type": "raw", |
| 647 | + "id": "7ccca44e", |
| 648 | + "metadata": {}, |
| 649 | + "source": [ |
| 650 | + "# TODO experiment again with plotly express\n", |
| 651 | + "\n", |
| 652 | + "import plotly.express as plotly_express\n", |
| 653 | + "\n", |
| 654 | + "plotly_treemap_color_settings = dict(\n", |
| 655 | + " color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n", |
| 656 | + " color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n", |
| 657 | + ")\n", |
| 658 | + "plotly_treemap_commit_statistics_custom_data= dict(\n", |
| 659 | + " custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n", |
| 660 | + ")\n", |
| 661 | + "plotly_treemap_traces_base_settings = dict(\n", |
| 662 | + " root_color=\"lightgrey\",\n", |
| 663 | + " textinfo=\"label+value\",\n", |
| 664 | + " marker=dict(cornerradius=5),\n", |
| 665 | + ")\n", |
| 666 | + "plotly_treemap_traces_commit_statistics_settings = dict(\n", |
| 667 | + " **plotly_treemap_traces_base_settings,\n", |
| 668 | + " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n", |
| 669 | + ")\n", |
| 670 | + "plotly_treemap_layout_base_settings = dict(\n", |
| 671 | + " margin=dict(t=50, l=15, r=15, b=15),\n", |
| 672 | + ")\n", |
| 673 | + "\n", |
| 674 | + "# Extract unique authors for category orders\n", |
| 675 | + "#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n", |
| 676 | + "\n", |
| 677 | + "figure = plotly_express.treemap(\n", |
| 678 | + " git_files_with_commit_statistics,\n", |
| 679 | + " **plotly_treemap_color_settings,\n", |
| 680 | + " **plotly_treemap_commit_statistics_custom_data,\n", |
| 681 | + " ids='directoryPath',\n", |
| 682 | + " names='directoryName',\n", |
| 683 | + " parents='directoryParentPath',\n", |
| 684 | + " # Without values, many more squares are shown, which gives a much better overview\n", |
| 685 | + " # values='fileCount', \n", |
| 686 | + " color='mainAuthor',\n", |
| 687 | + " title='Directories and their main author (discrete coloring, no legend?)',\n", |
| 688 | + ")\n", |
| 689 | + "figure.update_traces(\n", |
| 690 | + " **plotly_treemap_traces_commit_statistics_settings,\n", |
| 691 | + ")\n", |
| 692 | + "figure.update_layout(\n", |
| 693 | + " **plotly_treemap_layout_base_settings,\n", |
| 694 | + " # coloraxis_colorbar=dict(title=\"Author\"),\n", |
| 695 | + " legend_title_text='Main Author',\n", |
| 696 | + " showlegend=True,\n", |
| 697 | + " legend_visible=True,\n", |
| 698 | + ") \n", |
| 699 | + "\n", |
| 700 | + "figure.show(**plotly_treemap_figure_show_settings)" |
| 701 | + ] |
| 702 | + }, |
| 703 | + { |
| 704 | + "cell_type": "code", |
| 705 | + "execution_count": null, |
| 706 | + "id": "259f7278", |
| 707 | + "metadata": {}, |
| 708 | + "outputs": [], |
| 709 | + "source": [ |
| 710 | + "def create_git_authors_graph_objects_treemap_marker(main_data_frame: pd.DataFrame, author_rank_data_frame: pd.DataFrame, author_column_name: str):\n", |
| 711 | + " \"\"\"\n", |
| 712 | + " Creates a plotly graph_objects.Treemap marker object for git author plots.\n", |
| 713 | + " main_data_frame : pd.DataFrame : The DataFrame that contains the git directories and their commit statistics\n", |
| 714 | + " author_rank_data_frame : pd.DataFrame : The DataFrame that contains the git authors, their commit count and based on that their rank.\n", |
| 715 | + " author_column_name : str : The name of the (aggregated) author column for coloring the plot\n", |
| 716 | + " return : dict : The marker settings to pass as the marker argument of plotly_graph_objects.Treemap\n", |
| 717 | + " \"\"\"\n", |
| 718 | + " data_frame_with_authors=pd.merge(\n", |
| 719 | + " main_data_frame, \n", |
| 720 | + " author_rank_data_frame, \n", |
| 721 | + " left_on=author_column_name, \n", |
| 722 | + " right_on=\"author\",\n", |
| 723 | + " how=\"left\",\n", |
| 724 | + " validate=\"m:1\"\n", |
| 725 | + " )\n", |
| 726 | + " # display(data_frame_with_authors)\n", |
| 727 | + "\n", |
| 728 | + " data_frame_with_author_ranks=data_frame_with_authors['authorCommitCountRank']\n", |
| 729 | + "\n", |
| 730 | + " return dict(\n", |
| 731 | + " cornerradius=5, \n", |
| 732 | + " colors=data_frame_with_author_ranks,\n", |
| 733 | + " colorscale=plotly_colors.qualitative.G10, #favorites: plotly_colors.qualitative.G10, Blackbody, ice, haline, hot\n", |
| 734 | + " colorbar=dict(\n", |
| 735 | + " title=\"Rank\",\n", |
| 736 | + " tickmode=\"array\",\n", |
| 737 | + " ticktext=data_frame_with_authors[author_column_name],\n", |
| 738 | + " tickvals=data_frame_with_author_ranks,\n", |
| 739 | + " tickfont_size=8\n", |
| 740 | + " ),\n", |
| 741 | + " )\n" |
| 742 | + ] |
| 743 | + }, |
| 744 | + { |
| 745 | + "cell_type": "code", |
| 746 | + "execution_count": null, |
| 747 | + "id": "e97c0d87", |
| 748 | + "metadata": {}, |
| 749 | + "outputs": [], |
| 750 | + "source": [ |
| 751 | + "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n", |
| 752 | + " create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n", |
| 753 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 754 | + " # values = git_files_with_commit_statistics['fileCount'],\n", |
| 755 | + " marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"mainAuthor\")\n", |
| 756 | + "))\n", |
| 757 | + "figure.update_layout(\n", |
| 758 | + " **plotly_treemap_layout_base_settings,\n", |
| 759 | + " title='Main author (highest number of commits)'\n", |
| 760 | + ")\n", |
| 761 | + "figure.show(**plotly_treemap_figure_show_settings)" |
| 762 | + ] |
| 763 | + }, |
| 764 | + { |
| 765 | + "cell_type": "markdown", |
| 766 | + "id": "349a1d03", |
| 767 | + "metadata": {}, |
| 768 | + "source": [ |
| 769 | + "### Second author per directory" |
| 770 | + ] |
| 771 | + }, |
| 772 | + { |
| 773 | + "cell_type": "code", |
| 774 | + "execution_count": null, |
| 775 | + "id": "29484f84", |
| 776 | + "metadata": {}, |
| 777 | + "outputs": [], |
| 778 | + "source": [ |
| 779 | + "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n", |
| 780 | + " create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n", |
| 781 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 782 | + " # values = git_files_with_commit_statistics['fileCount'],\n", |
| 783 | + " marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"secondAuthor\")\n", |
| 784 | + "))\n", |
| 785 | + "figure.update_layout(\n", |
| 786 | + " **plotly_treemap_layout_base_settings,\n", |
| 787 | + " title='Second author (second highest number of commits)'\n", |
| 788 | + ")\n", |
| 789 | + "figure.show(**plotly_treemap_figure_show_settings)" |
| 790 | + ] |
| 791 | + }, |
624 | 792 | {
|
625 | 793 | "cell_type": "markdown",
|
626 | 794 | "id": "0ed919b0",
|
|
636 | 804 | "metadata": {},
|
637 | 805 | "outputs": [],
|
638 | 806 | "source": [
|
639 |
| - "git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n", |
| 807 | + "git_commit_days_since_last_commit_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCommit\", 0.98)\n", |
640 | 808 | "\n",
|
641 | 809 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
642 | 810 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
|
643 |
| - " values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
| 811 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 812 | + " # values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
644 | 813 | " marker=dict(\n",
|
645 | 814 | " **plotly_treemap_marker_base_colorscale,\n",
|
646 | 815 | " colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_limited'], \n",
|
|
670 | 839 | "metadata": {},
|
671 | 840 | "outputs": [],
|
672 | 841 | "source": [
|
| 842 | + "git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n", |
| 843 | + "\n", |
673 | 844 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
674 | 845 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
|
675 |
| - " values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
| 846 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 847 | + " # values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
676 | 848 | " marker=dict(\n",
|
677 | 849 | " **plotly_treemap_marker_base_colorscale,\n",
|
678 | 850 | " colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_rank'], \n",
|
|
702 | 874 | "metadata": {},
|
703 | 875 | "outputs": [],
|
704 | 876 | "source": [
|
705 |
| - "git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n", |
| 877 | + "git_commit_days_since_last_file_creation_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCreation\", 0.98)\n", |
706 | 878 | "\n",
|
707 | 879 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
708 | 880 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
|
709 |
| - " values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
| 881 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 882 | + " # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
710 | 883 | " marker=dict(\n",
|
711 | 884 | " **plotly_treemap_marker_base_colorscale,\n",
|
712 | 885 | " colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_limited'], \n",
|
|
735 | 908 | "metadata": {},
|
736 | 909 | "outputs": [],
|
737 | 910 | "source": [
|
| 911 | + "git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n", |
| 912 | + "\n", |
738 | 913 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
739 | 914 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
|
740 |
| - " values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
| 915 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 916 | + " # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
741 | 917 | " marker=dict(\n",
|
742 | 918 | " **plotly_treemap_marker_base_colorscale,\n",
|
743 | 919 | " colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_rank'], \n",
|
|
766 | 942 | "metadata": {},
|
767 | 943 | "outputs": [],
|
768 | 944 | "source": [
|
769 |
| - "git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n", |
| 945 | + "git_commit_days_since_last_file_modification_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastModification\", 0.98)\n", |
770 | 946 | "\n",
|
771 | 947 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
772 | 948 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
|
773 |
| - " values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
| 949 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 950 | + " # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
774 | 951 | " marker=dict(\n",
|
775 | 952 | " **plotly_treemap_marker_base_colorscale,\n",
|
776 | 953 | " colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_limited'], \n",
|
|
799 | 976 | "metadata": {},
|
800 | 977 | "outputs": [],
|
801 | 978 | "source": [
|
| 979 | + "git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n", |
| 980 | + "\n", |
802 | 981 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
803 | 982 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
|
804 |
| - " values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
| 983 | + " # Without values, many more squares are shown, which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 984 | + " # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
805 | 985 | " marker=dict(\n",
|
806 | 986 | " **plotly_treemap_marker_base_colorscale,\n",
|
807 | 987 | " colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_rank'], \n",
|
|
0 commit comments