|
36 | 36 | "outputs": [],
|
37 | 37 | "source": [
|
38 | 38 | "from neo4j import GraphDatabase\n",
|
39 |
| - "from plotly import graph_objects as plotly_graph_objects" |
| 39 | + "from plotly import graph_objects as plotly_graph_objects\n", |
| 40 | + "from plotly.express import colors as plotly_colors" |
40 | 41 | ]
|
41 | 42 | },
|
42 | 43 | {
|
|
216 | 217 | "plotly_treemap_figure_show_settings = dict(\n",
|
217 | 218 | " renderer=\"svg\" if is_command_line_execution() else None,\n",
|
218 | 219 | " width=1000,\n",
|
219 |
| - " height=550\n", |
| 220 | + " height=800\n", |
220 | 221 | ")"
|
221 | 222 | ]
|
222 | 223 | },
|
|
237 | 238 | " labels=data_frame['directoryName'],\n",
|
238 | 239 | " parents=data_frame['directoryParentPath'],\n",
|
239 | 240 | " ids=data_frame['directoryPath'],\n",
|
240 |
| - " customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n", |
241 |
| - " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Last Commit: %{customdata[3]} (%{customdata[4]} days ago)<br>Last Created: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Modified: %{customdata[7]} (%{customdata[8]} days ago)<br>Path: %{customdata[9]}',\n", |
| 241 | + " customdata=data_frame[['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'secondAuthor','lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath']],\n", |
| 242 | + " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[3]}, %{customdata[4]},.. (%{customdata[2]})<br>Last Commit: %{customdata[5]} (%{customdata[6]} days ago)<br>Last Created: %{customdata[7]} (%{customdata[8]} days ago)<br>Last Modified: %{customdata[9]} (%{customdata[10]} days ago)<br>Path: %{customdata[11]}',\n", |
242 | 243 | " maxdepth=-1,\n",
|
243 | 244 | " root_color=\"lightgrey\",\n",
|
244 | 245 | " marker=dict(cornerradius=5),\n",
|
|
412 | 413 | "source": [
|
413 | 414 | "git_files_with_commit_statistics = query_cypher_to_data_frame(\"../cypher/GitLog/List_git_files_with_commit_statistics_by_author.cypher\")\n",
|
414 | 415 | "\n",
|
| 416 | + "# Collect all authors with their total commit count and their rank (based on that count) in a separate dataframe.\n", |
| 417 | + "# It is needed later to color each directory by its main (or second) author.\n", |
| 418 | + "git_file_authors=git_files_with_commit_statistics[['author', 'commitCount']].groupby('author').aggregate(\n", |
| 419 | + " authorCommitCount=pd.NamedAgg(column=\"commitCount\", aggfunc=\"sum\"),\n", |
| 420 | + " ).sort_values(by='authorCommitCount', ascending=False).reset_index()\n", |
| 421 | + "git_file_authors['authorCommitCountRank'] = git_file_authors['authorCommitCount'].rank(ascending=True, method='dense').astype(int)\n", |
| 422 | + "\n", |
| 423 | + "# Debug\n", |
| 424 | + "# display(git_file_authors)\n", |
| 425 | + "\n", |
415 | 426 | "# Debug\n",
|
416 | 427 | "# display(\"1. query result ---------------------\")\n",
|
417 | 428 | "# display(git_files_with_commit_statistics)\n",
|
|
515 | 526 | "id": "ccc11f52",
|
516 | 527 | "metadata": {},
|
517 | 528 | "source": [
|
518 |
| - "### Directories by file count" |
| 529 | + "### Number of files per directory" |
519 | 530 | ]
|
520 | 531 | },
|
521 | 532 | {
|
|
536 | 547 | "figure.show(**plotly_treemap_figure_show_settings)"
|
537 | 548 | ]
|
538 | 549 | },
|
539 |
| - { |
540 |
| - "cell_type": "code", |
541 |
| - "execution_count": null, |
542 |
| - "id": "fb399f44", |
543 |
| - "metadata": {}, |
544 |
| - "outputs": [], |
545 |
| - "source": [ |
546 |
| - "# TODO Directories by main author" |
547 |
| - ] |
548 |
| - }, |
549 | 550 | {
|
550 | 551 | "cell_type": "markdown",
|
551 | 552 | "id": "e98ca7b1",
|
|
565 | 566 | "\n",
|
566 | 567 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
567 | 568 | " create_treemap_commit_statistics_settings(git_commit_count_per_directory),\n",
|
568 |
| - " values = git_commit_count_per_directory['fileCount'],\n", |
| 569 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 570 | + " # values = git_commit_count_per_directory['fileCount'],\n", |
569 | 571 | " marker=dict(\n",
|
570 | 572 | " cornerradius=5, \n",
|
571 | 573 | " colors=git_commit_count_per_directory['commitCount_limited'], \n",
|
|
595 | 597 | "metadata": {},
|
596 | 598 | "outputs": [],
|
597 | 599 | "source": [
|
598 |
| - "git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.96)\n", |
| 600 | + "git_commit_authors_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"authorCount\", 0.98)\n", |
599 | 601 | "\n",
|
600 | 602 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
601 | 603 | " create_treemap_commit_statistics_settings(git_commit_authors_per_directory),\n",
|
602 |
| - " values = git_commit_authors_per_directory['fileCount'],\n", |
| 604 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 605 | + " # values = git_commit_authors_per_directory['fileCount'],\n", |
603 | 606 | " marker=dict(\n",
|
604 | 607 | " cornerradius=5, \n",
|
605 | 608 | " colors=git_commit_authors_per_directory['authorCount_limited'], \n",
|
|
614 | 617 | "figure.show(**plotly_treemap_figure_show_settings)"
|
615 | 618 | ]
|
616 | 619 | },
|
| 620 | + { |
| 621 | + "cell_type": "markdown", |
| 622 | + "id": "5dbceaef", |
| 623 | + "metadata": {}, |
| 624 | + "source": [ |
| 625 | + "### Main author per directory" |
| 626 | + ] |
| 627 | + }, |
| 628 | + { |
| 629 | + "cell_type": "code", |
| 630 | + "execution_count": null, |
| 631 | + "id": "29069753", |
| 632 | + "metadata": {}, |
| 633 | + "outputs": [], |
| 634 | + "source": [ |
| 635 | + "# TODO delete unused code" |
| 636 | + ] |
| 637 | + }, |
| 638 | + { |
| 639 | + "cell_type": "raw", |
| 640 | + "id": "7ccca44e", |
| 641 | + "metadata": {}, |
| 642 | + "source": [ |
| 643 | + "# TODO experiment again with plotly express\n", |
| 644 | + "\n", |
| 645 | + "import plotly.express as plotly_express\n", |
| 646 | + "\n", |
| 647 | + "plotly_treemap_color_settings = dict(\n", |
| 648 | + " color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n", |
| 649 | + " color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n", |
| 650 | + ")\n", |
| 651 | + "plotly_treemap_commit_statistics_custom_data= dict(\n", |
| 652 | + " custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n", |
| 653 | + ")\n", |
| 654 | + "plotly_treemap_traces_base_settings = dict(\n", |
| 655 | + " root_color=\"lightgrey\",\n", |
| 656 | + " textinfo=\"label+value\",\n", |
| 657 | + " marker=dict(cornerradius=5),\n", |
| 658 | + ")\n", |
| 659 | + "plotly_treemap_traces_commit_statistics_settings = dict(\n", |
| 660 | + " **plotly_treemap_traces_base_settings,\n", |
| 661 | + " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n", |
| 662 | + ")\n", |
| 663 | + "plotly_treemap_layout_base_settings = dict(\n", |
| 664 | + " margin=dict(t=50, l=15, r=15, b=15),\n", |
| 665 | + ")\n", |
| 666 | + "\n", |
| 667 | + "# Extract unique authors for category orders\n", |
| 668 | + "#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n", |
| 669 | + "\n", |
| 670 | + "figure = plotly_express.treemap(\n", |
| 671 | + " git_files_with_commit_statistics,\n", |
| 672 | + " **plotly_treemap_color_settings,\n", |
| 673 | + " **plotly_treemap_commit_statistics_custom_data,\n", |
| 674 | + " ids='directoryPath',\n", |
| 675 | + " names='directoryName',\n", |
| 676 | + " parents='directoryParentPath',\n", |
| 677 | + " # Without values, many more squares are shown which gives a much better overview\n", |
| 678 | + " # values='fileCount', \n", |
| 679 | + " color='mainAuthor',\n", |
| 680 | + " title='Directories and their main author (discrete coloring, no legend?)',\n", |
| 681 | + ")\n", |
| 682 | + "figure.update_traces(\n", |
| 683 | + " **plotly_treemap_traces_commit_statistics_settings,\n", |
| 684 | + ")\n", |
| 685 | + "figure.update_layout(\n", |
| 686 | + " **plotly_treemap_layout_base_settings,\n", |
| 687 | + " # coloraxis_colorbar=dict(title=\"Author\"),\n", |
| 688 | + " legend_title_text='Main Author',\n", |
| 689 | + " showlegend=True,\n", |
| 690 | + " legend_visible=True,\n", |
| 691 | + ") \n", |
| 692 | + "\n", |
| 693 | + "figure.show(**plotly_treemap_figure_show_settings)" |
| 694 | + ] |
| 695 | + }, |
| 696 | + { |
| 697 | + "cell_type": "code", |
| 698 | + "execution_count": null, |
| 699 | + "id": "259f7278", |
| 700 | + "metadata": {}, |
| 701 | + "outputs": [], |
| 702 | + "source": [ |
| 703 | + "def create_git_authors_graph_objects_treemap_marker(main_data_frame: pd.DataFrame, author_rank_data_frame: pd.DataFrame, author_column_name: str):\n", |
| 704 | + " \"\"\"\n", |
| 705 | + " Creates a plotly graph_objects.Treemap marker object for git author plots.\n", |
| 706 | + " main_data_frame : pd.DataFrame : The DataFrame that contains the git directories and their commit statistics\n", |
| 707 | + " author_rank_data_frame : pd.DataFrame : The DataFrame that contains the git authors, their commit count and based on that their rank.\n", |
| 708 | + " author_column_name : str : The name of the (aggregated) author column for coloring the plot\n", |
| 709 | + " return : plotly_graph_objects.treemap.Marker : The created Marker object\n", |
| 710 | + " \"\"\"\n", |
| 711 | + " data_frame_with_authors=pd.merge(git_files_with_commit_statistics, author_rank_data_frame, left_on=author_column_name, right_on=\"author\")\n", |
| 712 | + " #display(data_frame_with_author_ranks)\n", |
| 713 | + "\n", |
| 714 | + " data_frame_with_author_ranks=data_frame_with_authors['authorCommitCountRank']\n", |
| 715 | + "\n", |
| 716 | + " return dict(\n", |
| 717 | + " cornerradius=5, \n", |
| 718 | + " colors=data_frame_with_author_ranks,\n", |
| 719 | + " colorscale=plotly_colors.qualitative.G10, #favorites: plotly_colors.qualitative.G10, Blackbody, ice, haline, hot\n", |
| 720 | + " colorbar=dict(\n", |
| 721 | + " title=\"Rank\",\n", |
| 722 | + " tickmode=\"array\",\n", |
| 723 | + " ticktext=data_frame_with_authors[author_column_name],\n", |
| 724 | + " tickvals=data_frame_with_author_ranks,\n", |
| 725 | + " tickfont_size=8\n", |
| 726 | + " ),\n", |
| 727 | + " )\n" |
| 728 | + ] |
| 729 | + }, |
| 730 | + { |
| 731 | + "cell_type": "code", |
| 732 | + "execution_count": null, |
| 733 | + "id": "e97c0d87", |
| 734 | + "metadata": {}, |
| 735 | + "outputs": [], |
| 736 | + "source": [ |
| 737 | + "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n", |
| 738 | + " create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n", |
| 739 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 740 | + " # values = git_files_with_commit_statistics['fileCount'],\n", |
| 741 | + " marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"mainAuthor\")\n", |
| 742 | + "))\n", |
| 743 | + "figure.update_layout(\n", |
| 744 | + " **plotly_treemap_layout_base_settings,\n", |
| 745 | + " title='Main author (highest number of commits)'\n", |
| 746 | + ")\n", |
| 747 | + "figure.show(**plotly_treemap_figure_show_settings)" |
| 748 | + ] |
| 749 | + }, |
| 750 | + { |
| 751 | + "cell_type": "markdown", |
| 752 | + "id": "349a1d03", |
| 753 | + "metadata": {}, |
| 754 | + "source": [ |
| 755 | + "### Second author per directory" |
| 756 | + ] |
| 757 | + }, |
| 758 | + { |
| 759 | + "cell_type": "code", |
| 760 | + "execution_count": null, |
| 761 | + "id": "29484f84", |
| 762 | + "metadata": {}, |
| 763 | + "outputs": [], |
| 764 | + "source": [ |
| 765 | + "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n", |
| 766 | + " create_treemap_commit_statistics_settings(git_files_with_commit_statistics),\n", |
| 767 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 768 | + " # values = git_files_with_commit_statistics['fileCount'],\n", |
| 769 | + " marker=create_git_authors_graph_objects_treemap_marker(git_files_with_commit_statistics, git_file_authors, \"secondAuthor\")\n", |
| 770 | + "))\n", |
| 771 | + "figure.update_layout(\n", |
| 772 | + " **plotly_treemap_layout_base_settings,\n", |
| 773 | + " title='Second author (second highest number of commits)'\n", |
| 774 | + ")\n", |
| 775 | + "figure.show(**plotly_treemap_figure_show_settings)" |
| 776 | + ] |
| 777 | + }, |
617 | 778 | {
|
618 | 779 | "cell_type": "markdown",
|
619 | 780 | "id": "0ed919b0",
|
|
629 | 790 | "metadata": {},
|
630 | 791 | "outputs": [],
|
631 | 792 | "source": [
|
632 |
| - "git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n", |
| 793 | + "git_commit_days_since_last_commit_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCommit\", 0.98)\n", |
633 | 794 | "\n",
|
634 | 795 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
635 | 796 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
|
636 |
| - " values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
| 797 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 798 | + " #values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
637 | 799 | " marker=dict(\n",
|
638 | 800 | " cornerradius=5, \n",
|
639 |
| - " colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit'], \n", |
| 801 | + " colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_limited'], \n", |
640 | 802 | " colorscale='Hot_r',\n",
|
641 | 803 | " colorbar=dict(title=\"Days\"),\n",
|
642 | 804 | " ),\n",
|
|
664 | 826 | "metadata": {},
|
665 | 827 | "outputs": [],
|
666 | 828 | "source": [
|
| 829 | + "git_commit_days_since_last_commit_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCommit\")\n", |
| 830 | + "\n", |
667 | 831 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
668 | 832 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_commit_per_directory),\n",
|
669 |
| - " values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
| 833 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 834 | + " # values = git_commit_days_since_last_commit_per_directory['fileCount'],\n", |
670 | 835 | " marker=dict(\n",
|
671 | 836 | " cornerradius=5, \n",
|
672 | 837 | " colors=git_commit_days_since_last_commit_per_directory['daysSinceLastCommit_rank'], \n",
|
|
697 | 862 | "metadata": {},
|
698 | 863 | "outputs": [],
|
699 | 864 | "source": [
|
700 |
| - "git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n", |
| 865 | + "git_commit_days_since_last_file_creation_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastCreation\", 0.98)\n", |
701 | 866 | "\n",
|
702 | 867 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
703 | 868 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
|
704 |
| - " values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
| 869 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 870 | + " # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
705 | 871 | " marker=dict(\n",
|
706 | 872 | " cornerradius=5, \n",
|
707 |
| - " colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation'], \n", |
| 873 | + " colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_limited'], \n", |
708 | 874 | " colorscale='Hot_r',\n",
|
709 | 875 | " colorbar=dict(title=\"Days\"),\n",
|
710 | 876 | " ),\n",
|
|
731 | 897 | "metadata": {},
|
732 | 898 | "outputs": [],
|
733 | 899 | "source": [
|
| 900 | + "git_commit_days_since_last_file_creation_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastCreation\")\n", |
| 901 | + "\n", |
734 | 902 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
735 | 903 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_creation_per_directory),\n",
|
736 |
| - " values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
| 904 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 905 | + " # values = git_commit_days_since_last_file_creation_per_directory['fileCount'],\n", |
737 | 906 | " marker=dict(\n",
|
738 | 907 | " cornerradius=5, \n",
|
739 | 908 | " colors=git_commit_days_since_last_file_creation_per_directory['daysSinceLastCreation_rank'], \n",
|
|
763 | 932 | "metadata": {},
|
764 | 933 | "outputs": [],
|
765 | 934 | "source": [
|
766 |
| - "git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n", |
| 935 | + "git_commit_days_since_last_file_modification_per_directory = add_quantile_limited_column(git_files_with_commit_statistics, \"daysSinceLastModification\", 0.98)\n", |
767 | 936 | "\n",
|
768 | 937 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
769 | 938 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
|
770 |
| - " values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
| 939 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 940 | + " # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
771 | 941 | " marker=dict(\n",
|
772 | 942 | " cornerradius=5, \n",
|
773 |
| - " colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification'], \n", |
| 943 | + " colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_limited'], \n", |
774 | 944 | " colorscale='Hot_r',\n",
|
775 | 945 | " colorbar=dict(title=\"Days\"),\n",
|
776 | 946 | " ),\n",
|
|
797 | 967 | "metadata": {},
|
798 | 968 | "outputs": [],
|
799 | 969 | "source": [
|
| 970 | + "git_commit_days_since_last_file_modification_per_directory = add_rank_column(git_files_with_commit_statistics, \"daysSinceLastModification\")\n", |
| 971 | + "\n", |
800 | 972 | "figure = plotly_graph_objects.Figure(plotly_graph_objects.Treemap(\n",
|
801 | 973 | " create_treemap_commit_statistics_settings(git_commit_days_since_last_file_modification_per_directory),\n",
|
802 |
| - " values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
| 974 | + " # Without values, many more squares are shown which gives a much better overview. The drawback is that the fileCount isn't visible.\n", |
| 975 | + " # values = git_commit_days_since_last_file_modification_per_directory['fileCount'],\n", |
803 | 976 | " marker=dict(\n",
|
804 | 977 | " cornerradius=5, \n",
|
805 | 978 | " colors=git_commit_days_since_last_file_modification_per_directory['daysSinceLastModification_rank'], \n",
|
|
0 commit comments