|
91 | 91 | "source": [
|
92 | 92 | "def get_cypher_query_from_file(cypher_file_name : str):\n",
|
93 | 93 | " with open(cypher_file_name) as file:\n",
|
94 |
| - " return ' '.join(file.readlines())" |
95 |
| - ] |
96 |
| - }, |
97 |
| - { |
98 |
| - "cell_type": "code", |
99 |
| - "execution_count": null, |
100 |
| - "id": "59310f6f", |
101 |
| - "metadata": {}, |
102 |
| - "outputs": [], |
103 |
| - "source": [ |
| 94 | + " return ' '.join(file.readlines())\n", |
| 95 | + "\n", |
| 96 | + "\n", |
104 | 97 | "def query_cypher_to_data_frame(filename : str, limit: int = -1):\n",
|
105 | 98 | " \"\"\"\n",
|
106 | 99 | " Executes the Cypher query of the given file and returns the result.\n",
|
|
111 | 104 | " if limit > 0:\n",
|
112 | 105 | " cypher_query = \"{query}\\nLIMIT {row_limit}\".format(query = cypher_query, row_limit = limit)\n",
|
113 | 106 | " records, summary, keys = driver.execute_query(cypher_query)\n",
|
114 |
| - " return pd.DataFrame([r.values() for r in records], columns=keys)" |
115 |
| - ] |
116 |
| - }, |
117 |
| - { |
118 |
| - "cell_type": "code", |
119 |
| - "execution_count": null, |
120 |
| - "id": "c09da482", |
121 |
| - "metadata": {}, |
122 |
| - "outputs": [], |
123 |
| - "source": [ |
| 107 | + " return pd.DataFrame([r.values() for r in records], columns=keys)\n", |
| 108 | + "\n", |
| 109 | + "\n", |
124 | 110 | "def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = -1):\n",
|
125 | 111 | " \"\"\"\n",
|
126 | 112 | " Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
|
|
625 | 611 | "### Main author per directory"
|
626 | 612 | ]
|
627 | 613 | },
|
628 |
| - { |
629 |
| - "cell_type": "code", |
630 |
| - "execution_count": null, |
631 |
| - "id": "29069753", |
632 |
| - "metadata": {}, |
633 |
| - "outputs": [], |
634 |
| - "source": [ |
635 |
| - "# TODO delete unused code" |
636 |
| - ] |
637 |
| - }, |
638 |
| - { |
639 |
| - "cell_type": "raw", |
640 |
| - "id": "7ccca44e", |
641 |
| - "metadata": {}, |
642 |
| - "source": [ |
643 |
| - "# TODO experiment again with plotly express\n", |
644 |
| - "\n", |
645 |
| - "import plotly.express as plotly_express\n", |
646 |
| - "\n", |
647 |
| - "plotly_treemap_color_settings = dict(\n", |
648 |
| - " color_continuous_scale='Hot_r', # Hot_r, amp, Reds, Blackbody_r, RdGy_r, RdBu_r\n", |
649 |
| - " color_discrete_sequence=plotly_express.colors.qualitative.Vivid,\n", |
650 |
| - ")\n", |
651 |
| - "plotly_treemap_commit_statistics_custom_data= dict(\n", |
652 |
| - " custom_data=['fileCount', 'commitCount', 'authorCount', 'mainAuthor', 'lastCommitDate', 'daysSinceLastCommit', 'lastCreationDate', 'daysSinceLastCreation', 'lastModificationDate', 'daysSinceLastModification', 'directoryPath'],\n", |
653 |
| - ")\n", |
654 |
| - "plotly_treemap_traces_base_settings = dict(\n", |
655 |
| - " root_color=\"lightgrey\",\n", |
656 |
| - " textinfo=\"label+value\",\n", |
657 |
| - " marker=dict(cornerradius=5),\n", |
658 |
| - ")\n", |
659 |
| - "plotly_treemap_traces_commit_statistics_settings = dict(\n", |
660 |
| - " **plotly_treemap_traces_base_settings,\n", |
661 |
| - " hovertemplate='<b>%{label}</b><br>Files: %{customdata[0]}<br>Commits: %{customdata[1]}<br>Authors: %{customdata[2]}<br>Main Author: %{customdata[3]}<br>Last Commit: %{customdata[4]} (%{customdata[5]} days ago)<br>Last Created: %{customdata[6]} (%{customdata[7]} days ago)<br>Last Modified: %{customdata[8]} (%{customdata[9]} days ago)<br>Path: %{customdata[10]}',\n", |
662 |
| - ")\n", |
663 |
| - "plotly_treemap_layout_base_settings = dict(\n", |
664 |
| - " margin=dict(t=50, l=15, r=15, b=15),\n", |
665 |
| - ")\n", |
666 |
| - "\n", |
667 |
| - "# Extract unique authors for category orders\n", |
668 |
| - "#unique_authors = git_files_with_commit_statistics['mainAuthor'].unique()\n", |
669 |
| - "\n", |
670 |
| - "figure = plotly_express.treemap(\n", |
671 |
| - " git_files_with_commit_statistics,\n", |
672 |
| - " **plotly_treemap_color_settings,\n", |
673 |
| - " **plotly_treemap_commit_statistics_custom_data,\n", |
674 |
| - " ids='directoryPath',\n", |
675 |
| - " names='directoryName',\n", |
676 |
| - " parents='directoryParentPath',\n", |
677 |
| - " # Without values, much more squares are shown which gives a much better overview\n", |
678 |
| - " # values='fileCount', \n", |
679 |
| - " color='mainAuthor',\n", |
680 |
| - " title='Directories and their main author (discrete coloring, no legend?)',\n", |
681 |
| - ")\n", |
682 |
| - "figure.update_traces(\n", |
683 |
| - " **plotly_treemap_traces_commit_statistics_settings,\n", |
684 |
| - ")\n", |
685 |
| - "figure.update_layout(\n", |
686 |
| - " **plotly_treemap_layout_base_settings,\n", |
687 |
| - " # coloraxis_colorbar=dict(title=\"Author\"),\n", |
688 |
| - " legend_title_text='Main Author',\n", |
689 |
| - " showlegend=True,\n", |
690 |
| - " legend_visible=True,\n", |
691 |
| - ") \n", |
692 |
| - "\n", |
693 |
| - "figure.show(**plotly_treemap_figure_show_settings)" |
694 |
| - ] |
695 |
| - }, |
696 | 614 | {
|
697 | 615 | "cell_type": "code",
|
698 | 616 | "execution_count": null,
|
|
986 | 904 | ")\n",
|
987 | 905 | "figure.show(**plotly_treemap_figure_show_settings)"
|
988 | 906 | ]
|
| 907 | + }, |
| 908 | + { |
| 909 | + "cell_type": "markdown", |
| 910 | + "id": "14e87aff", |
| 911 | + "metadata": {}, |
| 912 | + "source": [ |
| 913 | + "## WordCloud of git authors" |
| 914 | + ] |
| 915 | + }, |
| 916 | + { |
| 917 | + "cell_type": "code", |
| 918 | + "execution_count": null, |
| 919 | + "id": "e2f68f02", |
| 920 | + "metadata": {}, |
| 921 | + "outputs": [], |
| 922 | + "source": [ |
| 923 | + "# Query data from graph database\n", |
| 924 | + "git_author_words_with_frequency = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_git_author_Wordcloud_with_frequency.cypher\")\n", |
| 925 | + "# Debug \n", |
| 926 | + "# display(git_author_words_with_frequency.head(10))" |
| 927 | + ] |
| 928 | + }, |
| 929 | + { |
| 930 | + "cell_type": "code", |
| 931 | + "execution_count": null, |
| 932 | + "id": "d83ce5f4", |
| 933 | + "metadata": {}, |
| 934 | + "outputs": [], |
| 935 | + "source": [ |
| 936 | + "from wordcloud import WordCloud\n", |
| 937 | + "import matplotlib.pyplot as plot\n", |
| 938 | + "\n", |
| 939 | + "if not git_author_words_with_frequency.empty:\n", |
| 940 | + " # Expects the first column of the DataFrame to contain the words/text and the second column to contain the count/frequency.\n", |
| 941 | + " words_with_frequency_dict=git_author_words_with_frequency.set_index(git_author_words_with_frequency.columns[0]).to_dict()[git_author_words_with_frequency.columns[1]]\n", |
| 942 | + " wordcloud = WordCloud(\n", |
| 943 | + " width=800, \n", |
| 944 | + " height=800,\n", |
| 945 | + " max_words=600, \n", |
| 946 | + " collocations=False,\n", |
| 947 | + " background_color='white', \n", |
| 948 | + " colormap='viridis'\n", |
| 949 | + " ).generate_from_frequencies(words_with_frequency_dict)\n", |
| 950 | + "\n", |
| 951 | + " # Plot the word cloud\n", |
| 952 | + " plot.figure(figsize=(15,15))\n", |
| 953 | + " plot.imshow(wordcloud, interpolation='bilinear')\n", |
| 954 | + " plot.axis(\"off\")\n", |
| 955 | + " plot.title('Wordcloud of git authors')\n", |
| 956 | + " plot.show()" |
| 957 | + ] |
989 | 958 | }
|
990 | 959 | ],
|
991 | 960 | "metadata": {
|
|
0 commit comments