|
91 | 91 | "source": [
|
92 | 92 | "def get_cypher_query_from_file(cypher_file_name : str):\n",
|
93 | 93 | " with open(cypher_file_name) as file:\n",
|
94 |
| - " return ' '.join(file.readlines())" |
95 |
| - ] |
96 |
| - }, |
97 |
| - { |
98 |
| - "cell_type": "code", |
99 |
| - "execution_count": null, |
100 |
| - "id": "59310f6f", |
101 |
| - "metadata": {}, |
102 |
| - "outputs": [], |
103 |
| - "source": [ |
| 94 | + " return ' '.join(file.readlines())\n", |
| 95 | + "\n", |
| 96 | + "\n", |
104 | 97 | "def query_cypher_to_data_frame(filename : str, limit: int = -1):\n",
|
105 | 98 | " \"\"\"\n",
|
106 | 99 | " Executes the Cypher query of the given file and returns the result.\n",
|
|
111 | 104 | " if limit > 0:\n",
|
112 | 105 | " cypher_query = \"{query}\\nLIMIT {row_limit}\".format(query = cypher_query, row_limit = limit)\n",
|
113 | 106 | " records, summary, keys = driver.execute_query(cypher_query)\n",
|
114 |
| - " return pd.DataFrame([r.values() for r in records], columns=keys)" |
115 |
| - ] |
116 |
| - }, |
117 |
| - { |
118 |
| - "cell_type": "code", |
119 |
| - "execution_count": null, |
120 |
| - "id": "c09da482", |
121 |
| - "metadata": {}, |
122 |
| - "outputs": [], |
123 |
| - "source": [ |
| 107 | + " return pd.DataFrame([r.values() for r in records], columns=keys)\n", |
| 108 | + "\n", |
| 109 | + "\n", |
124 | 110 | "def query_first_non_empty_cypher_to_data_frame(*filenames : str, limit: int = -1):\n",
|
125 | 111 | " \"\"\"\n",
|
126 | 112 | " Executes the Cypher queries of the given files and returns the first result that is not empty.\n",
|
|
986 | 972 | ")\n",
|
987 | 973 | "figure.show(**plotly_treemap_figure_show_settings)"
|
988 | 974 | ]
|
| 975 | + }, |
| 976 | + { |
| 977 | + "cell_type": "markdown", |
| 978 | + "id": "14e87aff", |
| 979 | + "metadata": {}, |
| 980 | + "source": [ |
| 981 | + "## WordCloud of git authors" |
| 982 | + ] |
| 983 | + }, |
| 984 | + { |
| 985 | + "cell_type": "code", |
| 986 | + "execution_count": null, |
| 987 | + "id": "e2f68f02", |
| 988 | + "metadata": {}, |
| 989 | + "outputs": [], |
| 990 | + "source": [ |
| 991 | + "# Query data from graph database\n", |
| 992 | + "git_author_words_with_frequency = query_cypher_to_data_frame(\"../cypher/Overview/Words_for_git_author_Wordcloud_with_frequency.cypher\")\n", |
| 993 | + "# Debug: uncomment the next line to preview the first 10 rows\n", |
| 994 | + "# display(git_author_words_with_frequency.head(10))" |
| 995 | + ] |
| 996 | + }, |
| 997 | + { |
| 998 | + "cell_type": "code", |
| 999 | + "execution_count": null, |
| 1000 | + "id": "d83ce5f4", |
| 1001 | + "metadata": {}, |
| 1002 | + "outputs": [], |
| 1003 | + "source": [ |
| 1004 | + "from wordcloud import WordCloud\n", |
| 1005 | + "import matplotlib.pyplot as plot\n", |
| 1006 | + "\n", |
| 1007 | + "if not git_author_words_with_frequency.empty:\n", |
| 1008 | + " # Expects the first column of the DataFrame to contain the words/text and the second column to contain the count/frequency.\n", |
| 1009 | + " words_with_frequency_dict=git_author_words_with_frequency.set_index(git_author_words_with_frequency.columns[0]).to_dict()[git_author_words_with_frequency.columns[1]]\n", |
| 1010 | + " wordcloud = WordCloud(\n", |
| 1011 | + " width=800, \n", |
| 1012 | + " height=800,\n", |
| 1013 | + " max_words=600, \n", |
| 1014 | + " collocations=False,\n", |
| 1015 | + " background_color='white', \n", |
| 1016 | + " colormap='viridis'\n", |
| 1017 | + " ).generate_from_frequencies(words_with_frequency_dict)\n", |
| 1018 | + "\n", |
| 1019 | + " # Plot the word cloud\n", |
| 1020 | + " plot.figure(figsize=(15,15))\n", |
| 1021 | + " plot.imshow(wordcloud, interpolation='bilinear')\n", |
| 1022 | + " plot.axis(\"off\")\n", |
| 1023 | + " plot.title('Wordcloud of git authors')\n", |
| 1024 | + " plot.show()" |
| 1025 | + ] |
989 | 1026 | }
|
990 | 1027 | ],
|
991 | 1028 | "metadata": {
|
|
0 commit comments