
Commit b2b6c65

Added features, the return to spaghetti code.
Current features:
- Wordcloud for all text
- Wordcloud for the first text sent each day
- Frequency chart for all messages (top 50)
- Frequency chart for first messages of the day (top 50)
- Overall message trend
- Initiative chart (who sends the first message of the day; see the sketch below)

Raw data:
- All word frequency
- Average hourly frequency
- Daily message total
- First message contents and frequency
- Overall messaging trend
- Total messages per hour
1 parent bafab34 commit b2b6c65
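The initiative chart counts whose message opened each day. A minimal sketch of that tally, assuming the scraped messages are available as (timestamp, author) pairs; the variable names below are illustrative and are not the ones used in dataminer.py:

from collections import defaultdict
from datetime import datetime

# Hypothetical input shape: (timestamp, author) pairs.
messages = [
    ("2023-05-01 08:12:03", "A"),
    ("2023-05-01 09:40:11", "B"),
    ("2023-05-02 07:55:00", "B"),
]

# The earliest message of each calendar day decides who "initiated" that day.
first_of_day = {}  # date -> (datetime, author)
for ts, author in messages:
    dt = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S")
    day = dt.date()
    if day not in first_of_day or dt < first_of_day[day][0]:
        first_of_day[day] = (dt, author)

# Tally initiations per author, grouped by month.
monthly_first_msg = defaultdict(lambda: defaultdict(int))
for day, (dt, author) in first_of_day.items():
    monthly_first_msg[author][day.strftime("%Y-%m")] += 1

print({a: dict(m) for a, m in monthly_first_msg.items()})

Per-day winners grouped by month like this are what feed the "Monthly First Message" bar chart added in the dataminer.py diff below.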

File tree

combine.py
dataminer.py
mindful.py

3 files changed: +83 -13 lines changed


combine.py

Lines changed: 0 additions & 2 deletions
@@ -14,8 +14,6 @@ def combine_csv_files(output_file):
         for row in reader:
             if row not in unique_records:
                 unique_records.append(row)
-            else:
-                print(row)
 
     # Write unique records to the output file
     with open(output_file, 'w', encoding='utf-8-sig', newline='') as file:

dataminer.py

Lines changed: 78 additions & 7 deletions
@@ -193,7 +193,7 @@ def generateHours():
 # I will figure this out someday...
 
 
-print("----")
+#print("----")
 if not os.path.isdir("data"):
     os.mkdir("data")
 
@@ -214,7 +214,8 @@ def generateHours():
         try:
            date_list.append(person[x])
        except KeyError as k:
-            print(k)
+            #print(k)
+            pass
     proc_msg.append(date_list)
 
 for row in proc_msg:
@@ -303,47 +304,117 @@ def generateHours():
     for row in proc_msg:
         writer.writerow(row)
 
-wordcloud = WordCloud(width=1600, height=800, font_path='HanyiSentyRubber.ttf',relative_scaling = 0.69, colormap='winter',min_font_size=10, background_color="white").generate_from_frequencies(first_msg_contents)
+with open("data/first_msg_contents.csv", 'w', encoding='utf-8-sig', newline='') as file:
+    writer = csv.writer(file)
+    headers = ["Phrase", "Frequency"]
+    writer.writerow(headers)
+    for row in first_msg_contents:
+        writer.writerow(row)
+
+with open("data/all_word_freq.csv", 'w', encoding='utf-8-sig', newline='') as file:
+    writer = csv.writer(file)
+    headers = ["Phrase", "Frequency"]
+    writer.writerow(headers)
+    for row in all_word_freq:
+        # print(row)
+        writer.writerow([row, all_word_freq[row]])
+
+
+stop = ["动画表情", "图片"] # I will implement this later
+
+wordcloud = WordCloud(width=3840, height=2160, font_path='HanyiSentyRubber.ttf', colormap='winter', background_color="white").generate_from_frequencies(first_msg_contents)
 wordcloud.to_file("data/first_msg_contents.png")
 
-wordcloud = WordCloud(width=1600, height=800, font_path='HanyiSentyRubber.ttf', colormap='winter',min_font_size=7,background_color="white").generate_from_frequencies(all_word_freq)
+wordcloud = WordCloud(width=3840, height=2160, font_path='HanyiSentyRubber.ttf', colormap='winter',background_color="white").generate_from_frequencies(all_word_freq)
 wordcloud.to_file("data/all_msg_contents.png")
 
-color_scheme = ["#003f5c","#bc5090","#ffa600", "#58508d"]
+from heapq import nlargest
+
+color_scheme = ["#fd7f6f", "#7eb0d5", "#b2e061", "#bd7ebe", "#ffb55a", "#ffee65", "#beb9db", "#fdcce5", "#8bd3c7"]
+all_word_freq_tracedata = []
+most_frequent_words = nlargest(50, all_word_freq, key=all_word_freq.get)
+word_freq = []
+for word in most_frequent_words:
+    word_freq.append(all_word_freq[word])
+
+all_word_freq_trace = go.Bar(
+    x=list(most_frequent_words),
+    y=list(word_freq),
+    marker=dict(
+        color=random.choice(color_scheme),
+    )
+)
+all_word_freq_tracedata.append(all_word_freq_trace)
+
+all_word_freq_bar = go.Figure(
+    data=all_word_freq_tracedata,
+    layout_title_text="All word frequency"
+)
+
+first_word_freq_tracedata = []
+most_frequent_words = nlargest(50, first_msg_contents, key=first_msg_contents.get)
+word_freq = []
+for word in most_frequent_words:
+    word_freq.append(first_msg_contents[word])
+
+first_word_freq_trace = go.Bar(
+    x=list(most_frequent_words),
+    y=list(word_freq),
+    marker=dict(
+        color=random.choice(color_scheme),
+    )
+)
+first_word_freq_tracedata.append(first_word_freq_trace)
+
+first_word_freq_bar = go.Figure(
+    data=first_word_freq_tracedata,
+    layout_title_text="First message word frequency"
+)
+
 total_daily_messages_tracedata = []
 for u in users:
+    c = random.choice(color_scheme)
     trace = go.Bar(
         x=list(daily_msg[u].keys()),
         y=list(daily_msg[u].values()),
         name=f'by {u}',
         marker=dict(
-            color=random.choice(color_scheme),
+            color=c,
         )
     )
+    color_scheme.remove(c)
     total_daily_messages_tracedata.append(trace)
 
 total_daily_messages = go.Figure(
     data=total_daily_messages_tracedata,
     layout_title_text="Total Daily Messages"
 )
 
+color_scheme = ["#003f5c","#58508d","#bc5090", "#ff6361", "#ffa600"]
 monthly_first_msg_tracedata = []
+#print(monthly_first_msg)
 for u in users:
+    if u not in monthly_first_msg:
+        continue
+    c = random.choice(color_scheme)
     trace = go.Bar(
         x=list(monthly_first_msg[u].keys()),
         y=list(monthly_first_msg[u].values()),
         name=f'by {u}',
         marker=dict(
-            color=random.choice(color_scheme),
+            color=c,
         )
     )
+    color_scheme.remove(c)
     monthly_first_msg_tracedata.append(trace)
 
 monthly_first_message = go.Figure(
     data=monthly_first_msg_tracedata,
     layout_title_text="Monthly First Message"
 )
 
+all_word_freq_bar.show()
+first_word_freq_bar.show()
 total_daily_messages.show()
 monthly_first_message.show()
 
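The two new top-50 charts above follow the same pattern: pick the 50 highest-count entries from a frequency dict with heapq.nlargest, then plot them as a single plotly Bar trace. A self-contained sketch of that pattern, with a made-up frequency dict standing in for all_word_freq / first_msg_contents (the imports assume plotly and random as used elsewhere in the script):

import random
from heapq import nlargest
import plotly.graph_objects as go

# Toy stand-in for all_word_freq / first_msg_contents.
word_freq = {"hello": 42, "ok": 37, "lol": 29, "brb": 5}

color_scheme = ["#fd7f6f", "#7eb0d5", "#b2e061", "#bd7ebe", "#ffb55a"]

# Top 50 entries by frequency (fewer if the dict is smaller).
top_words = nlargest(50, word_freq, key=word_freq.get)
counts = [word_freq[w] for w in top_words]

fig = go.Figure(
    data=[go.Bar(
        x=top_words,
        y=counts,
        marker=dict(color=random.choice(color_scheme)),
    )],
    layout_title_text="All word frequency",
)
fig.show()

In the multi-user charts the commit also removes each chosen color from color_scheme after building a trace, so no two users end up with the same bar color.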

mindful.py

Lines changed: 5 additions & 4 deletions
@@ -7,8 +7,8 @@
 import csv
 
 # Variables
-CHATNAME = "小狐狸🦊"
-SCROLLS = 500
+CHATNAME = "死亡组(逃学失败版)"
+SCROLLS = 1000
 
 # Classes
 class RawMessage:
@@ -90,15 +90,16 @@ def printMsg(message):
 for cycle in range(0, SCROLLS):
     chats = chat_win.wrapper_object().descendants()
     cur_cycle = []
-    print(f"Collected {len(all_msg)} (raw) messages")
+    print(cycle)
+    # print(f"Collected {len(all_msg)} (raw) messages")
     for message in chats:
         classname = message.friendly_class_name()
         if (classname == "ListItem"):
             time, author, msg = extract()
             cur_cycle.append(RawMessage(time, author, msg))
     all_msg.extend(cur_cycle)
     cords = chat_win.rectangle()
-    pywinauto.mouse.scroll(wheel_dist=4, coords=(cords.left+10, cords.bottom-10))
+    pywinauto.mouse.scroll(wheel_dist=5, coords=(cords.left+10, cords.bottom-10))
 
 all_msg = removeDuplicates()
 
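removeDuplicates() is called after the scroll loop but its body is outside this diff; overlapping scroll positions re-capture the same ListItems, so something has to drop the repeats. A plausible sketch of such a function, assuming RawMessage keeps time, author and msg as attributes (the real one in mindful.py takes no arguments and works on the module-level all_msg):

def remove_duplicates(all_msg):
    # Keep the first occurrence of each (time, author, msg) triple, preserving order.
    seen = set()
    unique = []
    for m in all_msg:
        key = (m.time, m.author, m.msg)
        if key not in seen:
            seen.add(key)
            unique.append(m)
    return unique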
