This repository has been archived by the owner on Jun 15, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollect.py
211 lines (174 loc) · 6.89 KB
/
collect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import glob
import os
import sys
from optparse import OptionParser
from struct import Struct
from urllib.parse import parse_qs, unquote
from urllib.request import urlopen
from urllib.error import HTTPError
import struct
import codecs
def tenhou_hash(log_id):
    """Translate an obfuscated Tenhou log id into its plain form.

    The text after the last ``-`` in ``log_id`` is its code. When the code
    starts with ``x``, the hex digits that follow are read as three
    big-endian 16-bit words and XOR-ed with entries of a fixed lookup table,
    yielding a replacement code of eight hex digits. Any other id is
    returned unchanged.

    Args:
        log_id: Log identifier, e.g. ``2014050315gm-0009-0000-x12345678abcd``.

    Returns:
        ``log_id`` with its code replaced when it was obfuscated, otherwise
        ``log_id`` itself.

    Raises:
        ValueError: If ``log_id`` contains no ``-``, or an ``x`` code is not
            valid hexadecimal.
        struct.error: If an ``x`` code does not hold exactly six bytes.
    """
    table = (
        22136, 52719, 55146, 42104, 59591, 46934, 9248, 28891,
        49597, 52974, 62844, 4015, 18311, 50730, 43056, 17939,
        64838, 38145, 27008, 39128, 35652, 63407, 65535, 23473,
        35164, 55230, 27536, 4386, 64920, 29075, 42617, 17294, 18868, 2081,
    )
    code_pos = log_id.rindex("-") + 1
    code = log_id[code_pos:]
    # startswith() also copes with an empty code (an id ending in "-"),
    # where indexing code[0] would raise IndexError.
    if not code.startswith("x"):
        return log_id

    a, b, c = struct.unpack(">HHH", bytes.fromhex(code[1:]))

    # Ids whose 12-character prefix sorts after this one derive the table
    # offset from digits of the id itself; older ids always use offset 0.
    index = 0
    if log_id[:12] > "2010041111gm":
        x = int("3" + log_id[4:10])
        y = int(log_id[9])
        # y is a single digit, so the modulus is 24..33 and index + 1 never
        # runs past the 34-entry table.
        index = x % (33 - y)

    first = (a ^ b ^ table[index]) & 0xFFFF
    second = (b ^ c ^ table[index] ^ table[index + 1]) & 0xFFFF
    return log_id[:code_pos] + struct.pack(">HH", first, second).hex()
def prepare_sol_files():
    """Find every Tenhou ``mjinfo.sol`` Flash shared-object file of the current user.

    Glob patterns for the known Chrome/Chromium profile locations and for the
    standalone Flash Player directory are expanded relative to the user's
    home directory.

    Returns:
        A list of matching file paths: Chrome/Chromium matches first (in the
        order the profile locations are listed), then Flash Player matches.
        Empty when nothing is found.
    """
    home = os.path.expanduser('~')

    # Location of the shared object inside a Chrome profile directory.
    pepper_template = '{0}/Pepper Data/Shockwave Flash/WritableRoot/#SharedObjects/*/mjv.jp/mjinfo.sol'
    profile_globs = (
        '.config/chromium/*/',                           # linux
        '.config/google-chrome/*/',                      # linux
        'Library/Application Support/Google/Chrome/*/',  # mac os
        'AppData/Local/Google/Chrome/User Data/*/',      # windows
    )

    patterns = [
        os.path.join(home, pepper_template.format(profile))
        for profile in profile_globs
    ]
    # FireFox?
    patterns.append(
        os.path.join(home, '.macromedia/Flash_Player/#SharedObjects/*/mjv.jp/mjinfo.sol')
    )

    return [path for pattern in patterns for path in glob.glob(pattern)]
def extract_logs_from_windows_client():
    """Read the log entries recorded by the Tenhou Windows client.

    Looks for ``AppData/Local/C-EGG/tenhou/130/config.ini`` under the user's
    home directory and collects the whitespace-separated entries that follow
    the ``[LOG]`` marker.

    Returns:
        A list of strings, one per entry, with everything up to and including
        the first ``=`` (the entry's index) removed. Empty when the config
        file does not exist or contains no ``[LOG]`` marker.

    Raises:
        UnicodeDecodeError: If the config file contains non-ASCII bytes.
    """
    config_path = os.path.join(os.path.expanduser('~'), 'AppData/Local/C-EGG/tenhou/130/config.ini')
    if not os.path.exists(config_path):
        return []

    with open(config_path, 'rb') as f:
        data = f.read().decode('ASCII')

    sections = data.split('[LOG]')
    # A config without a [LOG] marker simply has no logs to report; indexing
    # sections[1] unconditionally would raise IndexError here.
    if len(sections) < 2:
        return []

    # The first token after the marker is not a log entry, so cut it.
    entries = sections[1].split()[1:]

    # Strip the leading "<index>=" from each entry, keeping any later "=".
    return [entry.partition('=')[2] for entry in entries]
def _read_sol_log_lines(data):
    """Return the non-empty ``logstr`` lines stored in one ``mjinfo.sol`` blob.

    This is a limited parser for Flash Local Shared Object files - a more
    complete implementation may be found at:
    https://pypi.python.org/pypi/PyAMF
    """
    header = Struct('>HI10s8sI')
    ushort = Struct('>H')
    ubyte = Struct('>B')

    magic, _objlength, magic2, mjinfo, padding = header.unpack_from(data)
    # Explicit checks instead of assert statements, which disappear under
    # "python -O" and would let the parser run over arbitrary data.
    if magic != 0xbf or magic2 != b'TCSO\0\x04\0\0\0\0':
        raise ValueError('Not a Flash Local Shared Object file')
    if mjinfo != b'\0\x06mjinfo' or padding != 0:
        raise ValueError('Shared object is not named "mjinfo"')

    lines = []
    offset = header.size
    while offset < len(data):
        # Each entry: 2-byte name length, name, 1-byte AMF0 type marker,
        # type-dependent payload, one zero trailer byte.
        length, = ushort.unpack_from(data, offset)
        offset += ushort.size
        name = data[offset:offset + length]
        offset += length
        amf0_type, = ubyte.unpack_from(data, offset)
        offset += ubyte.size

        # Type 2: UTF-8 String, prefixed with 2-byte length
        if amf0_type == 2:
            length, = ushort.unpack_from(data, offset)
            offset += ushort.size
            value = data[offset:offset + length]
            offset += length
        # Type 6: Undefined
        elif amf0_type == 6:
            value = None
        # Type 1: Boolean
        elif amf0_type == 1:
            value = bool(data[offset])
            offset += 1
        # Other types from the AMF0 specification are not implemented, as they
        # have not been observed in mjinfo.sol files. If required, see
        # http://download.macromedia.com/pub/labs/amf/amf0_spec_121207.pdf
        else:
            print("Unimplemented AMF0 type {} at offset={} (hex {})".format(amf0_type, offset, hex(offset)))
            # The payload size is unknown, so the next entry cannot be
            # located; keep what was read so far and stop.
            break

        if data[offset] != 0:
            raise ValueError('Missing entry trailer at offset={}'.format(offset))
        offset += 1

        # Only a string-valued logstr carries log lines; an Undefined or
        # Boolean value has nothing to split.
        if name == b'logstr' and isinstance(value, bytes):
            lines.extend(line.decode('ASCII') for line in value.split(b'\n') if line)
    return lines


def parse_sol_files(sol_files):
    """Collect the Tenhou log lines stored in Flash ``mjinfo.sol`` files.

    Args:
        sol_files: Iterable of paths to ``mjinfo.sol`` files.

    Returns:
        A list of the non-empty lines of every ``logstr`` entry, decoded as
        ASCII, accumulated over all files in the order given.

    Raises:
        ValueError: If a file does not carry the expected shared-object
            header or an entry lacks its zero trailer byte.
        struct.error: If a file is too short to hold the header or an entry.
        IndexError: If a file is truncated in the middle of an entry.
        UnicodeDecodeError: If a log line contains non-ASCII bytes.
    """
    results = []
    for sol_file in sol_files:
        print("Reading Flash state file: {0}\n".format(sol_file))
        with open(sol_file, 'rb') as f:
            data = f.read()
        # Accumulate rather than overwrite, so the logs of every file are
        # kept and not just those of the last one.
        results.extend(_read_sol_log_lines(data))
    return results
def download_logs(directory, results):
    """Download the XML of every listed game into *directory*.

    Args:
        directory: Existing directory that receives one ``<log name>.xml``
            file per game.
        results: Iterable of query strings; the log name is taken from the
            first ``file`` parameter of each.

    Entries without a ``file`` parameter and games whose target file already
    exists are skipped. A 404 response is reported and skipped as well.

    Raises:
        urllib.error.HTTPError: For any HTTP failure other than 404.
    """
    for log_id in results:
        file_values = parse_qs(log_id).get('file')
        if not file_values:
            # Without a log name there is nothing to request; skipping keeps
            # one malformed entry from aborting the whole batch.
            print("Skipping entry without a log name: {!r}".format(log_id))
            continue
        log_name = file_values[0]
        hashed_log_name = tenhou_hash(log_name)

        # save file with original name
        target_fname = os.path.join(directory, "{}.xml".format(log_name))
        if os.path.exists(target_fname):
            print("Game {} already downloaded".format(hashed_log_name))
            continue

        print("Downloading game {}".format(hashed_log_name))
        try:
            # The context manager closes the HTTP response even if read() fails.
            with urlopen('http://e.mjv.jp/0/log/?{0}'.format(hashed_log_name)) as response:
                data = response.read()
        except HTTPError as e:
            if e.code != 404:
                raise
            print("Could not download game {}. Is the game still in progress?".format(hashed_log_name))
            continue

        with open(target_fname, 'wb') as f:
            f.write(data)
def main():
    """Collect Tenhou log entries, record new ones and optionally download them.

    Log entries are gathered from Flash shared-object files and, on Windows,
    from the Tenhou client config. Entries not yet listed in the meta file
    (``--meta``) are appended to it, one per line. When ``--directory`` is
    given, the newly found games are downloaded into that directory, which is
    created if necessary.
    """
    attrs = OptionParser()
    attrs.add_option('-d', '--directory',
                     help='Directory to store downloaded XML. If empty, script will not to do downloads')
    attrs.add_option('-m', '--meta',
                     default=os.path.join(os.path.expanduser('~'), 'tenhou-meta.txt'),
                     help='File for store meta information')
    opts, args = attrs.parse_args()
    if args:
        attrs.error('This command takes no positional arguments')

    results = list(parse_sol_files(prepare_sol_files()))
    if sys.platform.startswith('win'):
        results += extract_logs_from_windows_client()

    # let's decode lines
    results = [unquote(i) for i in results]

    old_text = ''
    if os.path.exists(opts.meta):
        with open(opts.meta, 'r', encoding='utf-8') as f:
            old_text = f.read()

    # Entries already recorded, plus the empty string so that a blank entry
    # is never written to the meta file or handed to the downloader.
    seen = set(old_text.split('\n'))
    seen.add('')

    # Keep first-seen order (a set difference would shuffle the entries) so
    # that the meta file and the download order are reproducible.
    new_logs = []
    for entry in results:
        if entry not in seen:
            seen.add(entry)
            new_logs.append(entry)

    if not new_logs:
        print('Nothing to add')
        return

    with open(opts.meta, 'a', encoding='utf-8') as f:
        # we append old file, need to add new line - but only when the
        # existing content does not already end with one
        if old_text and not old_text.endswith('\n'):
            f.write('\n')
        f.write('\n'.join(new_logs))
    print('Added {0} new logs'.format(len(new_logs)))

    if opts.directory:
        os.makedirs(opts.directory, exist_ok=True)
        download_logs(opts.directory, new_logs)


if __name__ == '__main__':
    main()