-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
134 lines (114 loc) · 3.9 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import re
class ConvertError(Exception):
    """Error raised when a lyric text cannot be converted."""
def mlrc2vrc(ori_txt):
    """
    Convert a mixed-lrc text into a vrc-format lyric object.

    Every ``[...]`` tag opens an entry. Among the lines that follow the tag,
    the first non-empty one is taken as the origin lyric and the second
    non-empty one (if any) as its translation. Surrounding whitespace,
    including the ``\\r`` of CRLF line endings, is dropped from each line.

    :param ori_txt: string, the original mixed-lrc text
    :return: object, the vrc-format lyric; ``translated`` is True as soon as
        one entry carries a translation line
    """
    pattern = re.compile(r'(\[.*?\])\s*([^\[\]]*)')
    origin_lines = []
    translate_lines = []
    translated = False
    for time_stamp, lrc in pattern.findall(ori_txt):
        # Strip each line on its own so CRLF input leaves no '\r' behind, and
        # drop blank lines so a gap between origin and translation does not
        # hide the translation.
        rows = [row.strip() for row in lrc.split('\n')]
        rows = [row for row in rows if row]
        if len(rows) > 1:
            translated = True
        origin_lines.append(time_stamp + (rows[0] if rows else '') + '\n')
        translate_lines.append(
            time_stamp + (rows[1] if len(rows) > 1 else '') + '\n'
        )
    return {
        'karaoke': False,
        'scrollDisabled': False,
        'translated': translated,
        'origin': {
            'version': 2,
            'text': ''.join(origin_lines)
        },
        'translate': {
            'version': 2,
            'text': ''.join(translate_lines)
        }
    }
def lrcs2mlrc(origin, translated):
    """
    Merge an origin lrc text and a translated lrc text into one mixed-lrc text.

    Each entry of ``origin`` is emitted as three lines: the ``[...]`` tag, the
    origin lyric, and the translation registered under the same tag in
    ``translated`` (an empty line when there is none). When a tag occurs
    several times in ``translated`` the last occurrence wins.

    :param origin: string, lrc text with origin language
    :param translated: string, lrc text with translated language
    :return: string, mixed-lrc text with origin language at first line and translated language at second line
    """
    pattern = re.compile(r'(\[.*?\])\s*([^\[\]]*)')
    # The regex captures whatever whitespace trails a lyric (nothing, one
    # newline, or several), so normalise it here instead of relying on it to
    # separate the output lines.
    trans_dict = {
        time_stamp: lrc.strip()
        for time_stamp, lrc in pattern.findall(translated)
    }
    entries = []
    for time_stamp, ori_lrc in pattern.findall(origin):
        entries.append('{}\n{}\n{}\n'.format(
            time_stamp, ori_lrc.strip(), trans_dict.get(time_stamp, '')
        ))
    return ''.join(entries)
def vrc2mlrc(vrc_obj):
    """
    Build a mixed-lrc text from the two lyric tracks of a vrc object.

    Reads ``vrc_obj['origin']['text']`` and ``vrc_obj['translate']['text']``
    and hands both to ``lrcs2mlrc``.

    :param vrc_obj: object, the vrc-format lyric
    :return: string, mixed-lrc text with origin language at first line and translated language at second line
    """
    origin_text = vrc_obj['origin']['text']
    translated_text = vrc_obj['translate']['text']
    return lrcs2mlrc(origin_text, translated_text)
def ass2vrc(ass_txt):
    """
    Convert an ass subtitle text into a vrc-format lyric object.

    Only the ``[Events]`` section is read. Its first non-empty line is taken
    as the format line and must name the ``Start``, ``End`` and ``Text``
    fields. Every following non-empty line is treated as an event whose text
    holds the origin lyric and its translation separated by ``\\N``. Reading
    stops at the next ``[...]`` section header.

    :param ass_txt: str, lyric in format of ass
    :return: object, the vrc-format lyric; each event contributes one
        ``[start]lyric`` line to the origin text and one to the translate text
    :raises ConvertError: if the format line lacks ``Start``, ``End`` or
        ``Text``, if an event has too few fields, or if an event text holds
        no ``\\N``-separated translation
    """
    origin_lines = []
    translate_lines = []
    # 0: before [Events], 1: expecting the format line, 2: reading events
    state = 0
    start_id = end_id = text_id = -1
    field_count = 0
    for idx, raw_line in enumerate(ass_txt.split('\n')):
        line = raw_line.strip()  # also removes the '\r' of CRLF files
        if line == '[Events]':
            state = 1
            continue
        if state == 0 or not line:
            continue
        if state == 2 and line.startswith('[') and line.endswith(']'):
            break  # a new section begins; the events are over
        # Everything after the first ':' is the comma-separated payload
        # (the whole line when there is no ':').
        payload = line[line.find(':') + 1:]
        if state == 1:
            # find position for start, end, text.
            names = [name.strip() for name in payload.split(',')]
            positions = {name: i for i, name in enumerate(names)}
            start_id = positions.get('Start', -1)
            end_id = positions.get('End', -1)
            text_id = positions.get('Text', -1)
            if start_id == -1:
                raise ConvertError('Fail to find "Start" attribute in [Events]')
            if end_id == -1:
                raise ConvertError('Fail to find "End" attribute in [Events]')
            if text_id == -1:
                raise ConvertError('Fail to find "Text" attribute in [Events]')
            field_count = len(names)
            state = 2
            continue
        # Split no further than the number of declared fields so commas
        # inside the trailing text field stay part of the lyric.
        items = payload.split(',', field_count - 1)
        if len(items) <= max(start_id, end_id, text_id):
            raise ConvertError('Fail to parse event in line {:d}'.format(idx + 1))
        start = items[start_id].strip()
        text = items[text_id].split('\\N')
        if len(text) < 2:
            raise ConvertError('Fail to find translation in line {:d}'.format(idx + 1))
        # Trim only the ends: spaces inside the lyric are part of the text.
        ori_txt, trans_txt = text[0].strip(), text[1].strip()
        origin_lines.append('[{}]{}\n'.format(start, ori_txt))
        translate_lines.append('[{}]{}\n'.format(start, trans_txt))
    return {
        'karaoke': False,
        'scrollDisabled': False,
        'translated': True,
        'origin': {
            'version': 2,
            'text': ''.join(origin_lines)
        },
        'translate': {
            'version': 2,
            'text': ''.join(translate_lines)
        }
    }