Skip to content

Commit eab9465

Browse files
authored
Merge pull request #20 from castlabs/VTK-2434-striprtf-fonttbl-support
VTK-2434: striprtf fonttbl support
2 parents e6d1ea1 + a60f726 commit eab9465

File tree

7 files changed: 34 additions and 6 deletions

pycaption/pl_stt.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from striprtf import striprtf
12
from .base import BaseReader, CaptionSet, CaptionList, Caption, CaptionNode
23
from .exceptions import CaptionReadNoCaptions, InvalidInputError
34

@@ -39,6 +40,8 @@ def _parse_sub(self, sub):
3940
return sub_start, sub_end, sub_text
4041

4142
def detect(self, content):
43+
if content.startswith(u'{\\rtf1'):
44+
content = striprtf.rtf_to_text(content)
4245
if self._get_header(content) and self._get_body(content):
4346
return True
4447
else:
@@ -59,6 +62,8 @@ def _guess_framerate(self, nonempty_splits):
5962
def read(self, content, lang="en-US"):
6063
if type(content) != str:
6164
raise InvalidInputError("The content is not a unicode string.")
65+
if content.startswith(u'{\\rtf1'):
66+
content = striprtf.rtf_to_text(content)
6267

6368
try:
6469
header = self._get_header(content)

pycaption/stl/ebu_stl_reader.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -237,15 +237,19 @@ def decode(self, input, errors="strict"):
237237
state = None
238238
return output, len(input)
239239

240-
def search(self, name):
241-
if name in ("iso6937", "iso_6937-2"):
242-
return codecs.CodecInfo(self.encode, self.decode, name="iso_6937-2")
240+
def search(name):
241+
if name in ("iso6937", "iso_6937-2", "iso_6937_2"):
242+
return codecs.CodecInfo(
243+
name="iso_6937-2",
244+
encode=iso6937().encode,
245+
decode=iso6937().decode,
246+
)
243247

244248
def encode(self, input, errors="strict"):
245249
pass
246250

247251

248-
codecs.register(iso6937().search)
252+
codecs.register(iso6937.search)
249253

250254

251255
class STLReader(BaseReader):

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ Pillow>=7.0.0
55
fonttools~=4.33.3
66
langcodes~=3.3.0
77
arabic-reshaper==3.0.0
8-
python-bidi==0.4.2
8+
python-bidi==0.4.2
9+
git+https://github.com/castlabs/striprtf.git@4d2a1fb9b1437fe068bdc3ffb1c87f0a4fe5d373#egg=striprtf

setup.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77
'README.rst')
88
with open('requirements.txt') as f:
99
requirements = f.read().splitlines()
10+
install_requires = [req for req in requirements if not req.startswith('git+')]
11+
install_requires += [req.split('#egg=')[1] + ' @ ' + req for req in requirements if req.startswith('git+')]
1012

13+
print(install_requires)
1114

1215
setup(
1316
name='pycaption',
@@ -17,7 +20,7 @@
1720
author='Sebastian Annies',
1821
author_email='[email protected]',
1922
url='https://github.com/castlabs/pycaption',
20-
install_requires=requirements,
23+
install_requires=install_requires,
2124
packages=find_packages(),
2225
include_package_data=True,
2326
classifiers=[

tests/ebu1991/test.stl

62.8 KB
Binary file not shown.

tests/ebu1991/test2.stl

13 KB
Binary file not shown.

tests/test_stl.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import unittest
2+
from os.path import join, dirname, abspath
3+
4+
from pycaption.stl.ebu_stl_reader import STLReader
5+
6+
7+
class SRTtoWebVTTTestCase(unittest.TestCase):
    """Check that ``STLReader`` detects and reads the bundled ``test.stl`` fixture.

    NOTE(review): the class name mentions SRT/WebVTT although the test
    exercises ``STLReader``; the name is kept unchanged so existing test
    selectors keep working.
    """

    # Directory holding this test module; fixtures are resolved relative to it.
    directory = dirname(abspath(__file__))

    def test_stl(self):
        """Detect and read ``ebu1991/test.stl`` and check the "de" caption count."""
        fixture_path = join(self.directory, "ebu1991", "test.stl")
        # Read the fixture as raw bytes; the context manager closes the handle
        # instead of leaving it to the garbage collector.
        with open(fixture_path, "rb") as fixture:
            content = fixture.read()

        stl = STLReader()
        self.assertTrue(stl.detect(content))
        captions = stl.read(content)

        self.assertEqual(len(captions.get_captions("de")), 494)

0 commit comments

Comments
 (0)