diff --git a/README.md b/README.md
index cee661d..b899780 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ One can use `pip` to install `pdf2video` directly from GitHub:
```
python3 -m pip install git+https://github.com/tjunttila/pdf2video.git
```
-See the [PyPA Installing Packages tutorial](https://packaging.python.org/tutorials/installing-packages/) if you are not familiar with installing Python packages.
+See the [PyPA Installing Packages tutorial](https://packaging.python.org/tutorials/installing-packages/) for information on installing Python packages and on Python virtual environments.
# Usage
diff --git a/pdf2video/parser.py b/pdf2video/parser.py
index 9cd8a09..76f4fee 100644
--- a/pdf2video/parser.py
+++ b/pdf2video/parser.py
@@ -4,19 +4,29 @@
License: The MIT License
"""
+from abc import ABC, abstractmethod
import re
+import sys
-class AST:
- def __init__(self):
- pass
+class AST(ABC):
+ """Base class for abstract syntax tree nodes."""
+
+ @abstractmethod
def to_ssml(self, neural):
- assert False
+ """Get the SSML representation of the sub-tree."""
+
+ @abstractmethod
def to_words(self):
- assert False
+ """Get the plain words representation of the sub-tree."""
+
+ @abstractmethod
def to_sub(self):
- assert False
+ """Get the sub-titles representation of the sub-tree."""
+
class ASTWord(AST):
+ """An AST node for a word."""
def __init__(self, text):
+ super().__init__()
self.text = text
def to_ssml(self, neural):
return self.text
@@ -24,7 +34,9 @@ def to_words(self):
return [self.text]
def to_sub(self):
return self.text
+
class ASTBreak(AST):
+ """An AST node for a break."""
def __init__(self, time):
self.time = time
def to_ssml(self, neural):
@@ -33,7 +45,9 @@ def to_words(self):
return []
def to_sub(self):
return ''
+
class ASTDelim(AST):
+ """An AST node for a delimiter."""
def __init__(self, text):
self.text = text
def to_ssml(self, neural):
@@ -42,7 +56,9 @@ def to_words(self):
return []
def to_sub(self):
return self.text
+
class ASTSpace(AST):
+ """An AST node for a white space."""
def __init__(self):
pass
def to_ssml(self, neural):
@@ -51,32 +67,38 @@ def to_words(self):
return []
def to_sub(self):
return ' '
+
class ASTEmph(AST):
+ """An AST node for emphasized text."""
def __init__(self, children):
self.children = children
def to_ssml(self, neural):
children_ssml = "".join([child.to_ssml(neural) for child in self.children])
if neural:
return ''+children_ssml+' '
- else:
- return ''+children_ssml+' '
+ return ''+children_ssml+' '
def to_words(self):
result = []
- for child in self.children: result += child.to_words()
+ for child in self.children:
+ result += child.to_words()
return result
def to_sub(self):
return "".join([child.to_sub() for child in self.children])
+
class ASTPhoneme(AST):
+ """An AST node for text read with phonemes."""
def __init__(self, text, xsampa):
self.text = text
self.xsampa = xsampa
def to_ssml(self, neural):
return f'{self.text} '
def to_words(self):
- return re.split('\s+', self.text.strip())
+ return re.split(r'\s+', self.text.strip())
def to_sub(self):
return self.text
+
class ASTSub(AST):
+ """An AST node for text with different sub-title representation."""
def __init__(self, children, subtitles):
self.children = children
self.subtitles = subtitles
@@ -85,11 +107,14 @@ def to_ssml(self, neural):
return "".join(children_ssml)
def to_words(self):
result = []
- for child in self.children: result += child.to_words()
+ for child in self.children:
+ result += child.to_words()
return result
def to_sub(self):
return self.subtitles
+
class ASTSlow(AST):
+ """An AST node for text read slowly."""
def __init__(self, children):
self.children = children
def to_ssml(self, neural):
@@ -97,11 +122,14 @@ def to_ssml(self, neural):
return ''+children_ssml+' '
def to_words(self):
result = []
- for child in self.children: result += child.to_words()
+ for child in self.children:
+ result += child.to_words()
return result
def to_sub(self):
return "".join([child.to_sub() for child in self.children])
+
class ASTLow(AST):
+ """An AST node for text read in low pitch."""
def __init__(self, children):
self.children = children
def to_ssml(self, neural):
@@ -109,15 +137,17 @@ def to_ssml(self, neural):
if neural:
# prosody pitch not yet in neural TTS, make it slightly slower
return ''+children_ssml+' '
- else:
- return ''+children_ssml+' '
+ return ''+children_ssml+' '
def to_words(self):
result = []
- for child in self.children: result += child.to_words()
+ for child in self.children:
+ result += child.to_words()
return result
def to_sub(self):
return "".join([child.to_sub() for child in self.children])
+
class ASTHigh(AST):
+ """An AST node for text read in high pitch."""
def __init__(self, children):
self.children = children
def to_ssml(self, neural):
@@ -125,21 +155,23 @@ def to_ssml(self, neural):
if neural:
# prosody pitch not yet in neural TTS, make it slightly faster
return ''+children_ssml+' '
- else:
- return ''+children_ssml+' '
+ return ''+children_ssml+' '
def to_words(self):
result = []
- for child in self.children: result += child.to_words()
+ for child in self.children:
+ result += child.to_words()
return result
def to_sub(self):
return "".join([child.to_sub() for child in self.children])
+
class ASTSayAs(AST):
+ """An AST node for text read as letters."""
def __init__(self, letters):
self.letters = letters
def to_ssml(self, neural):
return ''+self.letters+' '
def to_words(self):
- return re.split('\s+', self.letters.strip())
+ return re.split(r'\s+', self.letters.strip())
def to_sub(self):
return self.letters
@@ -147,97 +179,96 @@ def to_sub(self):
def parse_to_ast(string, err_linenum = None):
"""Parse the script text string into a sequence of AST nodes."""
i = 0
- n = len(string)
+ string_length = len(string)
def read_until(chars):
nonlocal i
tmp = i
- while i < n and string[i] not in chars:
+ while i < string_length and string[i] not in chars:
i += 1
return string[tmp:i]
def err(msg):
- ln = f'On line {err_linenum}: ' if err_linenum != None else ''
- print(ln+msg)
- exit(1)
+ linenum_text = '' if err_linenum is None else f'On line {err_linenum}: '
+ print(linenum_text+msg)
+ sys.exit(1)
#assert False, msg
result = []
- while i < n:
+ while i < string_length:
if string[i] == '#':
if string[i:i+4] == '#sub':
- m = re.match('^#sub(.)(?P((?!\1).)*?)\\1(?P((?!\1).)+?)\\1', string[i:])
- if m == None:
+ match = re.match(
+ '^#sub(.)(?P((?!\1).)*?)\\1(?P((?!\1).)+?)\\1',
+ string[i:])
+ if match is None:
err(f'Malformed #sub "{string[i:]}"')
- t = parse_to_ast(m['text'])
- result.append(ASTSub(t, m['sub']))
- i += len(m.group(0))
+ result.append(ASTSub(parse_to_ast(match['text']), match['sub']))
+ i += len(match.group(0))
continue
if string[i:i+5] == '#slow':
- m = re.match('^#slow(.)(?P((?!\1).)+?)\\1', string[i:])
- if m == None:
+ match = re.match('^#slow(.)(?P((?!\1).)+?)\\1', string[i:])
+ if match is None:
err(f'Malformed #slow "{string[i:]}"')
- t = parse_to_ast(m['text'])
- result.append(ASTSlow(t))
- i += len(m.group(0))
+ result.append(ASTSlow(parse_to_ast(match['text'])))
+ i += len(match.group(0))
continue
if string[i:i+4] == '#low':
- m = re.match('^#low(.)(?P((?!\1).)+?)\\1', string[i:])
- if m == None:
+ match = re.match('^#low(.)(?P((?!\1).)+?)\\1', string[i:])
+ if match is None:
err(f'Malformed #low "{string[i:]}"')
- t = parse_to_ast(m['text'])
- result.append(ASTLow(t))
- i += len(m.group(0))
+ result.append(ASTLow(parse_to_ast(match['text'])))
+ i += len(match.group(0))
continue
if string[i:i+5] == '#high':
- m = re.match('^#high(.)(?P((?!\1).)+?)\\1', string[i:])
- if m == None:
+ match = re.match('^#high(.)(?P((?!\1).)+?)\\1', string[i:])
+ if match is None:
err(f'Malformed #high "{string[i:]}"')
- t = parse_to_ast(m['text'])
- result.append(ASTHigh(t))
- i += len(m.group(0))
+ result.append(ASTHigh(parse_to_ast(match['text'])))
+ i += len(match.group(0))
continue
if string[i:i+3] == '#ph':
- m = re.match('^#ph(.)(?P((?!\1).)+?)\\1(?P((?!\1).)+?)\\1', string[i:])
- if m == None:
+ match = re.match(
+ '^#ph(.)(?P((?!\1).)+?)\\1(?P((?!\1).)+?)\\1',
+ string[i:])
+ if match is None:
err(f'Malformed #ph "{string[i:]}"')
- result.append(ASTPhoneme(m['text'], m['ph']))
- i += len(m.group(0))
+ result.append(ASTPhoneme(match['text'], match['ph']))
+ i += len(match.group(0))
continue
# Break #10
- m = re.match('^#(?P\d+)', string[i:])
- if m != None:
- result.append(ASTBreak(int(m['time'])))
- i += len(m.group(0))
+ match = re.match(r'^#(?P\d+)', string[i:])
+ if match:
+ result.append(ASTBreak(int(match['time'])))
+ i += len(match.group(0))
continue
err(f'Unrecognized script command "{string[i:]}"')
elif string[i] == '*':
- m = re.match('^\*(?P[^\*]+)\*', string[i:])
- if m == None:
+ match = re.match(r'^\*(?P[^\*]+)\*', string[i:])
+ if match is None:
err(f'Malformed emphasis "{string[i:]}"')
- t = parse_to_ast(m['text'])
- result.append(ASTEmph(t))
- i += len(m.group(0))
+ result.append(ASTEmph(parse_to_ast(match['text'])))
+ i += len(match.group(0))
elif string[i] == '@':
- m = re.match('^@(?P[^@]+)@', string[i:])
- if m == None:
- err(f'Malformed say-ass "{string[i:]}"')
- result.append(ASTSayAs(m['text']))
- i += len(m.group(0))
+ match = re.match(r'^@(?P[^@]+)@', string[i:])
+ if match is None:
+ err(f'Malformed say-as "{string[i:]}"')
+ result.append(ASTSayAs(match['text']))
+ i += len(match.group(0))
else:
- m = re.match('^\s+', string[i:])
- if m != None:
+ match = re.match(r'^\s+', string[i:])
+ if match:
result.append(ASTSpace())
- i += len(m.group(0))
+ i += len(match.group(0))
continue
# Negative numbers are words
- m = re.match('^-\d+', string[i:])
- if m != None:
- result.append(ASTWord(m.group(0)))
- i += len(m.group(0))
+ match = re.match(r'^-\d+', string[i:])
+ if match:
+ result.append(ASTWord(match.group(0)))
+ i += len(match.group(0))
continue
# Delimiters
- m = re.match('^[-.,:;!?"]', string[i:])
- if m != None:
- result.append(ASTDelim(m.group(0)))
- i += len(m.group(0))
+ match = re.match('^[-.,:;!?"]', string[i:])
+ if match:
+ result.append(ASTDelim(match.group(0)))
+ i += len(match.group(0))
continue
word = read_until([' ','\t','#','*','@','"','.',',',':',';','!','?'])
result.append(ASTWord(word))
diff --git a/pdf2video/pdf2video.py b/pdf2video/pdf2video.py
index 7627dc2..734af30 100644
--- a/pdf2video/pdf2video.py
+++ b/pdf2video/pdf2video.py
@@ -21,31 +21,40 @@
from subprocess import PIPE
import sys
-from .parser import *
+from .parser import parse_to_ast, parse
-voices = ['Zeina', 'Zhiyu', 'Naja', 'Mads', 'Lotte', 'Ruben', 'Nicole', 'Russell', 'Amy', 'Emma', 'Brian', 'Aditi', 'Raveena', 'Ivy', 'Joanna', 'Kendra', 'Kimberly', 'Salli', 'Joey', 'Justin', 'Matthew', 'Geraint', 'Céline', 'Celine', 'Léa', 'Mathieu', 'Chantal', 'Marlene', 'Vicki', 'Hans', 'Aditi', 'Dóra', 'Dora', 'Karl', 'Carla', 'Bianca', 'Giorgio', 'Mizuki', 'Takumi', 'Seoyeon', 'Liv', 'Ewa', 'Maja', 'Jacek', 'Jan', 'Camila', 'Vitória', 'Vitoria', 'Ricardo', 'Inês', 'Ines', 'Cristiano', 'Carmen', 'Tatyana', 'Maxim', 'Conchita', 'Lucia', 'Enrique', 'Mia', 'Lupe', 'Penélope', 'Penelope', 'Miguel', 'Astrid', 'Filiz', 'Gwyneth']
+voices = ['Zeina', 'Zhiyu', 'Naja', 'Mads', 'Lotte', 'Ruben', 'Nicole',
+ 'Russell', 'Amy', 'Emma', 'Brian', 'Aditi', 'Raveena', 'Ivy',
+ 'Joanna', 'Kendra', 'Kimberly', 'Salli', 'Joey', 'Justin',
+ 'Matthew', 'Geraint', 'Céline', 'Celine', 'Léa', 'Mathieu',
+ 'Chantal', 'Marlene', 'Vicki', 'Hans', 'Aditi', 'Dóra', 'Dora',
+ 'Karl', 'Carla', 'Bianca', 'Giorgio', 'Mizuki', 'Takumi', 'Seoyeon',
+ 'Liv', 'Ewa', 'Maja', 'Jacek', 'Jan', 'Camila', 'Vitória', 'Vitoria',
+ 'Ricardo', 'Inês', 'Ines', 'Cristiano', 'Carmen', 'Tatyana', 'Maxim',
+ 'Conchita', 'Lucia', 'Enrique', 'Mia', 'Lupe', 'Penélope',
+ 'Penelope', 'Miguel', 'Astrid', 'Filiz', 'Gwyneth']
-voices_neural = ['Amy', 'Emma', 'Brian', 'Ivy', 'Joanna', 'Kendra', 'Kimberly', 'Salli', 'Joey', 'Justin', 'Kevin', 'Matthew', 'Camila', 'Lupe']
+voices_neural = ['Amy', 'Emma', 'Brian', 'Ivy', 'Joanna', 'Kendra',
+ 'Kimberly', 'Salli', 'Joey', 'Justin', 'Kevin', 'Matthew',
+ 'Camila', 'Lupe']
voices_conversational = ['Joanna', 'Matthew', 'Lupe']
def millis_to_srt(millis):
+ """Convert milliseconds time to the SRT subtitles format time string."""
result = ''
# milliseconds
- t = millis % 1000
- result = ('%03d' % t) + result
- millis -= t
- millis /= 1000
+ milliseconds = millis % 1000
+ result = ('%03d' % milliseconds) + result
+ millis = (millis - milliseconds) / 1000
# seconds
- t = millis % 60
- result = ('%02d,' % t) + result
- millis -= t
- millis /= 60
+ seconds = millis % 60
+ result = ('%02d,' % seconds) + result
+ millis = (millis - seconds) / 60
# minutes
- t = millis % 60
- result = ('%02d:' % t) + result
- millis -= t
- millis /= 60
+ minutes = millis % 60
+ result = ('%02d:' % minutes) + result
+ millis = (millis - minutes) / 60
# hours
result = ('%02d:' % millis) + result
# ready
@@ -60,35 +69,36 @@ def parse_page_range(args, execute, error):
if args.pages == 'all':
# --pages parameter was not given
# Use pdfinfo to find out the number of pages, select all
- r = execute(f'{args.pdfinfo} {args.pdf_file}')
+ cmd = f'{args.pdfinfo} {args.pdf_file}'
+ exec_result = execute(cmd)
nof_pages = None
- for line in r.stdout.decode('utf-8').split('\n'):
- m = re.match('^Pages:\s*(\d+)\s*$', line)
- if m:
- nof_pages = int(m.group(1))
+ for line in exec_result.stdout.decode('utf-8').split('\n'):
+ match = re.match(r'^Pages:\s*(\d+)\s*$', line)
+ if match:
+ nof_pages = int(match.group(1))
break
- if nof_pages == None:
+ if nof_pages is None:
error(f'Could not read the number of pages with "{cmd}"')
pages = list(range(1, nof_pages+1))
return pages
# --pages parameter was given, parse it
for comp in [c.strip() for c in args.pages.split(",")]:
- m = re.match(r'^(\d+)$', comp)
- if m:
- pages.append(int(m.group(1)))
+ match = re.match(r'^(\d+)$', comp)
+ if match:
+ pages.append(int(match.group(1)))
continue
- m = re.match(r'^(\d+)\s*-\s*(\d+)$', comp)
- if m:
- (start,end) = (int(m.group(1)), int(m.group(2)))
+ match = re.match(r'^(\d+)\s*-\s*(\d+)$', comp)
+ if match:
+ (start,end) = (int(match.group(1)), int(match.group(2)))
length = end - start + 1
- if length > 0 and length < 10000:
+ if 0 < length < 10000:
for i in range(start, end+1):
pages.append(i)
continue
error('Invalid page range component: '+comp)
return pages
-
+
def parse_only(args, scripts, scripts_names, error):
"""
Parse the 'only' range.
@@ -103,50 +113,56 @@ def parse_only(args, scripts, scripts_names, error):
# --only parameter was given, parse it
for comp in [c.strip() for c in args.only.split(",")]:
# Single number
- m = re.match(r'^[1-9]\d*$', comp)
- if m:
- num = int(m.group(0))
- if not(num <= len(scripts)):
- error(f'#page {num} was selected in --only, but only {len(scripts)} #pages exists')
+ match = re.match(r'^[1-9]\d*$', comp)
+ if match:
+ num = int(match.group(0))
+ if not num <= len(scripts):
+                error(f'#page {num} was selected in --only, ' \
+                      f'but only {len(scripts)} #pages exist')
only.add(num-1)
continue
# Numeric range
- m = re.match(r'^([1-9]\d*)\s*-\s*([1-9]\d*)$', comp)
- if m:
- (start,end) = (int(m.group(1)), int(m.group(2)))
+ match = re.match(r'^([1-9]\d*)\s*-\s*([1-9]\d*)$', comp)
+ if match:
+ (start,end) = (int(match.group(1)), int(match.group(2)))
length = end - start + 1
- if length > 0 and length < 10000:
+ if 0 < length < 10000:
for num in range(start, end+1):
- if not(num <= len(scripts)):
- error(f'#page {num} was selected in --only, but only {len(scripts)} #pages exists')
+                    if not num <= len(scripts):
+                        error(f'#page {num} was selected in --only, ' \
+                              f'but only {len(scripts)} #pages exist')
only.add(num-1)
continue
# Single name
- m = re.match(r'^[a-zA-Z_]+([1-9]\d*)?$', comp)
- if m:
- name = m.group(0)
+ match = re.match(r'^[a-zA-Z_]+([1-9]\d*)?$', comp)
+ if match:
+ name = match.group(0)
if name not in scripts_names:
- error(f'#page named "{name}" was selected in --only, but there is no #page with that name. Available #page names are: {",".join(sorted(scripts_names.keys()))}')
+ error(f'#page named "{name}" was selected in --only, ' \
+ f'but there is no #page with that name. ' \
+ f'Available #page names are: ' \
+ f'{",".join(sorted(scripts_names.keys()))}')
only.add(scripts_names[name])
continue
# name range
- m = re.match(r'^([a-zA-Z_]+)([1-9]\d*)-([1-9]\d*)$', comp)
- if m:
- (base,start,end) = (m.group(1),int(m.group(2)),int(m.group(3)))
+ match = re.match(r'^([a-zA-Z_]+)([1-9]\d*)-([1-9]\d*)$', comp)
+ if match:
+ (base,start,end) = (match.group(1),int(match.group(2)),int(match.group(3)))
length = end - start + 1
- if length > 0 and length < 10000:
+ if 0 < length < 10000:
for i in range(start, end+1):
name = base+str(i)
if name not in scripts_names:
- error(f'#page named "{name}" was selected in --only, but there is no #page with that name. Available #page names are: {",".join(sorted(scripts_names.keys()))}')
+ error(f'#page named "{name}" was selected in --only, ' \
+ f'but there is no #page with that name. ' \
+ f'Available #page names are: ' \
+ f'{",".join(sorted(scripts_names.keys()))}')
only.add(scripts_names[name])
-
- pages.append(i)
continue
error('Invalid "only" range component: '+comp)
return only
-
+
def read_scripts(script_file, error):
"""
Read all the scripts from a file.
@@ -157,11 +173,11 @@ def read_scripts(script_file, error):
in_script = False
in_script_name = None
try:
- with open(script_file, 'r', encoding='utf-8') as f:
+ with open(script_file, 'r', encoding='utf-8') as file_object:
linenum = 0
def err(msg):
error(f'on line {linenum}: {msg}')
- for line in f.readlines():
+ for line in file_object.readlines():
line = line.rstrip()
linenum += 1
if line == '':
@@ -171,19 +187,19 @@ def err(msg):
# Lines starting with % are comments, skip them
continue
# A "#page" line starting a new page?
- m = re.match(r'^#page\s*(?P\s+[a-zA-Z_]+([1-9]\d*)?)?\s*$', line)
- if m != None:
+ match = re.match(r'^#page\s*(?P\s+[a-zA-Z_]+([1-9]\d*)?)?\s*$', line)
+ if match:
if in_script:
# The previous #page script is now fully read, save it
- if in_script_name != None:
+ if in_script_name is not None:
if in_script_name in scripts_names:
err(f'#page named "{in_script_name}" defined twice')
scripts_names[in_script_name] = len(scripts)
scripts.append(script)
#print(m)
- name = m['name']
+ name = match['name']
#print(name)
- in_script_name = name.strip() if name != None else None
+ in_script_name = name.strip() if name is not None else None
in_script = True
script = []
continue
@@ -195,7 +211,7 @@ def err(msg):
script.append((line, linenum))
# All lines read, add the last page
if in_script:
- if in_script_name != None:
+ if in_script_name is not None:
if in_script_name in scripts_names:
err(f'#page named "{in_script_name}" defined twice')
scripts_names[in_script_name] = len(scripts)
@@ -211,12 +227,11 @@ def script_to_ssml_and_hash(script, args):
Also returns a hash of the voice, style, and the script
for caching audio files produced by the TTS system.
"""
-
- h = hashlib.sha256()
- h.update(args.voice.encode('utf-8'))
- h.update(str(args.neural).encode('utf-8'))
- h.update(str(args.conversational).encode('utf-8'))
- plain = ''
+
+ hash_value = hashlib.sha256()
+ hash_value.update(args.voice.encode('utf-8'))
+ hash_value.update(str(args.neural).encode('utf-8'))
+ hash_value.update(str(args.conversational).encode('utf-8'))
ssml = ''
ssml += ' '
if args.conversational:
@@ -232,78 +247,104 @@ def script_to_ssml_and_hash(script, args):
# End-of-the-line marks for subtitle synchronization
l_ssml += f' '
ssml += l_ssml
- h.update(l_ssml.encode('utf-8'))
+ hash_value.update(l_ssml.encode('utf-8'))
if args.conversational:
ssml += ''
ssml += ' '
ssml += '\n'
- return (ssml, h.hexdigest())
+ return (ssml, hash_value.hexdigest())
def main():
- description = "A tool for converting PDF presentations into narrated videos. Please see https://github.com/tjunttila/pdf2video/ for more details."
- p = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter, description = description)
- p.add_argument('--voice', metavar='V', default='Joanna',
+ """The main routine."""
+ description = 'A tool for converting PDF presentations into ' \
+ 'narrated videos. Please see ' \
+ 'https://github.com/tjunttila/pdf2video/ for more details.'
+ argp = argparse.ArgumentParser(
+ formatter_class = argparse.ArgumentDefaultsHelpFormatter,
+ description = description)
+ argp.add_argument('--voice', metavar='V', default='Joanna',
help='the applied TTS voice')
- p.add_argument('--neural', action='store_true',
+ argp.add_argument('--neural', action='store_true',
help='use neural TTS')
- p.add_argument('--conversational', action='store_true',
+ argp.add_argument('--conversational', action='store_true',
help='use conversational style')
- p.add_argument('--aws_profile', metavar='A', default='default',
+ argp.add_argument('--aws_profile', metavar='A', default='default',
help='a Polly-enabled AWS profile')
- p.add_argument('--audio_cache', metavar='C', default='pdf2video-cache',
+ argp.add_argument('--audio_cache', metavar='C', default='pdf2video-cache',
help='the directory for caching TTS audio files')
- p.add_argument('--temp_prefix', metavar='T', default='pdf2video-temp',
+ argp.add_argument('--temp_prefix', metavar='T', default='pdf2video-temp',
help='the prefix for the created temporary files')
- p.add_argument('--ignore_subtitles', action='store_true',
+ argp.add_argument('--ignore_subtitles', action='store_true',
help='do not include or produce subtitles')
- p.add_argument('--quiet', action='store_true',
+ argp.add_argument('--quiet', action='store_true',
help='do not print progress information')
- p.add_argument('--pages', metavar='P', default='all',
- help='The PDF page range of the form "1,3,4-7,1". Defines the mapping from the #page texts in the script file to selected PDF pages.')
- p.add_argument('--only', metavar='O', default='the full set',
- help='Only compile the selected #page texts. Used mainly during the development to select some of the #pages. A comma-sepated set of #page identifies, which can be (i) numbers, (ii) #page names, or (iii) ranges of of those. Example: "1,usage,scripts_1-2" compiles the first #page, the ones named usage, scripts_1, and scripts_2.')
- #p.add_argument('--output', metavar='O', default='video.mp4',
+ argp.add_argument('--pages', metavar='P', default='all', help=
+ 'The PDF page range of the form "1,3,4-7,1". ' \
+ 'Defines the mapping from the #page texts ' \
+ 'in the script file to selected PDF pages.')
+ argp.add_argument('--only', metavar='O', default='the full set',
+ help = 'Only compile the selected #page texts. ' \
+ 'Used mainly during the development to select some of ' \
+                      'the #pages. A comma-separated set of #page identifiers, ' \
+ 'which can be (i) numbers, (ii) #page names, or ' \
+                      '(iii) ranges of those. Example: "1,usage,scripts_1-2" ' \
+ 'compiles the first #page, the ones named usage, '\
+ 'scripts_1, and scripts_2.')
+ #argp.add_argument('--output', metavar='O', default='video.mp4',
# help="the output file")
- p.add_argument('--ffmpeg', default='ffmpeg',
+ argp.add_argument('--ffmpeg', default='ffmpeg',
help='the FFmpeg command line tool executable')
- p.add_argument('--pdfinfo', default='pdfinfo',
+ argp.add_argument('--pdfinfo', default='pdfinfo',
help='the "pdfinfo" executable from Poppler utils')
- p.add_argument('--pdftoppm', default='pdftoppm',
+ argp.add_argument('--pdftoppm', default='pdftoppm',
help='the "pdftoppm" executable from Poppler utils')
- p.add_argument('pdf_file', help="the input PDF file")
- p.add_argument('script_file', help="the input script file")
- p.add_argument('output_file', help="the output mp4 video file")
- #p.add_argument('files', nargs=argparse.REMAINDER)
- args = p.parse_args()
-
- def verbose(s):
- if not args.quiet: print(s)
-
+ argp.add_argument('pdf_file', help="the input PDF file")
+ argp.add_argument('script_file', help="the input script file")
+ argp.add_argument('output_file', help="the output mp4 video file")
+ #argp.add_argument('files', nargs=argparse.REMAINDER)
+ args = argp.parse_args()
+
+ def verbose(msg):
+ if not args.quiet:
+ print(msg)
+
temp_image_files = []
temp_ssml_files = []
temp_ts_files = []
def unlink(file_name):
- if file_name == None: return
- try: os.unlink(file_name)
- except FileNotFoundError: pass
+ if file_name is None:
+ return
+ try:
+ os.unlink(file_name)
+ except FileNotFoundError:
+ pass
def clean_temps():
# remove the created temporary files
- for file_name in temp_image_files: unlink(file_name)
- for file_name in temp_ssml_files: unlink(file_name)
- for file_name in temp_ts_files: unlink(file_name)
+ for file_name in temp_image_files:
+ unlink(file_name)
+ for file_name in temp_ssml_files:
+ unlink(file_name)
+ for file_name in temp_ts_files:
+ unlink(file_name)
+
def error(msg):
clean_temps()
- p.exit(1, msg+'\n')
+ argp.exit(1, msg+'\n')
+
def execute(cmd):
try:
- r = subprocess.run(re.split('\s+', cmd.strip()), stdout=PIPE, stderr=PIPE)
+ exec_result = subprocess.run(re.split(r'\s+', cmd.strip()),
+ stdout=PIPE, stderr=PIPE, check=False)
except Exception as err:
error(f'Error when executing "{cmd}".\n'+str(err))
- if r.returncode != 0:
+ if exec_result.returncode != 0:
#print(" ".join(r.args))
- error(f'Error when executing "{cmd}". The last 10 lines of the stderr output is as follows:\n' + '\n'.join((r.stderr.decode('utf-8').split('\n'))[-11:]))
- return r
+        error(f'Error when executing "{cmd}". The last 10 lines of ' \
+              f'the stderr output are as follows:\n' +
+              '\n'.join((exec_result.stderr.decode('utf-8').split('\n'))[-11:]))
+ return exec_result
+
def make_dir(dir_name):
if os.path.exists(dir_name):
if not os.path.isdir(dir_name):
@@ -313,25 +354,29 @@ def make_dir(dir_name):
if not args.output_file.endswith(".mp4"):
error("The output file name must end with .mp4")
-
+
pages = parse_page_range(args, execute, error)
-
+
# Check voice arguments consistency
if args.voice not in voices:
error(f'Unsupported voice {args.voice}. The available voices are {", ".join(voices)}.')
if args.neural and args.voice not in voices_neural:
- error(f'voice {args.voice} is not available in neural TTS. The available neural voices are {", ".join(voices_neural)}.')
+ error(f'The voice {args.voice} is not available in neural TTS. ' \
+ f'The available neural voices are {", ".join(voices_neural)}.')
if args.conversational:
args.neural = True
if args.voice not in voices_conversational:
- error(f'voice {args.voice} is not available in conversational style. The available conversational voices are {", ".join(voices_conversational)}.')
+ error(f'The voice {args.voice} is not available in ' \
+ f'conversational style. The available conversational ' \
+ f'voices are {", ".join(voices_conversational)}.')
(scripts, scripts_names) = read_scripts(args.script_file, error)
make_dir(args.audio_cache)
-
+
if len(scripts) != len(pages):
- error(f'{len(pages)} PDF pages selected but the script file contains {len(scripts)} scripts')
+ error(f'{len(pages)} PDF pages selected but the script file ' \
+ f'contains {len(scripts)} scripts')
only = parse_only(args, scripts, scripts_names, error)
@@ -343,7 +388,8 @@ def make_dir(dir_name):
verbose(f'Extracting and converting PDF page {page_num}')
image_file = f'{args.temp_prefix}-{index+1}'
temp_image_files.append(image_file+".ppm")
- cmd = f'{args.pdftoppm} -scale-to-y 1080 -scale-to-x -1 -f {page_num} -singlefile {args.pdf_file} {image_file}'
+ cmd = f'{args.pdftoppm} -scale-to-y 1080 -scale-to-x -1 ' \
+ f'-f {page_num} -singlefile {args.pdf_file} {image_file}'
execute(cmd)
# Make audio files with AWS Polly (cache the results)
@@ -363,21 +409,20 @@ def make_dir(dir_name):
(ssml, hash_hex) = script_to_ssml_and_hash(script, args)
ssml_file = f'{args.temp_prefix}-{index+1}.ssml'
temp_ssml_files.append(ssml_file)
- with open(ssml_file, "w", encoding='utf-8') as f:
- f.write(ssml)
+ with open(ssml_file, "w", encoding='utf-8') as file_handle:
+ file_handle.write(ssml)
audio_file = os.path.join(args.audio_cache, hash_hex+".mp3")
marks_file = os.path.join(args.audio_cache, hash_hex+".mrk")
- if False:
- plain_file = os.path.join(args.audio_cache, hash_hex+".txt")
- with open(plain_file, "w", encoding='utf-8') as f:
- f.write(plain)
# Use Polly to generate the MP3 file if not in cache
if os.path.isfile(audio_file):
verbose(' Audio file found in cache')
else:
verbose(' Calling Polly for the audio file')
- cmd = f'aws {profile_arg} polly synthesize-speech --text-type ssml --text file://{ssml_file} --output-format mp3 --voice-id {args.voice}'
- if args.neural: cmd += ' --engine neural'
+ cmd = f'aws {profile_arg} polly synthesize-speech ' \
+ f'--text-type ssml --text file://{ssml_file} ' \
+ f'--output-format mp3 --voice-id {args.voice}'
+ if args.neural:
+ cmd += ' --engine neural'
cmd += f' {audio_file}'
execute(cmd)
audio_files.append(audio_file)
@@ -390,8 +435,13 @@ def make_dir(dir_name):
verbose(' Speech marks found in cache')
else:
verbose(' Calling Polly for speech marks')
- cmd = f'aws {profile_arg} polly synthesize-speech --text-type ssml --text file://{ssml_file} --output-format json --speech-mark-types sentence word viseme ssml --voice-id {args.voice}'
- if args.neural: cmd += ' --engine neural'
+ cmd = f'aws {profile_arg} polly synthesize-speech ' \
+ f'--text-type ssml --text file://{ssml_file} ' \
+ f'--output-format json ' \
+ f'--speech-mark-types sentence word viseme ssml ' \
+ f'--voice-id {args.voice}'
+ if args.neural:
+ cmd += ' --engine neural'
cmd += f' {marks_file}'
execute(cmd)
marks_files.append(marks_file)
@@ -401,7 +451,8 @@ def make_dir(dir_name):
# Make srt subtitles
#
for (index, script) in enumerate(scripts):
- if index not in only: continue
+ if index not in only:
+ continue
# Read the speech marks, keep only the start and end-of-the-line marks
marks_file = marks_files[index]
starts = {}
@@ -409,63 +460,71 @@ def make_dir(dir_name):
with open(marks_file, 'r', encoding='utf-8') as f:
for line in f.readlines():
mark = json.loads(line)
- if mark['type'] != 'ssml': continue
- m = re.match('^s(?P\d+?)$', mark['value'])
- if m != None:
- starts[int(m['num'])] = mark['time']
- m = re.match('^e(?P\d+?)$', mark['value'])
- if m != None:
- ends[int(m['num'])] = mark['time']
+ if mark['type'] != 'ssml':
+ continue
+ match = re.match(r'^s(?P\d+?)$', mark['value'])
+ if match:
+ starts[int(match['num'])] = mark['time']
+ match = re.match(r'^e(?P\d+?)$', mark['value'])
+ if match:
+ ends[int(match['num'])] = mark['time']
#print(starts)
#print(ends)
srts = []
- index = 0
- for (page_linenum, (line, linenum)) in enumerate(script):
+ for (page_linenum, (line, _)) in enumerate(script):
#print(page_linenum, line)
- if line.strip() == '': continue
+ if line.strip() == '':
+ continue
start = starts[page_linenum]
end = ends[page_linenum]
(dummy, words, sub) = parse(line, args.neural)
- if len(words) == 0: continue
+ if len(words) == 0:
+ continue
srts.append({'start': start, 'end': end, 'text': sub})
srt_file = marks_file[:-4] + '.srt'
with open(srt_file, 'w', encoding='utf-8') as f:
- for (index, srt) in enumerate(srts):
- f.write(f'{index+1}\n')
+ for (srt_index, srt) in enumerate(srts):
+ f.write(f'{srt_index+1}\n')
f.write(millis_to_srt(srt['start'])+' --> '+millis_to_srt(srt['end'])+'\n')
f.write(srt['text']+'\n')
f.write('\n')
-
+
# Combine images and audios to transport streams
for (index, page_num) in enumerate(pages):
- if index not in only: continue
+ if index not in only:
+ continue
verbose(f'Combining PDF page and audio: {index+1}')
ts_file = f'{args.temp_prefix}-{index+1}.mp4'
temp_ts_files.append(ts_file)
audio_file = audio_files[index]
- cmd = f'{args.ffmpeg} -y -loop 1 -i {temp_image_files[index]} -i {audio_file} -shortest -c:v libx264 -vf scale=-2:1080,format=yuv420p -c:a copy -tune stillimage d{ts_file}'
+ cmd = f'{args.ffmpeg} -y -loop 1 -i {temp_image_files[index]} ' \
+ f'-i {audio_file} -shortest -c:v libx264 ' \
+ f'-vf scale=-2:1080,format=yuv420p -c:a copy ' \
+ f'-tune stillimage d{ts_file}'
execute(cmd)
if args.ignore_subtitles:
- os.rename(f'd{ts_file}', f'{ts_file}')
+ os.rename(f'd{ts_file}', f'{ts_file}')
else:
- verbose(f' Adding subtitles')
+ verbose(' Adding subtitles')
srt_file = audio_file[:-4] + '.srt'
if os.stat(srt_file).st_size == 0:
os.rename(f'd{ts_file}', f'{ts_file}')
else:
- #{args.ffmpeg} -i infile.mp4 -i infile.srt -c copy -c:s mov_text outfile.mp4
- cmd = f'{args.ffmpeg} -y -i d{ts_file} -i {srt_file} -c copy -c:s mov_text -metadata:s:s:0 language=eng {ts_file}'
+ cmd = f'{args.ffmpeg} -y -i d{ts_file} -i {srt_file} ' \
+ f'-c copy -c:s mov_text -metadata:s:s:0 language=eng ' \
+ f'{ts_file}'
execute(cmd)
unlink(f'd{ts_file}')
-
+
# Combine the transport streams
verbose(f'Combining the transport streams to "{args.output_file}"')
lst_file = f'{args.temp_prefix}.lst'
with open(lst_file, 'w', encoding='utf-8') as f:
for ts_file in temp_ts_files:
f.write(f'file {ts_file}\n')
- cmd = f'{args.ffmpeg} -y -f concat -i {lst_file} -c:v copy -c:a aac -c:s copy -strict -2 {args.output_file}'
+ cmd = f'{args.ffmpeg} -y -f concat -i {lst_file} -c:v copy -c:a aac ' \
+ f'-c:s copy -strict -2 {args.output_file}'
execute(cmd)
if not args.ignore_subtitles:
@@ -476,7 +535,7 @@ def make_dir(dir_name):
execute(cmd)
clean_temps()
- exit(0)
+ sys.exit(0)
if __name__ == '__main__':
diff --git a/sample.pdf b/sample.pdf
index f0e294c..a379b0b 100644
Binary files a/sample.pdf and b/sample.pdf differ
diff --git a/sample.txt b/sample.txt
index e4f9b9f..a95290b 100644
--- a/sample.txt
+++ b/sample.txt
@@ -4,7 +4,6 @@ This video is produced automatically with the tool.
You can find more details in the GitHub page of the project.
#20
-
#page motivation
Need to make videos of your PDF presentations?
Tired in spending *hours* in recording and editing the audio tracks?
@@ -40,6 +39,7 @@ with the command shown in the slide.
One can find the sample PDF and script files
in the GitHub repository of the project.
#30
+
#page scripts_1
The script files are rather simple text files.
They should be easy to produce with *any* text editor.
diff --git a/setup.py b/setup.py
index fab13a6..cbd2681 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,11 @@
import setuptools
-from setuptools import setup
with open("README.md", "r", encoding="utf-8") as f:
long_description = f.read()
setuptools.setup(
name="pdf2video", # Replace with your own username
- version="0.2.0",
+ version="0.2.1",
author="T. Junttila",
author_email="Tommi.Junttila@aalto.fi",
description="A tool for making narrated videos from PDF presentations.",