Skip to content
This repository was archived by the owner on Jan 12, 2018. It is now read-only.

Commit 650d586

Browse files
author
Steve Canny
committed
PEP8 example-extracttext.py
1 parent 543d305 commit 650d586

File tree

1 file changed

+23
-16
lines changed

1 file changed

+23
-16
lines changed

example-extracttext.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,37 @@
1-
#!/usr/bin/env python2.6
2-
'''
1+
#!/usr/bin/env python
2+
"""
33
This file opens a docx (Office 2007) file and dumps the text.
44
5-
If you need to extract text from documents, use this file as a basis for your work.
5+
If you need to extract text from documents, use this file as a basis for your
6+
work.
67
78
Part of Python's docx module - http://github.com/mikemaccana/python-docx
89
See LICENSE for licensing information.
9-
'''
10-
from docx import *
10+
"""
11+
1112
import sys
12-
if __name__ == '__main__':
13+
14+
from docx import opendocx, getdocumenttext
15+
16+
if __name__ == '__main__':
1317
try:
1418
document = opendocx(sys.argv[1])
15-
newfile = open(sys.argv[2],'w')
19+
newfile = open(sys.argv[2], 'w')
1620
except:
17-
print('Please supply an input and output file. For example:')
18-
print(''' example-extracttext.py 'My Office 2007 document.docx' 'outputfile.txt' ''')
21+
print(
22+
"Please supply an input and output file. For example:\n"
23+
" example-extracttext.py 'My Office 2007 document.docx' 'outp"
24+
"utfile.txt'"
25+
)
1926
exit()
20-
## Fetch all the text out of the document we just created
21-
paratextlist = getdocumenttext(document)
2227

23-
# Make explicit unicode version
28+
# Fetch all the text out of the document we just opened
29+
paratextlist = getdocumenttext(document)
30+
31+
# Encode each paragraph's text explicitly as UTF-8
2432
newparatextlist = []
2533
for paratext in paratextlist:
26-
newparatextlist.append(paratext.encode("utf-8"))
27-
28-
## Print our documnts test with two newlines under each paragraph
34+
newparatextlist.append(paratext.encode("utf-8"))
35+
36+
# Print out text of document with two newlines under each paragraph
2937
newfile.write('\n\n'.join(newparatextlist))
30-
#print '\n\n'.join(newparatextlist)

0 commit comments

Comments
 (0)