Skip to content
This repository was archived by the owner on Jan 12, 2018. It is now read-only.

Commit 3459ab7

Browse files
committed
- Streamlined namespace support by simulating namespace prefixes for makeelement()
- Initial work on adding images (doesn't work yet)
1 parent bbe3498 commit 3459ab7

File tree

4 files changed

+116
-43
lines changed

4 files changed

+116
-43
lines changed

README.markdown

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ Python docx
33

44
## Introduction
55

6-
The docx module reads and writes Microsoft Office Word 2007 docx files.
6+
The docx module creates, reads and writes Microsoft Office Word 2007 docx files.
77

88
These are referred to as 'WordML', 'Office Open XML' and 'Open XML' by Microsoft.
99

docx.py

Lines changed: 110 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,10 @@
1111
import re
1212
import time
1313

14-
# Namespaces used for the test (document.xml)
15-
docns = {
14+
# All Word prefixes / namespace matches used in document.xml & core.xml
15+
# LXML doesn't actually use prefixes (just the real namespace), but these
16+
# make it easier to copy Word output.
17+
nsprefixes = {
1618
'mv':'urn:schemas-microsoft-com:mac:vml',
1719
'mo':'http://schemas.microsoft.com/office/mac/office/2008/main',
1820
've':'http://schemas.openxmlformats.org/markup-compatibility/2006',
@@ -26,21 +28,13 @@
2628
'wp':'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
2729
'a':'http://schemas.openxmlformats.org/drawingml/2006/main',
2830
'pic':'http://schemas.openxmlformats.org/drawingml/2006/picture',
29-
}
30-
31-
# Namespaces used for document properties (core.xml)
32-
propns={
3331
'cp':"http://schemas.openxmlformats.org/package/2006/metadata/core-properties",
3432
'dc':"http://purl.org/dc/elements/1.1/",
3533
'dcterms':"http://purl.org/dc/terms/",
3634
'dcmitype':"http://purl.org/dc/dcmitype/",
3735
'xsi':"http://www.w3.org/2001/XMLSchema-instance",
3836
}
3937

40-
def getns(nsdict,prefix):
41-
'''Given a dict to search, a namespace prefix to look for, return a formatted namespace'''
42-
return '{'+nsdict[prefix]+'}'
43-
4438
def opendocx(file):
4539
'''Open a docx file, return a document XML tree'''
4640
mydoc = zipfile.ZipFile(file)
@@ -53,14 +47,15 @@ def newdocument():
5347
document.append(makeelement('body'))
5448
return document
5549

56-
def makeelement(tagname,tagtext=None,tagnamespace=getns(docns,'w'),tagattributes=None,attributenamespace=None):
57-
'''Create an element & return it'''
58-
newelement = etree.Element(tagnamespace+tagname)
50+
def makeelement(tagname,tagtext=None,nsprefix='w',tagattributes=None,attributenamespace=None):
51+
'''Create an element & return it'''
52+
namespace = '{'+nsprefixes[nsprefix]+'}'
53+
newelement = etree.Element(namespace+tagname)
5954
# Add attributes with namespaces
6055
if tagattributes:
6156
# If they haven't bothered setting attribute namespace, use the same one as the tag
6257
if not attributenamespace:
63-
attributenamespace = tagnamespace
58+
attributenamespace = namespace
6459
for tagattribute in tagattributes:
6560
newelement.set(attributenamespace+tagattribute, tagattributes[tagattribute])
6661
if tagtext:
@@ -193,26 +188,100 @@ def table(contents):
193188
table.append(row)
194189
return table
195190

196-
def picture(filename):
191+
def picture():
197192
'''Create a paragraph containing an image - FIXME - not implemented yet'''
198193
# Word uses paragraphs to contain images
199194
# http://openxmldeveloper.org/articles/462.aspx
200-
resourceid = rId5
201-
newrelationship = makeelement('Relationship',tagattributes={'Id':resourceid,'Type':'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'},Target=filename)
202-
newpara = makeelement('deleteme',style='BodyText')
195+
#resourceid = rId5
196+
#newrelationship = makeelement('Relationship',tagattributes={'Id':resourceid,'Type':'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'},Target=filename)
197+
198+
# Now make drawing element
199+
#newpara = makeelement('deleteme',style='BodyText')
200+
201+
202+
blipfill = makeelement('blipFill',nsprefix='a')
203+
blipfill.append(makeelement('blip',nsprefix='a',tagattributes={'embed':'rId5'}))
204+
stretch = makeelement('stretch',nsprefix='a')
205+
stretch.append(makeelement('fillRect',nsprefix='a'))
206+
blipfill.append(stretch)
207+
208+
sppr = makeelement('spPr',nsprefix='pic')
209+
xfrm = makeelement('xfrm',nsprefix='a')
210+
xfrm.append(makeelement('off',nsprefix='a',tagattributes={'x':'0','y':'0'}))
211+
xfrm.append(makeelement('ext',nsprefix='a',tagattributes={'cx':'5486400','cy':'3429000'}))
212+
prstgeom = makeelement('prstGeom',nsprefix='a',tagattributes={'prst':'rect'})
213+
prstgeom.append(makeelement('avLst',nsprefix='a'))
214+
sppr.append(xfrm)
215+
sppr.append(prstgeom)
216+
217+
nvpicpr = makeelement('nvPicPr',nsprefix='a')
218+
cnvpr = makeelement('cNvPr',nsprefix='a',tagattributes={'id':'0','name':'aero_glow_v2_1920x1200.png'})
219+
cnvpicpr = makeelement('cNvPicPr')
220+
nvpicpr.append(cnvpicpr)
221+
nvpicpr.append(cnvpr)
222+
223+
pic = makeelement('pic',nsprefix='pic')
224+
pic.append(blipfill)
225+
pic.append(sppr)
226+
pic.append(nvpicpr)
227+
228+
229+
graphicdata = makeelement('graphicData',nsprefix='a',tagattributes={'uri':'http://schemas.openxmlformats.org/drawingml/2006/picture'})
230+
graphicdata.append(pic)
231+
232+
graphic = makeelement('graphic',nsprefix='a')
233+
graphic.append(graphicdata)
234+
235+
framepr = makeelement('cNvGraphicFramePr',nsprefix='a')
236+
framelocks = makeelement('graphicFrameLocks',nsprefix='a',tagattributes={'noChangeAspect':'1'})
237+
framepr.append(framelocks)
238+
203239
makeelement('drawing')
204-
makeelement('inline',tagattributes={'distT':"0",'distB':"0",'distL':"0",'distR':"0"},tagnamespace=getns(docns,'wp'))
205-
makeelement('graphic',tagnamespace=getns(docns,'a'))
206-
makeelement('graphicData',tagnamespace=getns(docns,'a'))
207-
makeelement('pic',tagnamespace=getns(docns,'a'))
208-
240+
inline = makeelement('inline',tagattributes={'distT':"0",'distB':"0",'distL':"0",'distR':"0"},nsprefix='wp')
241+
extent = makeelement('extent',nsprefix='a',tagattributes={'cx':'5486400','cy':'3429000'})
242+
effectextent = makeelement('effectExtent',nsprefix='a',tagattributes={'l':'25400','t':'0','r':'0','b':'0'})
243+
docpr = makeelement('docPr',nsprefix='a',tagattributes={'id':'1','name':'Picture 0','descr':'aero_glow_v2_1920x1200.png'})
244+
inline.append(extent)
245+
inline.append(effectextent)
246+
inline.append(docpr)
247+
inline.append(framepr)
248+
inline.append(graphic)
249+
drawing = makeelement('drawing')
250+
drawing.append(inline)
251+
return drawing
252+
'''
253+
<w:drawing>
254+
<wp:inline distT="0" distB="0" distL="0" distR="0">
255+
<wp:extent cx="5486400" cy="3429000"/>
256+
<wp:effectExtent l="25400" t="0" r="0" b="0"/>
257+
<wp:docPr id="1" name="Picture 0" descr="aero_glow_v2_1920x1200.png"/>
258+
<wp:cNvGraphicFramePr>
259+
<a:graphicFrameLocks xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main" noChangeAspect="1"/>
260+
<a:graphic xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main">
261+
<a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
262+
<pic:pic xmlns:pic="http://schemas.openxmlformats.org/drawingml/2006/picture">
263+
<pic:nvPicPr>
264+
<pic:cNvPr id="0" name="aero_glow_v2_1920x1200.png"/>
265+
<pic:cNvPicPr/>
266+
<pic:blipFill>
267+
<a:blip r:embed="rId5"/>
268+
<a:stretch>
269+
<a:fillRect/>
270+
<pic:spPr>
271+
<a:xfrm>
272+
<a:off x="0" y="0"/>
273+
<a:ext cx="5486400" cy="3429000"/>
274+
<a:prstGeom prst="rect">
275+
<a:avLst/>
276+
'''
277+
209278

210279
def search(document,search):
211280
'''Search a document for a regex, return whether a match was found'''
212281
results = False
213282
searchre = re.compile(search)
214283
for element in document.iter():
215-
if element.tag == getns(docns,'w')+'t':
284+
if element.tag == 'w'+'t':
216285
if element.text:
217286
if searchre.match(element.text):
218287
results = True
@@ -223,7 +292,7 @@ def replace(document,search,replace):
223292
newdocument = document
224293
searchre = re.compile(search)
225294
for element in newdocument.iter():
226-
if element.tag == getns(docns,'w')+'t':
295+
if element.tag == 'w'+'t':
227296
if element.text:
228297
if searchre.search(element.text):
229298
element.text = re.sub(search,replace,element.text)
@@ -238,7 +307,7 @@ def getdocumenttext(document):
238307
paralist = []
239308
for element in document.iter():
240309
# Find p (paragraph) elements
241-
if element.tag == getns(docns,'w')+'p':
310+
if element.tag == 'w'+'p':
242311
paralist.append(element)
243312

244313
# Since a single sentence might be spread over multiple text elements, iterate through each
@@ -248,7 +317,7 @@ def getdocumenttext(document):
248317
# Loop through each paragraph
249318
for element in para.iter():
250319
# Find t (text) elements
251-
if element.tag == getns(docns,'w')+'t':
320+
if element.tag == 'w'+'t':
252321
if element.text:
253322
paratext = paratext+element.text
254323

@@ -260,27 +329,27 @@ def getdocumenttext(document):
260329
def docproperties(title,subject,creator,keywords,lastmodifiedby=None):
261330
'''Makes document properties. '''
262331
# OpenXML uses the term 'core' to refer to the 'Dublin Core' specification used to make the properties.
263-
docprops = makeelement('coreProperties',tagnamespace=getns(propns,'cp'))
264-
docprops.append(makeelement('title',tagtext=title,tagnamespace=getns(propns,'dc')))
265-
docprops.append(makeelement('subject',tagtext=subject,tagnamespace=getns(propns,'dc')))
266-
docprops.append(makeelement('creator',tagtext=creator,tagnamespace=getns(propns,'dc')))
267-
docprops.append(makeelement('keywords',tagtext=','.join(keywords),tagnamespace=getns(propns,'cp')))
332+
docprops = makeelement('coreProperties',nsprefix='cp')
333+
docprops.append(makeelement('title',tagtext=title,nsprefix='dc'))
334+
docprops.append(makeelement('subject',tagtext=subject,nsprefix='dc'))
335+
docprops.append(makeelement('creator',tagtext=creator,nsprefix='dc'))
336+
docprops.append(makeelement('keywords',tagtext=','.join(keywords),nsprefix='cp'))
268337
if not lastmodifiedby:
269338
lastmodifiedby = creator
270-
docprops.append(makeelement('lastModifiedBy',tagtext=lastmodifiedby,tagnamespace=getns(propns,'cp')))
271-
docprops.append(makeelement('revision',tagtext='1',tagnamespace=getns(propns,'cp')))
272-
docprops.append(makeelement('category',tagtext='Examples',tagnamespace=getns(propns,'cp')))
273-
docprops.append(makeelement('description',tagtext='Examples',tagnamespace=getns(propns,'dc')))
339+
docprops.append(makeelement('lastModifiedBy',tagtext=lastmodifiedby,nsprefix='cp'))
340+
docprops.append(makeelement('revision',tagtext='1',nsprefix='cp'))
341+
docprops.append(makeelement('category',tagtext='Examples',nsprefix='cp'))
342+
docprops.append(makeelement('description',tagtext='Examples',nsprefix='dc'))
274343
currenttime = time.strftime('%Y-%m-%dT-%H:%M:%SZ')
275344
# FIXME - creating these items manually fails - but we can live without them for now.
276345
''' What we're going for:
277346
<dcterms:created xsi:type="dcterms:W3CDTF">2010-01-01T21:07:00Z</dcterms:created>
278347
<dcterms:modified xsi:type="dcterms:W3CDTF">2010-01-01T21:20:00Z</dcterms:modified>
279348
currenttime'''
280-
#docprops.append(makeelement('created',tagnamespace=getns(propns,'dcterms'),
281-
#tagattributes={'type':'dcterms:W3CDTF'},tagtext='2010-01-01T21:07:00Z',attributenamespace=getns(propns,'xsi')))
282-
#docprops.append(makeelement('modified',tagnamespace=getns(propns,'dcterms'),
283-
#tagattributes={'type':'dcterms:W3CDTF'},tagtext='2010-01-01T21:07:00Z',attributenamespace=getns(propns,'xsi')))
349+
#docprops.append(makeelement('created',nsprefix='dcterms',
350+
#tagattributes={'type':'dcterms:W3CDTF'},tagtext='2010-01-01T21:07:00Z',attributenamespace='xsi'))
351+
#docprops.append(makeelement('modified',nsprefix='dcterms',
352+
#tagattributes={'type':'dcterms:W3CDTF'},tagtext='2010-01-01T21:07:00Z',attributenamespace='xsi'))
284353
return docprops
285354

286355

@@ -306,6 +375,7 @@ def savedocx(document,properties,newfilename):
306375
'word/numbering.xml',
307376
'word/theme/',
308377
'word/theme/theme1.xml',
378+
#'word/media/image1.png',
309379
'word/settings.xml',
310380
'word/fontTable.xml']:
311381
newfile.write('template/'+xmlfile,xmlfile)

example-makedocument.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
document = newdocument()
1515

1616
# This xpath location is where most interesting content lives
17-
docbody = document.xpath('/w:document/w:body', namespaces=docns)[0]
17+
docbody = document.xpath('/w:document/w:body', namespaces=nsprefixes)[0]
1818

1919
# Append two headings and a paragraph
2020
docbody.append(heading('''Welcome to Python's docx module''',1) )
@@ -41,6 +41,9 @@
4141
docbody.append(paragraph('Thanks to the awesomeness of the lxml module, we can:'))
4242
for point in ['Search and replace','Extract plain text of document','Add and delete items anywhere within the document']:
4343
docbody.append(paragraph(point,style='ListBullet'))
44+
45+
# Add an image (beta)
46+
#docbody.append(picture())
4447

4548
# Search and replace
4649
document = replace(document,'the','the goshdarned')

template/word/_rels/document.xml.rels

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
11
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2-
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId6" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings" Target="webSettings.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering" Target="numbering.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings" Target="settings.xml"/><Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable" Target="fontTable.xml"/></Relationships>
2+
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships"><Relationship Id="rId4" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings" Target="webSettings.xml"/><Relationship Id="rId5" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" Target="media/image1.png"/><Relationship Id="rId7" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" Target="theme/theme1.xml"/><Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering" Target="numbering.xml"/><Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/><Relationship Id="rId3" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings" Target="settings.xml"/><Relationship Id="rId6" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable" Target="fontTable.xml"/></Relationships>

0 commit comments

Comments
 (0)