1111import re
1212import time
1313
14- # Namespaces used for the test (document.xml)
15- docns = {
14+ # All Word prefixes / namespace matches used in document.xml & core.xml
15+ # LXML doesn't actually use prefixes (just the real namespace), but these
16+ # make it easier to copy Word output.
17+ nsprefixes = {
1618 'mv' :'urn:schemas-microsoft-com:mac:vml' ,
1719 'mo' :'http://schemas.microsoft.com/office/mac/office/2008/main' ,
1820 've' :'http://schemas.openxmlformats.org/markup-compatibility/2006' ,
2628 'wp' :'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing' ,
2729 'a' :'http://schemas.openxmlformats.org/drawingml/2006/main' ,
2830 'pic' :'http://schemas.openxmlformats.org/drawingml/2006/picture' ,
29- }
30-
31- # Namespaces used for document properties (core.xml)
32- propns = {
3331 'cp' :"http://schemas.openxmlformats.org/package/2006/metadata/core-properties" ,
3432 'dc' :"http://purl.org/dc/elements/1.1/" ,
3533 'dcterms' :"http://purl.org/dc/terms/" ,
3634 'dcmitype' :"http://purl.org/dc/dcmitype/" ,
3735 'xsi' :"http://www.w3.org/2001/XMLSchema-instance" ,
3836 }
3937
40- def getns (nsdict ,prefix ):
41- '''Given a dict to search, a namespace prefix to look for, return a formatted namespace'''
42- return '{' + nsdict [prefix ]+ '}'
43-
4438def opendocx (file ):
4539 '''Open a docx file, return a document XML tree'''
4640 mydoc = zipfile .ZipFile (file )
@@ -53,14 +47,15 @@ def newdocument():
5347 document .append (makeelement ('body' ))
5448 return document
5549
56- def makeelement (tagname ,tagtext = None ,tagnamespace = getns (docns ,'w' ),tagattributes = None ,attributenamespace = None ):
57- '''Create an element & return it'''
58- newelement = etree .Element (tagnamespace + tagname )
50+ def makeelement (tagname ,tagtext = None ,nsprefix = 'w' ,tagattributes = None ,attributenamespace = None ):
51+ '''Create an element & return it'''
52+ namespace = '{' + nsprefixes [nsprefix ]+ '}'
53+ newelement = etree .Element (namespace + tagname )
5954 # Add attributes with namespaces
6055 if tagattributes :
6156 # If they haven't bothered setting attribute namespace, use the same one as the tag
6257 if not attributenamespace :
63- attributenamespace = tagnamespace
58+ attributenamespace = namespace
6459 for tagattribute in tagattributes :
6560 newelement .set (attributenamespace + tagattribute , tagattributes [tagattribute ])
6661 if tagtext :
@@ -193,26 +188,100 @@ def table(contents):
193188 table .append (row )
194189 return table
195190
def picture(picname='aero_glow_v2_1920x1200.png', relationshipid='rId5',
            width='5486400', height='3429000'):
    '''Create a drawing element containing an inline image - FIXME - not fully implemented yet

    Only the w:drawing element tree is built here. No relationship entry or
    media file is created, so relationshipid must refer to an image
    relationship that exists elsewhere in the package.

    picname        - written to pic:cNvPr/@name and wp:docPr/@descr
    relationshipid - written to a:blip/@r:embed
    width, height  - strings written to the cx / cy attributes of both
                     wp:extent and a:ext

    Returns the w:drawing element. The structure produced is:

    <w:drawing>
      <wp:inline distT="0" distB="0" distL="0" distR="0">
        <wp:extent cx="5486400" cy="3429000"/>
        <wp:effectExtent l="25400" t="0" r="0" b="0"/>
        <wp:docPr id="1" name="Picture 0" descr="aero_glow_v2_1920x1200.png"/>
        <wp:cNvGraphicFramePr>
          <a:graphicFrameLocks noChangeAspect="1"/>
        </wp:cNvGraphicFramePr>
        <a:graphic>
          <a:graphicData uri="http://schemas.openxmlformats.org/drawingml/2006/picture">
            <pic:pic>
              <pic:nvPicPr>
                <pic:cNvPr id="0" name="aero_glow_v2_1920x1200.png"/>
                <pic:cNvPicPr/>
              </pic:nvPicPr>
              <pic:blipFill>
                <a:blip r:embed="rId5"/>
                <a:stretch>
                  <a:fillRect/>
                </a:stretch>
              </pic:blipFill>
              <pic:spPr>
                <a:xfrm>
                  <a:off x="0" y="0"/>
                  <a:ext cx="5486400" cy="3429000"/>
                </a:xfrm>
                <a:prstGeom prst="rect">
                  <a:avLst/>
                </a:prstGeom>
              </pic:spPr>
            </pic:pic>
          </a:graphicData>
        </a:graphic>
      </wp:inline>
    </w:drawing>
    '''
    # Word uses paragraphs to contain images
    # http://openxmldeveloper.org/articles/462.aspx

    # Namespace of the r: prefix used for the embed attribute on a:blip.
    relationshipns = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'

    def element(tagname, nsprefix, attributes=None):
        '''Make an element in the given namespace with unqualified attributes'''
        # makeelement() puts attributes into a namespace (the tag's own by
        # default), but the target XML has plain attributes, so set them here.
        newelement = makeelement(tagname, nsprefix=nsprefix)
        for name, value in (attributes or {}).items():
            newelement.set(name, value)
        return newelement

    # pic:nvPicPr - cNvPr must come before cNvPicPr
    nvpicpr = element('nvPicPr', 'pic')
    nvpicpr.append(element('cNvPr', 'pic', {'id': '0', 'name': picname}))
    nvpicpr.append(element('cNvPicPr', 'pic'))

    # pic:blipFill - the blip points at the image through its relationship id
    blip = element('blip', 'a')
    blip.set('{' + relationshipns + '}embed', relationshipid)
    stretch = element('stretch', 'a')
    stretch.append(element('fillRect', 'a'))
    blipfill = element('blipFill', 'pic')
    blipfill.append(blip)
    blipfill.append(stretch)

    # pic:spPr - offset, size and preset geometry
    xfrm = element('xfrm', 'a')
    xfrm.append(element('off', 'a', {'x': '0', 'y': '0'}))
    xfrm.append(element('ext', 'a', {'cx': width, 'cy': height}))
    prstgeom = element('prstGeom', 'a', {'prst': 'rect'})
    prstgeom.append(element('avLst', 'a'))
    sppr = element('spPr', 'pic')
    sppr.append(xfrm)
    sppr.append(prstgeom)

    # pic:pic - children in the order shown in the docstring
    pic = element('pic', 'pic')
    pic.append(nvpicpr)
    pic.append(blipfill)
    pic.append(sppr)

    graphicdata = element('graphicData', 'a',
                          {'uri': 'http://schemas.openxmlformats.org/drawingml/2006/picture'})
    graphicdata.append(pic)
    graphic = element('graphic', 'a')
    graphic.append(graphicdata)

    framepr = element('cNvGraphicFramePr', 'wp')
    framepr.append(element('graphicFrameLocks', 'a', {'noChangeAspect': '1'}))

    inline = element('inline', 'wp',
                     {'distT': '0', 'distB': '0', 'distL': '0', 'distR': '0'})
    inline.append(element('extent', 'wp', {'cx': width, 'cy': height}))
    inline.append(element('effectExtent', 'wp',
                          {'l': '25400', 't': '0', 'r': '0', 'b': '0'}))
    inline.append(element('docPr', 'wp',
                          {'id': '1', 'name': 'Picture 0', 'descr': picname}))
    inline.append(framepr)
    inline.append(graphic)

    drawing = makeelement('drawing')
    drawing.append(inline)
    return drawing
277+
209278
210279def search (document ,search ):
211280 '''Search a document for a regex, return '''
212281 results = False
213282 searchre = re .compile (search )
214283 for element in document .iter ():
215- if element .tag == getns ( docns , 'w' ) + 't' :
284+ if element .tag == 'w' + 't' :
216285 if element .text :
217286 if searchre .match (element .text ):
218287 results = True
@@ -223,7 +292,7 @@ def replace(document,search,replace):
223292 newdocument = document
224293 searchre = re .compile (search )
225294 for element in newdocument .iter ():
226- if element .tag == getns ( docns , 'w' ) + 't' :
295+ if element .tag == 'w' + 't' :
227296 if element .text :
228297 if searchre .search (element .text ):
229298 element .text = re .sub (search ,replace ,element .text )
@@ -238,7 +307,7 @@ def getdocumenttext(document):
238307 paralist = []
239308 for element in document .iter ():
240309 # Find p (paragraph) elements
241- if element .tag == getns ( docns , 'w' ) + 'p' :
310+ if element .tag == 'w' + 'p' :
242311 paralist .append (element )
243312
244313 # Since a single sentence might be spread over multiple text elements, iterate through each
@@ -248,7 +317,7 @@ def getdocumenttext(document):
248317 # Loop through each paragraph
249318 for element in para .iter ():
250319 # Find t (text) elements
251- if element .tag == getns ( docns , 'w' ) + 't' :
320+ if element .tag == 'w' + 't' :
252321 if element .text :
253322 paratext = paratext + element .text
254323
@@ -260,27 +329,27 @@ def getdocumenttext(document):
def docproperties(title, subject, creator, keywords, lastmodifiedby=None):
    '''Makes document properties.

    title, subject, creator - text for the matching dc: elements
    keywords                - iterable of strings, joined with commas into
                              cp:keywords (None is treated as no keywords)
    lastmodifiedby          - text for cp:lastModifiedBy, defaults to creator

    revision is always '1', and category and description are always
    'Examples'. Returns the cp:coreProperties element.
    '''
    # OpenXML uses the term 'core' to refer to the 'Dublin Core' specification used to make the properties.
    if not lastmodifiedby:
        lastmodifiedby = creator

    # (tag name, text, namespace prefix), in the order they are appended
    properties = (
        ('title', title, 'dc'),
        ('subject', subject, 'dc'),
        ('creator', creator, 'dc'),
        ('keywords', ','.join(keywords or []), 'cp'),
        ('lastModifiedBy', lastmodifiedby, 'cp'),
        ('revision', '1', 'cp'),
        ('category', 'Examples', 'cp'),
        ('description', 'Examples', 'dc'),
    )
    docprops = makeelement('coreProperties', nsprefix='cp')
    for tagname, tagtext, nsprefix in properties:
        docprops.append(makeelement(tagname, tagtext=tagtext, nsprefix=nsprefix))

    # The trailing 'Z' means UTC, so format gmtime() rather than local time.
    # Not used yet - it is meant for the created / modified elements below.
    currenttime = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
    # FIXME - creating these items manually fails - but we can live without them for now.
    # What we're going for:
    #   <dcterms:created xsi:type="dcterms:W3CDTF">2010-01-01T21:07:00Z</dcterms:created>
    #   <dcterms:modified xsi:type="dcterms:W3CDTF">2010-01-01T21:20:00Z</dcterms:modified>
    # with currenttime as the text. makeelement() joins attributenamespace
    # directly onto the attribute name, so it has to be given in '{uri}' form
    # (built from the 'xsi' namespace), not as the bare prefix 'xsi'.
    return docprops
285354
286355
@@ -306,6 +375,7 @@ def savedocx(document,properties,newfilename):
306375 'word/numbering.xml' ,
307376 'word/theme/' ,
308377 'word/theme/theme1.xml' ,
378+ #'word/media/image1.png',
309379 'word/settings.xml' ,
310380 'word/fontTable.xml' ]:
311381 newfile .write ('template/' + xmlfile ,xmlfile )
0 commit comments