11
11
import re
12
12
import time
13
13
14
- # Namespaces used for the test (document.xml)
15
- docns = {
14
+ # All Word prefixes / namespace matches used in document.xml & core.xml
15
+ # LXML doesn't actually use prefixes (just the real namespace) , but these
16
+ # make it easier to copy Word output.
17
+ nsprefixes = {
16
18
'mv' :'urn:schemas-microsoft-com:mac:vml' ,
17
19
'mo' :'http://schemas.microsoft.com/office/mac/office/2008/main' ,
18
20
've' :'http://schemas.openxmlformats.org/markup-compatibility/2006' ,
26
28
'wp' :'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing' ,
27
29
'a' :'http://schemas.openxmlformats.org/drawingml/2006/main' ,
28
30
'pic' :'http://schemas.openxmlformats.org/drawingml/2006/picture' ,
29
- }
30
-
31
- # Namespaces used for document properties (core.xml)
32
- propns = {
33
31
'cp' :"http://schemas.openxmlformats.org/package/2006/metadata/core-properties" ,
34
32
'dc' :"http://purl.org/dc/elements/1.1/" ,
35
33
'dcterms' :"http://purl.org/dc/terms/" ,
36
34
'dcmitype' :"http://purl.org/dc/dcmitype/" ,
37
35
'xsi' :"http://www.w3.org/2001/XMLSchema-instance" ,
38
36
}
39
37
40
- def getns (nsdict ,prefix ):
41
- '''Given a dict to search, a namespace prefix to look for, return a formatted namespace'''
42
- return '{' + nsdict [prefix ]+ '}'
43
-
44
38
def opendocx (file ):
45
39
'''Open a docx file, return a document XML tree'''
46
40
mydoc = zipfile .ZipFile (file )
@@ -53,14 +47,15 @@ def newdocument():
53
47
document .append (makeelement ('body' ))
54
48
return document
55
49
56
- def makeelement (tagname ,tagtext = None ,tagnamespace = getns (docns ,'w' ),tagattributes = None ,attributenamespace = None ):
57
- '''Create an element & return it'''
58
- newelement = etree .Element (tagnamespace + tagname )
50
+ def makeelement (tagname ,tagtext = None ,nsprefix = 'w' ,tagattributes = None ,attributenamespace = None ):
51
+ '''Create an element & return it'''
52
+ namespace = '{' + nsprefixes [nsprefix ]+ '}'
53
+ newelement = etree .Element (namespace + tagname )
59
54
# Add attributes with namespaces
60
55
if tagattributes :
61
56
# If they haven't bothered setting attribute namespace, use the same one as the tag
62
57
if not attributenamespace :
63
- attributenamespace = tagnamespace
58
+ attributenamespace = namespace
64
59
for tagattribute in tagattributes :
65
60
newelement .set (attributenamespace + tagattribute , tagattributes [tagattribute ])
66
61
if tagtext :
@@ -193,26 +188,100 @@ def table(contents):
193
188
table .append (row )
194
189
return table
195
190
196
def picture():
    '''Create a drawing element containing an inline image - FIXME - not fully implemented yet

    Takes no arguments: the relationship id ('rId5'), the image name and the
    image dimensions are all hard-coded for now, and no image relationship
    is created or written anywhere by this function.

    Returns the w:drawing element, whose tree is laid out as follows
    (modelled on markup copied from Word):

        w:drawing
          wp:inline            distT distB distL distR
            wp:extent          cx cy
            wp:effectExtent    l t r b
            wp:docPr           id name descr
            wp:cNvGraphicFramePr
              a:graphicFrameLocks   noChangeAspect
            a:graphic
              a:graphicData    uri
                pic:pic
                  pic:nvPicPr
                    pic:cNvPr  id name
                    pic:cNvPicPr
                  pic:blipFill
                    a:blip     r:embed
                    a:stretch
                      a:fillRect
                  pic:spPr
                    a:xfrm
                      a:off    x y
                      a:ext    cx cy
                    a:prstGeom prst
                      a:avLst
    '''
    # Word uses paragraphs to contain images
    # http://openxmldeveloper.org/articles/462.aspx

    # NOTE(review): makeelement() qualifies every attribute with the tag's own
    # namespace unless attributenamespace is given, whereas the reference
    # markup uses unqualified attributes (and r:embed on a:blip). That has to
    # be addressed in makeelement() or by its callers - confirm before relying
    # on this output in a real document.

    # pic:nvPicPr - cNvPr comes before cNvPicPr in the reference markup
    nvpicpr = makeelement('nvPicPr', nsprefix='pic')
    nvpicpr.append(makeelement('cNvPr', nsprefix='pic', tagattributes={'id':'0', 'name':'aero_glow_v2_1920x1200.png'}))
    nvpicpr.append(makeelement('cNvPicPr', nsprefix='pic'))

    # pic:blipFill - refers to the image by relationship id
    blipfill = makeelement('blipFill', nsprefix='pic')
    blipfill.append(makeelement('blip', nsprefix='a', tagattributes={'embed':'rId5'}))
    stretch = makeelement('stretch', nsprefix='a')
    stretch.append(makeelement('fillRect', nsprefix='a'))
    blipfill.append(stretch)

    # pic:spPr - offset, size and preset geometry
    sppr = makeelement('spPr', nsprefix='pic')
    xfrm = makeelement('xfrm', nsprefix='a')
    xfrm.append(makeelement('off', nsprefix='a', tagattributes={'x':'0', 'y':'0'}))
    xfrm.append(makeelement('ext', nsprefix='a', tagattributes={'cx':'5486400', 'cy':'3429000'}))
    prstgeom = makeelement('prstGeom', nsprefix='a', tagattributes={'prst':'rect'})
    prstgeom.append(makeelement('avLst', nsprefix='a'))
    sppr.append(xfrm)
    sppr.append(prstgeom)

    # pic:pic - children in the order used by the reference markup
    pic = makeelement('pic', nsprefix='pic')
    pic.append(nvpicpr)
    pic.append(blipfill)
    pic.append(sppr)

    # a:graphic / a:graphicData wrap the picture
    graphicdata = makeelement('graphicData', nsprefix='a', tagattributes={'uri':'http://schemas.openxmlformats.org/drawingml/2006/picture'})
    graphicdata.append(pic)
    graphic = makeelement('graphic', nsprefix='a')
    graphic.append(graphicdata)

    # wp:cNvGraphicFramePr holds the a:graphicFrameLocks element
    framepr = makeelement('cNvGraphicFramePr', nsprefix='wp')
    framepr.append(makeelement('graphicFrameLocks', nsprefix='a', tagattributes={'noChangeAspect':'1'}))

    # wp:inline and its wp: children, followed by the graphic itself
    inline = makeelement('inline', tagattributes={'distT':"0", 'distB':"0", 'distL':"0", 'distR':"0"}, nsprefix='wp')
    inline.append(makeelement('extent', nsprefix='wp', tagattributes={'cx':'5486400', 'cy':'3429000'}))
    inline.append(makeelement('effectExtent', nsprefix='wp', tagattributes={'l':'25400', 't':'0', 'r':'0', 'b':'0'}))
    inline.append(makeelement('docPr', nsprefix='wp', tagattributes={'id':'1', 'name':'Picture 0', 'descr':'aero_glow_v2_1920x1200.png'}))
    inline.append(framepr)
    inline.append(graphic)

    drawing = makeelement('drawing')
    drawing.append(inline)
    return drawing
277
+
209
278
210
279
def search (document ,search ):
211
280
'''Search a document for a regex, return '''
212
281
results = False
213
282
searchre = re .compile (search )
214
283
for element in document .iter ():
215
- if element .tag == getns ( docns , 'w' ) + 't' :
284
+ if element .tag == 'w' + 't' :
216
285
if element .text :
217
286
if searchre .match (element .text ):
218
287
results = True
@@ -223,7 +292,7 @@ def replace(document,search,replace):
223
292
newdocument = document
224
293
searchre = re .compile (search )
225
294
for element in newdocument .iter ():
226
- if element .tag == getns ( docns , 'w' ) + 't' :
295
+ if element .tag == 'w' + 't' :
227
296
if element .text :
228
297
if searchre .search (element .text ):
229
298
element .text = re .sub (search ,replace ,element .text )
@@ -238,7 +307,7 @@ def getdocumenttext(document):
238
307
paralist = []
239
308
for element in document .iter ():
240
309
# Find p (paragraph) elements
241
- if element .tag == getns ( docns , 'w' ) + 'p' :
310
+ if element .tag == 'w' + 'p' :
242
311
paralist .append (element )
243
312
244
313
# Since a single sentence might be spread over multiple text elements, iterate through each
@@ -248,7 +317,7 @@ def getdocumenttext(document):
248
317
# Loop through each paragraph
249
318
for element in para .iter ():
250
319
# Find t (text) elements
251
- if element .tag == getns ( docns , 'w' ) + 't' :
320
+ if element .tag == 'w' + 't' :
252
321
if element .text :
253
322
paratext = paratext + element .text
254
323
@@ -260,27 +329,27 @@ def getdocumenttext(document):
260
329
def docproperties(title, subject, creator, keywords, lastmodifiedby=None):
    '''Makes document properties (the cp:coreProperties element).

    title, subject, creator -- text for the dc:title, dc:subject and
                               dc:creator elements
    keywords                -- iterable of strings, joined with commas into
                               a single cp:keywords element
    lastmodifiedby          -- text for cp:lastModifiedBy; falls back to
                               creator when omitted or empty

    Revision ('1'), category and description ('Examples') are fixed values.
    Returns the populated coreProperties element.
    '''
    # OpenXML uses the term 'core' to refer to the 'Dublin Core' specification used to make the properties.
    if not lastmodifiedby:
        lastmodifiedby = creator

    docprops = makeelement('coreProperties', nsprefix='cp')
    # (tag name, text, namespace prefix), in the order they are appended
    for tagname, text, prefix in (
            ('title', title, 'dc'),
            ('subject', subject, 'dc'),
            ('creator', creator, 'dc'),
            ('keywords', ','.join(keywords), 'cp'),
            ('lastModifiedBy', lastmodifiedby, 'cp'),
            ('revision', '1', 'cp'),
            ('category', 'Examples', 'cp'),
            ('description', 'Examples', 'dc'),
    ):
        docprops.append(makeelement(tagname, tagtext=text, nsprefix=prefix))

    # FIXME - creating these items manually fails - but we can live without them for now.
    # What we're going for:
    #   <dcterms:created xsi:type="dcterms:W3CDTF">2010-01-01T21:07:00Z</dcterms:created>
    #   <dcterms:modified xsi:type="dcterms:W3CDTF">2010-01-01T21:20:00Z</dcterms:modified>
    # The timestamp for these would be time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
    # (no hyphen after the 'T', and UTC because of the trailing 'Z').
    # Note that makeelement() prepends attributenamespace verbatim, so it needs
    # the '{namespace}' form rather than the bare 'xsi' prefix used below.
    #docprops.append(makeelement('created',nsprefix= 'dcterms',
    #tagattributes={'type':'dcterms:W3CDTF'},tagtext='2010-01-01T21:07:00Z',attributenamespace='xsi'))
    #docprops.append(makeelement('modified',nsprefix= 'dcterms',
    #tagattributes={'type':'dcterms:W3CDTF'},tagtext='2010-01-01T21:07:00Z',attributenamespace='xsi'))
    return docprops
285
354
286
355
@@ -306,6 +375,7 @@ def savedocx(document,properties,newfilename):
306
375
'word/numbering.xml' ,
307
376
'word/theme/' ,
308
377
'word/theme/theme1.xml' ,
378
+ #'word/media/image1.png',
309
379
'word/settings.xml' ,
310
380
'word/fontTable.xml' ]:
311
381
newfile .write ('template/' + xmlfile ,xmlfile )
0 commit comments