@@ -121,23 +121,23 @@ def test_run(self, test_files_path, docx_converter):
121
121
assert docs [0 ].meta .keys () == {"file_path" , "docx" }
122
122
assert docs [0 ].meta == {
123
123
"file_path" : os .path .basename (paths [0 ]),
124
- "docx" : DOCXMetadata (
125
- author = "Microsoft Office User" ,
126
- category = "" ,
127
- comments = "" ,
128
- content_status = "" ,
129
- created = "2024-06-09T21:17:00+00:00" ,
130
- identifier = "" ,
131
- keywords = "" ,
132
- language = "" ,
133
- last_modified_by = "Carlos Fernández Lorán" ,
134
- last_printed = None ,
135
- modified = "2024-06-09T21:27:00+00:00" ,
136
- revision = 2 ,
137
- subject = "" ,
138
- title = "" ,
139
- version = "" ,
140
- ) ,
124
+ "docx" : {
125
+ " author" : "Microsoft Office User" ,
126
+ " category" : "" ,
127
+ " comments" : "" ,
128
+ " content_status" : "" ,
129
+ " created" : "2024-06-09T21:17:00+00:00" ,
130
+ " identifier" : "" ,
131
+ " keywords" : "" ,
132
+ " language" : "" ,
133
+ " last_modified_by" : "Carlos Fernández Lorán" ,
134
+ " last_printed" : None ,
135
+ " modified" : "2024-06-09T21:27:00+00:00" ,
136
+ " revision" : 2 ,
137
+ " subject" : "" ,
138
+ " title" : "" ,
139
+ " version" : "" ,
140
+ } ,
141
141
}
142
142
143
143
def test_run_with_table (self , test_files_path ):
@@ -153,23 +153,23 @@ def test_run_with_table(self, test_files_path):
153
153
assert docs [0 ].meta .keys () == {"file_path" , "docx" }
154
154
assert docs [0 ].meta == {
155
155
"file_path" : os .path .basename (paths [0 ]),
156
- "docx" : DOCXMetadata (
157
- author = "Saha, Anirban" ,
158
- category = "" ,
159
- comments = "" ,
160
- content_status = "" ,
161
- created = "2020-07-14T08:14:00+00:00" ,
162
- identifier = "" ,
163
- keywords = "" ,
164
- language = "" ,
165
- last_modified_by = "Saha, Anirban" ,
166
- last_printed = None ,
167
- modified = "2020-07-14T08:16:00+00:00" ,
168
- revision = 1 ,
169
- subject = "" ,
170
- title = "" ,
171
- version = "" ,
172
- ) ,
156
+ "docx" : {
157
+ " author" : "Saha, Anirban" ,
158
+ " category" : "" ,
159
+ " comments" : "" ,
160
+ " content_status" : "" ,
161
+ " created" : "2020-07-14T08:14:00+00:00" ,
162
+ " identifier" : "" ,
163
+ " keywords" : "" ,
164
+ " language" : "" ,
165
+ " last_modified_by" : "Saha, Anirban" ,
166
+ " last_printed" : None ,
167
+ " modified" : "2020-07-14T08:16:00+00:00" ,
168
+ " revision" : 1 ,
169
+ " subject" : "" ,
170
+ " title" : "" ,
171
+ " version" : "" ,
172
+ } ,
173
173
}
174
174
# let's now check that the table markdown is correctly added and that the order of elements is correct
175
175
content_parts = docs [0 ].content .split ("\n \n " )
@@ -193,23 +193,23 @@ def test_run_with_store_full_path_false(self, test_files_path):
193
193
assert docs [0 ].meta .keys () == {"file_path" , "docx" }
194
194
assert docs [0 ].meta == {
195
195
"file_path" : "sample_docx_1.docx" ,
196
- "docx" : DOCXMetadata (
197
- author = "Microsoft Office User" ,
198
- category = "" ,
199
- comments = "" ,
200
- content_status = "" ,
201
- created = "2024-06-09T21:17:00+00:00" ,
202
- identifier = "" ,
203
- keywords = "" ,
204
- language = "" ,
205
- last_modified_by = "Carlos Fernández Lorán" ,
206
- last_printed = None ,
207
- modified = "2024-06-09T21:27:00+00:00" ,
208
- revision = 2 ,
209
- subject = "" ,
210
- title = "" ,
211
- version = "" ,
212
- ) ,
196
+ "docx" : {
197
+ " author" : "Microsoft Office User" ,
198
+ " category" : "" ,
199
+ " comments" : "" ,
200
+ " content_status" : "" ,
201
+ " created" : "2024-06-09T21:17:00+00:00" ,
202
+ " identifier" : "" ,
203
+ " keywords" : "" ,
204
+ " language" : "" ,
205
+ " last_modified_by" : "Carlos Fernández Lorán" ,
206
+ " last_printed" : None ,
207
+ " modified" : "2024-06-09T21:27:00+00:00" ,
208
+ " revision" : 2 ,
209
+ " subject" : "" ,
210
+ " title" : "" ,
211
+ " version" : "" ,
212
+ } ,
213
213
}
214
214
215
215
@pytest .mark .parametrize ("table_format" , ["markdown" , "csv" ])
@@ -285,23 +285,23 @@ def test_run_with_additional_meta(self, test_files_path, docx_converter):
285
285
doc = output ["documents" ][0 ]
286
286
assert doc .meta == {
287
287
"file_path" : os .path .basename (paths [0 ]),
288
- "docx" : DOCXMetadata (
289
- author = "Microsoft Office User" ,
290
- category = "" ,
291
- comments = "" ,
292
- content_status = "" ,
293
- created = "2024-06-09T21:17:00+00:00" ,
294
- identifier = "" ,
295
- keywords = "" ,
296
- language = "" ,
297
- last_modified_by = "Carlos Fernández Lorán" ,
298
- last_printed = None ,
299
- modified = "2024-06-09T21:27:00+00:00" ,
300
- revision = 2 ,
301
- subject = "" ,
302
- title = "" ,
303
- version = "" ,
304
- ) ,
288
+ "docx" : {
289
+ " author" : "Microsoft Office User" ,
290
+ " category" : "" ,
291
+ " comments" : "" ,
292
+ " content_status" : "" ,
293
+ " created" : "2024-06-09T21:17:00+00:00" ,
294
+ " identifier" : "" ,
295
+ " keywords" : "" ,
296
+ " language" : "" ,
297
+ " last_modified_by" : "Carlos Fernández Lorán" ,
298
+ " last_printed" : None ,
299
+ " modified" : "2024-06-09T21:27:00+00:00" ,
300
+ " revision" : 2 ,
301
+ " subject" : "" ,
302
+ " title" : "" ,
303
+ " version" : "" ,
304
+ } ,
305
305
"language" : "it" ,
306
306
"author" : "test_author" ,
307
307
}
0 commit comments