27
27
from collections import namedtuple
28
28
import functools
29
29
import logging
30
+ import os
30
31
31
32
from commoncode import fileutils
32
33
from commoncode import filetype
33
34
import typecode
34
35
35
- import extractcode
36
-
37
36
from extractcode import all_kinds
38
37
from extractcode import regular
39
38
from extractcode import package
46
45
from extractcode import patch
47
46
from extractcode import sevenzip
48
47
from extractcode import libarchive2
48
+ from extractcode import extracted_files
49
49
from extractcode .uncompress import uncompress_gzip
50
50
from extractcode .uncompress import uncompress_bzip2
51
51
52
52
53
+ logger = logging .getLogger (__name__ )
54
+ DEBUG = True
55
+ DEBUG_DEEP = False
56
+ # import sys
57
+ # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
58
+ # logger.setLevel(logging.DEBUG)
59
+
60
+
61
+
53
62
"""
54
63
Archive formats handling. The purpose of this module is to select an extractor
55
64
suitable for the accurate extraction of a given kind of archive. An extractor is
98
107
extract_ishield = sevenzip .extract
99
108
extract_Z = sevenzip .extract
100
109
101
- DEBUG = False
102
- logger = logging .getLogger (__name__ )
103
- # import sys
104
- # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
105
- # logger.setLevel(logging.DEBUG)
106
-
107
110
108
111
Handler = namedtuple ('Handler' , ['name' , 'types' , 'mimes' , 'exts' , 'kind' , 'extractors' ])
109
112
@@ -112,7 +115,6 @@ def can_extract(location):
112
115
"""
113
116
Return True if this location can be extracted by some handler.
114
117
"""
115
- assert location
116
118
handlers = list (get_handlers (location ))
117
119
if handlers :
118
120
return True
@@ -123,8 +125,7 @@ def should_extract(location, kinds):
123
125
Return True if this location should be extracted based on the provided
124
126
kinds
125
127
"""
126
- assert location
127
- assert kinds
128
+ location = os .path .abspath (os .path .expanduser (location ))
128
129
if get_extractor (location , kinds ):
129
130
return True
130
131
@@ -135,7 +136,7 @@ def get_extractor(location, kinds=all_kinds):
135
136
None if no extract function is found.
136
137
"""
137
138
assert location
138
- assert kinds
139
+ location = os . path . abspath ( os . path . expanduser ( location ))
139
140
extractors = get_extractors (location , kinds )
140
141
if not extractors :
141
142
return None
@@ -157,14 +158,15 @@ def get_extractors(location, kinds=all_kinds):
157
158
Return a list of extractors that can extract the file at
158
159
location or an empty list.
159
160
"""
161
+ location = os .path .abspath (os .path .expanduser (location ))
160
162
if filetype .is_file (location ):
161
- handlers = get_handlers (location )
163
+ handlers = list ( get_handlers (location ) )
162
164
if handlers :
163
165
candidates = score_handlers (handlers )
164
- if candidates :
165
- best = pick_best_handler (candidates , kinds )
166
- if best :
167
- return best .extractors
166
+ if candidates :
167
+ best = pick_best_handler (candidates , kinds )
168
+ if best :
169
+ return best .extractors
168
170
return []
169
171
170
172
@@ -174,7 +176,6 @@ def get_handlers(location):
174
176
extension_matched,) for this `location`.
175
177
"""
176
178
if filetype .is_file (location ):
177
-
178
179
T = typecode .contenttype .get_type (location )
179
180
ftype = T .filetype_file .lower ()
180
181
mtype = T .mimetype_file
@@ -199,13 +200,13 @@ def get_handlers(location):
199
200
if handler .exts :
200
201
extension_matched = location .lower ().endswith (handler .exts )
201
202
202
- if DEBUG :
203
+ if DEBUG_DEEP :
203
204
logger .debug ('get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals ())
204
205
logger .debug ('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals ())
205
206
206
207
207
208
if type_matched or mime_matched or extension_matched :
208
- if DEBUG :
209
+ if DEBUG_DEEP :
209
210
logger .debug ('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals ())
210
211
logger .debug ('get_handlers: %(location)s: handler: %(handler)r' % locals ())
211
212
yield handler , type_matched , mime_matched , extension_matched
@@ -307,21 +308,33 @@ def extract_twice(location, target_dir, extractor1, extractor2):
307
308
the `extractor1` function to a temporary directory then the `extractor2`
308
309
function on the extracted payload of `extractor1`.
309
310
310
- Return a mapping of path->warning_message .
311
+ Return a list of warning messages. Raise exceptions on errors .
311
312
312
313
Typical nested archives include compressed tarballs and RPMs (containing a
313
314
compressed cpio).
315
+
316
+ Note: it would be easy to support deeper extractor chains, but this gets
317
+ hard to trace and debug very quickly. A depth of two is simple and sane and
318
+ covers most common cases.
314
319
"""
320
+ abs_location = os .path .abspath (os .path .expanduser (location ))
321
+ abs_target_dir = os .path .abspath (os .path .expanduser (target_dir ))
315
322
# extract first the intermediate payload to a temp dir
316
323
temp_target = fileutils .get_temp_dir ('extract' )
317
- warnings = extractor1 (location , temp_target )
324
+ warnings = extractor1 (abs_location , temp_target )
325
+ if DEBUG :
326
+ logger .debug ('extract_twice: temp_target: %(temp_target)r' % locals ())
318
327
319
328
# extract this intermediate payload to the final target_dir
320
329
try :
321
- for extracted1_loc in extractcode .extracted_files (temp_target ):
322
- warnings .extend (extractor2 (extracted1_loc , target_dir ))
330
+ inner_archives = list (extracted_files (temp_target ))
331
+ if not inner_archives :
332
+ warnings .append (location + ': No files found in archive.' )
323
333
else :
324
- warnings .append (location + ': No files found in archive.' )
334
+ for extracted1_loc in inner_archives :
335
+ if DEBUG :
336
+ logger .debug ('extract_twice: extractor2: %(extracted1_loc)r' % locals ())
337
+ warnings .extend (extractor2 (extracted1_loc , target_dir ))
325
338
finally :
326
339
# cleanup the temporary output from extractor1
327
340
fileutils .delete (temp_target )
0 commit comments