2727from collections import namedtuple
2828import functools
2929import logging
30+ import os
3031
3132from commoncode import fileutils
3233from commoncode import filetype
3334import typecode
3435
35- import extractcode
36-
3736from extractcode import all_kinds
3837from extractcode import regular
3938from extractcode import package
4645from extractcode import patch
4746from extractcode import sevenzip
4847from extractcode import libarchive2
48+ from extractcode import extracted_files
4949from extractcode .uncompress import uncompress_gzip
5050from extractcode .uncompress import uncompress_bzip2
5151
5252
53+ logger = logging .getLogger (__name__ )
54+ DEBUG = True
55+ DEBUG_DEEP = False
56+ # import sys
57+ # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
58+ # logger.setLevel(logging.DEBUG)
59+
60+
61+
5362"""
5463Archive formats handling. The purpose of this module is to select an extractor
5564suitable for the accurate extraction of a given kind of archive. An extractor is
98107extract_ishield = sevenzip .extract
99108extract_Z = sevenzip .extract
100109
101- DEBUG = False
102- logger = logging .getLogger (__name__ )
103- # import sys
104- # logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
105- # logger.setLevel(logging.DEBUG)
106-
107110
108111Handler = namedtuple ('Handler' , ['name' , 'types' , 'mimes' , 'exts' , 'kind' , 'extractors' ])
109112
@@ -112,7 +115,6 @@ def can_extract(location):
112115 """
113116 Return True if this location can be extracted by some handler.
114117 """
115- assert location
116118 handlers = list (get_handlers (location ))
117119 if handlers :
118120 return True
@@ -123,8 +125,7 @@ def should_extract(location, kinds):
123125 Return True if this location should be extracted based on the provided
124126 kinds
125127 """
126- assert location
127- assert kinds
128+ location = os .path .abspath (os .path .expanduser (location ))
128129 if get_extractor (location , kinds ):
129130 return True
130131
@@ -135,7 +136,7 @@ def get_extractor(location, kinds=all_kinds):
135136 None if no extract function is found.
136137 """
137138 assert location
138- assert kinds
139+ location = os . path . abspath ( os . path . expanduser ( location ))
139140 extractors = get_extractors (location , kinds )
140141 if not extractors :
141142 return None
@@ -157,14 +158,15 @@ def get_extractors(location, kinds=all_kinds):
157158 Return a list of extractors that can extract the file at
158159 location or an empty list.
159160 """
161+ location = os .path .abspath (os .path .expanduser (location ))
160162 if filetype .is_file (location ):
161- handlers = get_handlers (location )
163+ handlers = list ( get_handlers (location ) )
162164 if handlers :
163165 candidates = score_handlers (handlers )
164- if candidates :
165- best = pick_best_handler (candidates , kinds )
166- if best :
167- return best .extractors
166+ if candidates :
167+ best = pick_best_handler (candidates , kinds )
168+ if best :
169+ return best .extractors
168170 return []
169171
170172
@@ -174,7 +176,6 @@ def get_handlers(location):
174176 extension_matched,) for this `location`.
175177 """
176178 if filetype .is_file (location ):
177-
178179 T = typecode .contenttype .get_type (location )
179180 ftype = T .filetype_file .lower ()
180181 mtype = T .mimetype_file
@@ -199,13 +200,13 @@ def get_handlers(location):
199200 if handler .exts :
200201 extension_matched = location .lower ().endswith (handler .exts )
201202
202- if DEBUG :
203+ if DEBUG_DEEP :
203204 logger .debug ('get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals ())
204205 logger .debug ('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals ())
205206
206207
207208 if type_matched or mime_matched or extension_matched :
208- if DEBUG :
209+ if DEBUG_DEEP :
209210 logger .debug ('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals ())
210211 logger .debug ('get_handlers: %(location)s: handler: %(handler)r' % locals ())
211212 yield handler , type_matched , mime_matched , extension_matched
@@ -307,21 +308,33 @@ def extract_twice(location, target_dir, extractor1, extractor2):
307308 the `extractor1` function to a temporary directory then the `extractor2`
308309 function on the extracted payload of `extractor1`.
309310
310- Return a mapping of path->warning_message .
311+ Return a list of warning messages. Raise exceptions on errors .
311312
312313 Typical nested archives include compressed tarballs and RPMs (containing a
313314 compressed cpio).
315+
316+ Note: it would be easy to support deeper extractor chains, but this gets
317+ hard to trace and debug very quickly. A depth of two is simple and sane and
318+ covers most common cases.
314319 """
320+ abs_location = os .path .abspath (os .path .expanduser (location ))
321+ abs_target_dir = os .path .abspath (os .path .expanduser (target_dir ))
315322 # extract first the intermediate payload to a temp dir
316323 temp_target = fileutils .get_temp_dir ('extract' )
317- warnings = extractor1 (location , temp_target )
324+ warnings = extractor1 (abs_location , temp_target )
325+ if DEBUG :
326+ logger .debug ('extract_twice: temp_target: %(temp_target)r' % locals ())
318327
319328 # extract this intermediate payload to the final target_dir
320329 try :
321- for extracted1_loc in extractcode .extracted_files (temp_target ):
322- warnings .extend (extractor2 (extracted1_loc , target_dir ))
330+ inner_archives = list (extracted_files (temp_target ))
331+ if not inner_archives :
332+ warnings .append (location + ': No files found in archive.' )
323333 else :
324- warnings .append (location + ': No files found in archive.' )
334+ for extracted1_loc in inner_archives :
335+ if DEBUG :
336+ logger .debug ('extract_twice: extractor2: %(extracted1_loc)r' % locals ())
337+ warnings .extend (extractor2 (extracted1_loc , target_dir ))
325338 finally :
326339 # cleanup the temporary output from extractor1
327340 fileutils .delete (temp_target )
0 commit comments