Skip to content

Commit 755ead9

Browse files
committed
Merge pull request #32 from pombredanne/issue_31
Issue 31
2 parents 614c438 + 8d41972 commit 755ead9

File tree

13 files changed

+296
-70
lines changed

13 files changed

+296
-70
lines changed

src/commoncode/command.py

+3
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@
5656
"""
5757

5858
logger = logging.getLogger(__name__)
59+
# import sys
60+
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
61+
# logger.setLevel(logging.DEBUG)
5962

6063
# current directory is the root dir of this library
6164
curr_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

src/extractcode/__init__.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from __future__ import print_function, absolute_import
2626

27+
import logging
2728
import os
2829
import posixpath
2930
import re
@@ -32,6 +33,13 @@
3233
from commoncode import fileutils
3334

3435

36+
logger = logging.getLogger(__name__)
37+
DEBUG = False
38+
# import sys
39+
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
40+
# logger.setLevel(logging.DEBUG)
41+
42+
3543
root_dir = os.path.join(os.path.dirname(__file__), 'bin')
3644

3745
# Suffix added to extracted target_dir paths
@@ -134,9 +142,8 @@ def extracted_files(location):
134142
Yield the locations of extracted files in a directory location.
135143
"""
136144
assert location
137-
for top, _dirs, files in os.walk(location, topdown=True):
138-
for f in files:
139-
yield os.path.join(top, f)
145+
logger.debug('extracted_files for: %(location)r' % locals())
146+
return fileutils.file_iter(location)
140147

141148

142149
def new_name(location, is_dir=False):

src/extractcode/archive.py

+38-25
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,12 @@
2727
from collections import namedtuple
2828
import functools
2929
import logging
30+
import os
3031

3132
from commoncode import fileutils
3233
from commoncode import filetype
3334
import typecode
3435

35-
import extractcode
36-
3736
from extractcode import all_kinds
3837
from extractcode import regular
3938
from extractcode import package
@@ -46,10 +45,20 @@
4645
from extractcode import patch
4746
from extractcode import sevenzip
4847
from extractcode import libarchive2
48+
from extractcode import extracted_files
4949
from extractcode.uncompress import uncompress_gzip
5050
from extractcode.uncompress import uncompress_bzip2
5151

5252

53+
logger = logging.getLogger(__name__)
54+
DEBUG = True
55+
DEBUG_DEEP = False
56+
# import sys
57+
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
58+
# logger.setLevel(logging.DEBUG)
59+
60+
61+
5362
"""
5463
Archive formats handling. The purpose of this module is to select an extractor
5564
suitable for the accurate extraction of a given kind of archive. An extractor is
@@ -98,12 +107,6 @@
98107
extract_ishield = sevenzip.extract
99108
extract_Z = sevenzip.extract
100109

101-
DEBUG = False
102-
logger = logging.getLogger(__name__)
103-
# import sys
104-
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
105-
# logger.setLevel(logging.DEBUG)
106-
107110

108111
Handler = namedtuple('Handler', ['name', 'types', 'mimes', 'exts', 'kind', 'extractors'])
109112

@@ -112,7 +115,6 @@ def can_extract(location):
112115
"""
113116
Return True if this location can be extracted by some handler.
114117
"""
115-
assert location
116118
handlers = list(get_handlers(location))
117119
if handlers:
118120
return True
@@ -123,8 +125,7 @@ def should_extract(location, kinds):
123125
Return True if this location should be extracted based on the provided
124126
kinds
125127
"""
126-
assert location
127-
assert kinds
128+
location = os.path.abspath(os.path.expanduser(location))
128129
if get_extractor(location, kinds):
129130
return True
130131

@@ -135,7 +136,7 @@ def get_extractor(location, kinds=all_kinds):
135136
an None if no extract function is found.
136137
"""
137138
assert location
138-
assert kinds
139+
location = os.path.abspath(os.path.expanduser(location))
139140
extractors = get_extractors(location, kinds)
140141
if not extractors:
141142
return None
@@ -157,14 +158,15 @@ def get_extractors(location, kinds=all_kinds):
157158
Return a list of extractors that can extract the file at
158159
location or an empty list.
159160
"""
161+
location = os.path.abspath(os.path.expanduser(location))
160162
if filetype.is_file(location):
161-
handlers = get_handlers(location)
163+
handlers = list(get_handlers(location))
162164
if handlers:
163165
candidates = score_handlers(handlers)
164-
if candidates:
165-
best = pick_best_handler(candidates, kinds)
166-
if best:
167-
return best.extractors
166+
if candidates:
167+
best = pick_best_handler(candidates, kinds)
168+
if best:
169+
return best.extractors
168170
return []
169171

170172

@@ -174,7 +176,6 @@ def get_handlers(location):
174176
extension_matched,) for this `location`.
175177
"""
176178
if filetype.is_file(location):
177-
178179
T = typecode.contenttype.get_type(location)
179180
ftype = T.filetype_file.lower()
180181
mtype = T.mimetype_file
@@ -199,13 +200,13 @@ def get_handlers(location):
199200
if handler.exts:
200201
extension_matched = location.lower().endswith(handler.exts)
201202

202-
if DEBUG:
203+
if DEBUG_DEEP:
203204
logger.debug('get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
204205
logger.debug('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
205206

206207

207208
if type_matched or mime_matched or extension_matched:
208-
if DEBUG:
209+
if DEBUG_DEEP:
209210
logger.debug('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
210211
logger.debug('get_handlers: %(location)s: handler: %(handler)r' % locals())
211212
yield handler, type_matched, mime_matched, extension_matched
@@ -307,21 +308,33 @@ def extract_twice(location, target_dir, extractor1, extractor2):
307308
the `extractor1` function to a temporary directory then the `extractor2`
308309
function on the extracted payload of `extractor1`.
309310
310-
Return a mapping of path->warning_message.
311+
Return a list of warning messages. Raise exceptions on errors.
311312
312313
Typical nested archives include compressed tarballs and RPMs (containing a
313314
compressed cpio).
315+
316+
Note: it would be easy to support deeper extractor chains, but this gets
317+
hard to trace and debug very quickly. A depth of two is simple and sane and
318+
covers most common cases.
314319
"""
320+
abs_location = os.path.abspath(os.path.expanduser(location))
321+
abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
315322
# extract first the intermediate payload to a temp dir
316323
temp_target = fileutils.get_temp_dir('extract')
317-
warnings = extractor1(location, temp_target)
324+
warnings = extractor1(abs_location, temp_target)
325+
if DEBUG:
326+
logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())
318327

319328
# extract this intermediate payload to the final target_dir
320329
try:
321-
for extracted1_loc in extractcode.extracted_files(temp_target):
322-
warnings.extend(extractor2(extracted1_loc, target_dir))
330+
inner_archives = list(extracted_files(temp_target))
331+
if not inner_archives:
332+
warnings.append(location + ': No files found in archive.')
323333
else:
324-
warnings.append(location+ ': No files found in archive.')
334+
for extracted1_loc in inner_archives:
335+
if DEBUG:
336+
logger.debug('extract_twice: extractor2: %(extracted1_loc)r' % locals())
337+
warnings.extend(extractor2(extracted1_loc, target_dir))
325338
finally:
326339
# cleanup the temporary output from extractor1
327340
fileutils.delete(temp_target)

src/extractcode/extract.py

+18-14
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
from collections import namedtuple
2828
from functools import partial
2929
import logging
30+
from os.path import abspath
31+
from os.path import expanduser
3032
from os.path import join
3133

3234
from commoncode import fileutils
@@ -117,11 +119,11 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False):
117119
"""
118120
ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
119121
if DEBUG:
120-
logger.debug('extract:start:' + location + ' recurse:' + repr(recurse) + '\n')
121-
122-
for top, dirs, files in fileutils.walk(location, ignored):
122+
logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())
123+
abs_location = abspath(expanduser(location))
124+
for top, dirs, files in fileutils.walk(abs_location, ignored):
123125
if DEBUG:
124-
logger.debug('extract:walk: top:' + top + ' dirs:' + repr(dirs) + ' files:' + repr(files))
126+
logger.debug('extract:walk: top: %(top)r dirs: %(dirs)r files: r(files)r' % locals())
125127

126128
if not recurse:
127129
if DEBUG:
@@ -135,26 +137,28 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False):
135137
loc = join(top, f)
136138
if not recurse and extractcode.is_extraction_path(loc):
137139
if DEBUG:
138-
logger.debug('extract:walk not recurse: skipped file:' + loc)
140+
logger.debug('extract:walk not recurse: skipped file: %(loc)r' % locals())
139141
continue
140142

141143
if not archive.should_extract(loc, kinds):
142144
if DEBUG:
143-
logger.debug('extract:walk: skipped file: not should_extract:' + loc)
145+
logger.debug('extract:walk: skipped file: not should_extract: %(loc)r' % locals())
144146
continue
145147

146-
target = join(top, extractcode.get_extraction_path(loc))
148+
target = join(abspath(top), extractcode.get_extraction_path(loc))
149+
if DEBUG:
150+
logger.debug('extract:target: %(target)r' % locals())
147151
for xevent in extract_file(loc, target, kinds):
148152
if DEBUG:
149-
logger.debug('extract:walk:extraction event:' + repr(xevent))
153+
logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
150154
yield xevent
151155

152156
if recurse:
153157
if DEBUG:
154-
logger.debug('extract:walk: recursing on:' + target)
158+
logger.debug('extract:walk: recursing on target: %(target)r' % locals())
155159
for xevent in extract(target, kinds, recurse):
156160
if DEBUG:
157-
logger.debug('extract:walk:recurse:extraction event:' + repr(xevent))
161+
logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())
158162
yield xevent
159163

160164

@@ -167,8 +171,7 @@ def extract_file(location, target, kinds=extractcode.default_kinds):
167171
errors = []
168172
extractor = archive.get_extractor(location, kinds)
169173
if DEBUG:
170-
logger.debug('extract_file: extractor: for:' + location
171-
+ ' with kinds: ' + repr(kinds) + ': '
174+
logger.debug('extract_file: extractor: for: %(location)r with kinds: r(kinds)r : ' % locals()
172175
+ getattr(extractor, '__module__', '')
173176
+ '.' + getattr(extractor, '__name__', ''))
174177
if extractor:
@@ -178,12 +181,13 @@ def extract_file(location, target, kinds=extractcode.default_kinds):
178181
# if there is an error, the extracted files will not be moved
179182
# to target
180183
tmp_tgt = fileutils.get_temp_dir('extract')
181-
warnings.extend(extractor(location, tmp_tgt))
184+
abs_location= abspath(expanduser(location))
185+
warnings.extend(extractor(abs_location, tmp_tgt))
182186
fileutils.copytree(tmp_tgt, target)
183187
fileutils.delete(tmp_tgt)
184188
except Exception, e:
185189
if DEBUG:
186-
logger.debug('extract_file: ERROR: %(errors)r, %(e)r.\n' % locals())
190+
logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
187191
errors = [str(e).strip(' \'"')]
188192
finally:
189193
yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)

src/extractcode/libarchive2.py

+9-16
Original file line numberDiff line numberDiff line change
@@ -101,13 +101,14 @@ def extract(location, target_dir):
101101
"""
102102
assert location
103103
assert target_dir
104-
104+
abs_location = os.path.abspath(os.path.expanduser(location))
105+
abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
105106
warnings = []
106107

107-
for entry in list_entries(location):
108+
for entry in list_entries(abs_location):
108109
if not (entry.isdir or entry.isfile):
109110
continue
110-
_target_path = entry.write(target_dir, transform_path=paths.resolve)
111+
_target_path = entry.write(abs_target_dir, transform_path=paths.resolve)
111112
if entry.warnings:
112113
msgs = [w.strip('"\' ') for w in entry.warnings if w.strip('"\' ')]
113114
msgs = msgs or ['No message provided']
@@ -121,10 +122,12 @@ def list_entries(location):
121122
"""
122123
Yield the entries of the archive file at `location`.
123124
"""
124-
assert os.path.exists(location)
125-
assert os.path.isfile(location)
125+
assert location
126+
abs_location = os.path.abspath(os.path.expanduser(location))
127+
assert os.path.isfile(abs_location)
128+
126129
# TODO: harden error handling
127-
with Archive(location) as archive:
130+
with Archive(abs_location) as archive:
128131
for entry in archive:
129132
yield entry
130133

@@ -314,16 +317,6 @@ def write(self, target_dir, transform_path=lambda x: x):
314317
self.warnings.append(msg)
315318
return target_path
316319

317-
# except ArchiveError, ae:
318-
# if ae.msg and ae.msg.startswith('Encrypted file is unsupported'):
319-
# raise ArchiveErrorPasswordProtected(root_ex=ae)
320-
# else:
321-
# raise
322-
#
323-
# except Exception, e:
324-
# raise
325-
# raise ArchiveError(root_ex=e)
326-
327320
def __repr__(self):
328321
return ('Entry('
329322
'path=%(path)r,'

0 commit comments

Comments
 (0)