Skip to content

Commit f0c16da

Browse files
committed
Merge, support unix filename pattern matching on zip archive
1 parent c017e1b commit f0c16da

File tree

3 files changed

+20
-9
lines changed

3 files changed

+20
-9
lines changed

analysers/Analyser_Merge.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import tempfile
3535
import json
3636
import re
37+
import fnmatch
3738
from typing import Optional, Dict, Union, Callable
3839
from collections import defaultdict
3940
from .Analyser_Osmosis import Analyser_Osmosis
@@ -322,7 +323,7 @@ def __init__(self, attribution = None, millesime = None, encoding = "utf-8", fil
322323
@param file: file name in storage
323324
@param urlFile: remote URL of source file
324325
@param fileUrlCache: days for file in cache
325-
@param zip: extract file from zip
326+
@param zip: extract a file from zip; the member name may be a Unix filename pattern (fnmatch), the first match is used
326327
@param extract: extract file from any archive format
327328
@param bz2: uncompress from bz2
328329
@param gzip: uncompress from gzip
@@ -350,13 +351,23 @@ def __init__(self, attribution = None, millesime = None, encoding = "utf-8", fil
350351
if self.attribution and "{0}" in self.attribution:
351352
self.attribution_re = re.compile(self.attribution.replace("{0}", ".*"))
352353

354+
def zipFile(self):
    """
    Locate the archive member selected by the `zip` parameter.

    `self.zip` is treated as a Unix filename pattern (fnmatch) and compared
    with the member names of the ZIP archive read from `self.file` or,
    failing that, fetched from `self.fileUrl`.

    @return: zipfile.ZipInfo of the first matching member, or None when no
             `zip` pattern is configured or no archive source is set
    @raise KeyError: no member of the archive matches the pattern
    """
    # Callers use `if self.zipFile():` to decide whether the source is a ZIP
    # archive, so answer without touching the source when no pattern is set.
    if not self.zip:
        return None

    if self.file:
        f = open(self.file, 'rb')
    elif self.fileUrl:
        f = downloader.urlopen(self.fileUrl, self.fileUrlCache, mode='rb')
    else:
        return None

    try:
        with zipfile.ZipFile(f, 'r') as z:
            for zipinfo in z.infolist():
                if fnmatch.fnmatch(zipinfo.filename, self.zip):
                    # ZipInfo is plain metadata, it stays usable once the
                    # archive is closed.
                    return zipinfo
    finally:
        # ZipFile does not close a file object it did not open itself.
        f.close()

    # Same exception type as ZipFile.getinfo() for a missing member.
    raise KeyError("There is no item matching %r in the archive" % (self.zip,))
364+
353365
def time(self):
354366
if self.file:
355367
return int(os.path.getmtime(self.file)+.5)
356368
elif self.fileUrl:
357-
if self.zip:
358-
f = downloader.urlopen(self.fileUrl, self.fileUrlCache, mode='rb')
359-
date_time = zipfile.ZipFile(f, 'r').getinfo(self.zip).date_time
369+
if self.zipFile():
370+
date_time = self.zipFile().date_time
360371
return int(time.mktime(date_time + (0, 0, -1))+.5)
361372
else:
362373
return int(downloader.urlmtime(self.fileUrl, self.fileUrlCache)+.5)
@@ -374,8 +385,8 @@ def open(self, binary = False):
374385
elif self.fileUrl:
375386
f = downloader.urlopen(self.fileUrl, self.fileUrlCache, mode='rb')
376387

377-
if self.zip:
378-
z = zipfile.ZipFile(f, 'r').open(self.zip)
388+
if self.zipFile():
389+
z = zipfile.ZipFile(f, 'r').open(self.zipFile().filename)
379390
f = io.BytesIO(z.read())
380391
f.seek(0)
381392
elif self.extract:
@@ -689,7 +700,7 @@ def import_(self, table, srid, osmosis):
689700
self.source.encoding,
690701
srid,
691702
tmp_file.name,
692-
self.source.zip,
703+
self.source.zipFile().filename,
693704
table,
694705
tmp_file.name
695706
)

analysers/analyser_merge_public_transport_FR_idfm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def __init__(self, config, logger, clas, conflationDistance, select, osmTags, de
3838
"Référentiel des arrêts : fichiers SIG",
3939
SHP(Source(attribution = 'Île-de-France Mobilités', millesime = '03/2022',
4040
fileUrl = 'https://eu.ftp.opendatasoft.com/stif/Reflex/REF_ZDE.zip',
41-
zip = 'PT_ZDE_R_02_03_2022.shp')),
41+
zip = '*.shp')),
4242
LoadGeomCentroid(srid = 2154,
4343
select = {"type_arret": select}),
4444
Conflate(

doc/4-Merge.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ Remote files are fetched and saved in the Osmose-QA Backend cache, the delay can
138138
The remote file could be compressed or an archive:
139139
- `bz2` (boolean): the content is compressed in BZip2 format.
140140
- `gzip` (boolean): the content is compressed in GZip format.
141-
- `zip`: the remote URL is a ZIP archive and the data is at this path inside the archive.
141+
- `zip`: the remote URL is a ZIP archive and the data is at this path inside the archive. The path can use Unix filename pattern matching (e.g. `*.shp`); the first matching entry is used.
142142
- `extract`: same as `zip`, but for all archive formats.
143143

144144
Assuming the resource is a text file:

0 commit comments

Comments
 (0)