Skip to content

Commit 76af3be

Browse files
committed
Merge remote-tracking branch 'frodrigo/master'
* frodrigo/master:
  Merge, support unix filename pattern matching on zip archive
  Disable FR_CAPP merge, no more data source
  Disable analyser_merge_tmc_point_FR, no more data source
  Data schema update in analyser_merge_post_box_FR (changing column names every month, that's fine)
  Update data source in analyser_merge_healthcare_FR_finess
  Use std source function in analyser_merge_geodesie_support_FR
2 parents 5f3c347 + f0c16da commit 76af3be

12 files changed

+24
-14
lines changed

analysers/Analyser_Merge.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import tempfile
3535
import json
3636
import re
37+
import fnmatch
3738
from typing import Optional, Dict, Union, Callable
3839
from collections import defaultdict
3940
from .Analyser_Osmosis import Analyser_Osmosis
@@ -322,7 +323,7 @@ def __init__(self, attribution = None, millesime = None, encoding = "utf-8", fil
322323
@param file: file name in storage
323324
@param urlFile: remote URL of source file
324325
@param fileUrlCache: days for file in cache
325-
@param zip: extract file from zip
326+
@param zip: extract a file from zip. Unix filename pattern matching.
326327
@param extract: extract file from any archive format
327328
@param bz2: uncompress from bz2
328329
@param gzip: uncompress from gzip
@@ -350,13 +351,23 @@ def __init__(self, attribution = None, millesime = None, encoding = "utf-8", fil
350351
if self.attribution and "{0}" in self.attribution:
351352
self.attribution_re = re.compile(self.attribution.replace("{0}", ".*"))
352353

354+
def zipFile(self):
    """
    Locate the archive member selected by the `zip` pattern.

    The archive is read from `self.file` when set, otherwise it is fetched
    through `downloader.urlopen` from `self.fileUrl`.
    @return: the zipfile.ZipInfo of the first member (in archive order) whose
        name matches `self.zip` using Unix filename pattern matching
        (fnmatch), or None when no zip pattern or no file location is set
    @raise KeyError: no member of the archive matches the pattern
    """
    # Callers use the result as a truth test, so a source that is not a zip
    # must yield None before anything is opened or downloaded.
    if not self.zip:
        return None

    if self.file:
        f = open(self.file, 'rb')
    elif self.fileUrl:
        f = downloader.urlopen(self.fileUrl, self.fileUrlCache, mode='rb')
    else:
        return None

    try:
        with zipfile.ZipFile(f, 'r') as z:
            for zipinfo in z.infolist():
                if fnmatch.fnmatch(zipinfo.filename, self.zip):
                    return zipinfo
    finally:
        # ZipFile does not close a file object it did not open itself.
        f.close()

    # Same exception type as ZipFile.getinfo() for a missing member.
    raise KeyError("There is no item matching {0!r} in the archive".format(self.zip))
364+
353365
def time(self):
354366
if self.file:
355367
return int(os.path.getmtime(self.file)+.5)
356368
elif self.fileUrl:
357-
if self.zip:
358-
f = downloader.urlopen(self.fileUrl, self.fileUrlCache, mode='rb')
359-
date_time = zipfile.ZipFile(f, 'r').getinfo(self.zip).date_time
369+
if self.zipFile():
370+
date_time = self.zipFile().date_time
360371
return int(time.mktime(date_time + (0, 0, -1))+.5)
361372
else:
362373
return int(downloader.urlmtime(self.fileUrl, self.fileUrlCache)+.5)
@@ -374,8 +385,8 @@ def open(self, binary = False):
374385
elif self.fileUrl:
375386
f = downloader.urlopen(self.fileUrl, self.fileUrlCache, mode='rb')
376387

377-
if self.zip:
378-
z = zipfile.ZipFile(f, 'r').open(self.zip)
388+
if self.zipFile():
389+
z = zipfile.ZipFile(f, 'r').open(self.zipFile().filename)
379390
f = io.BytesIO(z.read())
380391
f.seek(0)
381392
elif self.extract:
@@ -689,7 +700,7 @@ def import_(self, table, srid, osmosis):
689700
self.source.encoding,
690701
srid,
691702
tmp_file.name,
692-
self.source.zip,
703+
self.source.zipFile().filename,
693704
table,
694705
tmp_file.name
695706
)

analysers/analyser_merge_geodesie_support_FR.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,5 @@ def __init__(self, config, error_file, logger, item, classs, level, topic, osmTa
7272
conflationDistance = 200,
7373
mapping = Mapping(
7474
static1 = dict(dict(**osmTags), **defaultTags),
75-
static2 = {"source": lambda a: a.parser.source.attribution},
75+
static2 = {"source": self.source},
7676
text = lambda tags, fields: {"en": fields["description"]} )))

analysers/analyser_merge_healthcare_FR_finess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ def __init__(self, config, error_file, logger, srid, is_in, categories, items, m
7878
attribution="Le ministère des solidarités et de la santé",
7979
encoding="ISO-8859-1",
8080
dataset="53699569a3a729239d2046eb",
81-
resource="2ce43ade-8d2c-4d1d-81da-ca06c82abc68")),
81+
resource="98f3161f-79ff-4f16-8f6a-6d571a80fea2")),
8282
Load("coordxet", "coordyet", srid = srid,
8383
select = {"categetab": categories},
8484
where = lambda res: is_in(res["departement"])),

analysers/analyser_merge_post_box_FR.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,5 @@ def __init__(self, config, logger = None):
5454
"amenity": "post_box",
5555
"operator": "La Poste"},
5656
static2 = {"source": self.source},
57-
mapping1 = {"ref": "CO_EXT"},
58-
text = lambda tags, fields: {"en": ", ".join(filter(lambda x: x, [fields["NUM_VOIE"], fields["LB_CPL_NUM_ADR"], fields["LB_VOIE"], fields["CO_POS"], fields["LB_CMN"]]))} )))
57+
mapping1 = {"ref": "CO_MUP"},
58+
text = lambda tags, fields: {"en": ", ".join(filter(lambda x: x, [fields["VA_NO_VOIE"], fields["LB_EXTENSION"], fields["LB_VOIE_EXT"], fields["CO_POSTAL"], fields["LB_COM"]]))} )))

analysers/analyser_merge_public_transport_FR_idfm.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def __init__(self, config, logger, clas, conflationDistance, select, osmTags, de
3838
"Référentiel des arrêts : fichiers SIG",
3939
SHP(Source(attribution = 'Île-de-France Mobilités', millesime = '03/2022',
4040
fileUrl = 'https://eu.ftp.opendatasoft.com/stif/Reflex/REF_ZDE.zip',
41-
zip = 'PT_ZDE_R_02_03_2022.shp')),
41+
zip = '*.shp')),
4242
LoadGeomCentroid(srid = 2154,
4343
select = {"type_arret": select}),
4444
Conflate(

0 commit comments

Comments
 (0)