@@ -265,8 +265,9 @@ def fetch(self, filename, query):
265
265
print ('Loaded ' + str (offset + batch_size ) + ' entries. Please wait...' )
266
266
sparql .setQuery (query + ' LIMIT %s OFFSET %s' % (batch_size , offset ))
267
267
data = sparql .query ().convert ()
268
- if len (data ) > 0 :
269
- all_results .extend (data )
268
+ if len (data ['results' ]['bindings' ]) > 0 :
269
+ print (len (data ['results' ]['bindings' ]))
270
+ all_results .extend (data ['results' ]['bindings' ])
270
271
json .dump (data , open (filename , 'a' , encoding = 'utf-8' ))
271
272
offset += batch_size
272
273
else :
@@ -385,60 +386,59 @@ def update_churches(self, data):
385
386
print ('\n Finished' )
386
387
387
388
def update_dioceses(self, data):
    """Insert or update dioceses in the database from SPARQL result bindings.

    ``data`` may be either:

    * a list of binding dicts (the shape ``fetch`` accumulates), or
    * a raw SPARQL JSON response dict, in which case the bindings are read
      from ``data['results']['bindings']``. A dict without those keys is
      ignored, as the previous dict-only implementation did.

    For every binding the method:

    * skips it when the cached ``modified`` timestamp is unchanged;
    * skips it when it has no ``P8389`` value or when its ``P31`` type is
      not listed in ``Query.dioceses_types``;
    * otherwise updates the existing row (when the id is already in
      ``self.cache_dioceses``) or inserts a new one, then records the new
      timestamp in ``self.cache_dioceses``.

    The session is committed every ``DB.commit_frequency`` items and once
    more after the loop.
    """
    if isinstance(data, dict):
        # Raw SPARQL JSON response: unwrap the bindings list.
        bindings = data.get('results', {}).get('bindings')
        if bindings is None:
            return
    else:
        bindings = data if data is not None else []

    t = len(bindings)
    print(t, 'dioceses loaded')

    for i, item in enumerate(bindings, start=1):
        # Periodic commit so a long import does not build one huge transaction.
        if i % DB.commit_frequency == 0:
            DB.session.commit()

        # Entity URI ends in "Q<number>"; keep only the numeric part.
        wikidata_id = int(item['dioceses']['value'].split('/')[-1].replace('Q', ''))
        # Strip the 'T' separator and trailing 'Z' so the timestamp can be
        # compared with the cache and parsed with Query.dateformat below.
        modified = item['modified']['value'].replace('T', ' ').replace('Z', '')

        if self.cache_dioceses.get(wikidata_id) == modified:
            print('(%s/%s) Q%s' % (i, t, wikidata_id), '-> continue', end=' \r ')
            continue

        gcatholic_id = Query.get_value(item, 'P8389')
        if not gcatholic_id:
            continue

        print('(%s/%s) Q%s' % (i, t, wikidata_id), '-> update', end=' \r ')

        type_ = Query.get_wikidata_id(item, 'P31')
        if not type_ or int(type_) not in Query.dioceses_types:
            continue  # ignore item FIXME we may want to delete it from the DB

        country_id = Query.get_wikidata_id(item, 'P17')
        website = Query.get_decoded_value(item, 'P856', '')

        # Prefer the French label, fall back to English, then to empty.
        if 'label_fr' in item:
            label = item['label_fr']['value']
        elif 'label_en' in item:
            label = item['label_en']['value']
        else:
            label = ''

        # dirty hack so that Annecy appears in France and not in Switzerland
        if wikidata_id == 866863:  # Annecy
            country_id = 142  # France

        if country_id and country_id not in self.cache_places:
            self.add_place(country_id, True)

        diocese = {
            'name': Query.ucfirst(label),
            'country_id': country_id,
            'gcatholic_id': gcatholic_id,
            'website': website,
            'updated_at': datetime.datetime.strptime(modified, Query.dateformat),
        }

        if wikidata_id in self.cache_dioceses:
            # .where() instead of the legacy positional where-clause argument.
            stmt = (
                update(DB.dioceses)
                .where(DB.dioceses.c.wikidata_id == wikidata_id)
                .values(diocese)
            )
        else:
            diocese['wikidata_id'] = wikidata_id
            diocese['created_at'] = DB.now
            stmt = insert(DB.dioceses).values(diocese)
        DB.session.execute(stmt)

        self.cache_dioceses[wikidata_id] = modified

    DB.session.commit()
    print('\n Finished')
442
442
443
443
def update_parishes (self , data ):
444
444
if 'results' in data .keys () and 'bindings' in data ['results' ].keys ():
0 commit comments