-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathembeds.py
36 lines (27 loc) · 991 Bytes
/
embeds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
'''
For each object, count the number of objects in which it is embedded.
Usage: bin/py embeds.py > embeds.jsonlines
'''
from elasticsearch import Elasticsearch
from elasticsearch.helpers import scan
import json
# Shared client for the Elasticsearch node on localhost; passed explicitly to
# the scan and to every per-object search below.
es = Elasticsearch(hosts='localhost:9200')
def embeds_uuid(es, uuid, item_type):
    """Count the indexed documents that embed the object ``uuid``.

    Runs a single search against the ``encoded`` index for documents whose
    ``embedded_uuids`` field contains ``uuid`` and aggregates the matches by
    their ``item_type`` field.

    Args:
        es: Elasticsearch client (any object exposing a compatible
            ``search(index=..., body=...)`` method).
        uuid: Identifier of the object to look up.
        item_type: Type of that object; copied into the result unchanged.

    Returns:
        A dict with the keys ``uuid``, ``item_type``, ``embeds`` (total
        number of matching documents) and ``buckets`` (the buckets of the
        ``item_type`` terms aggregation, as returned by the server).
    """
    query = {
        # Only the total and the aggregation are needed, so ask for no hits.
        # This replaces search_type='count', which Elasticsearch removed in
        # 5.0 and which cannot coexist with the 7.x-style total read below.
        'size': 0,
        'query': {'terms': {'embedded_uuids': [uuid]}},
        'aggregations': {
            'item_type': {'terms': {'field': 'item_type'}},
        },
    }
    res = es.search(index='encoded', body=query)

    # hits.total is a plain integer before Elasticsearch 7.0 and an object
    # ({'value': ..., 'relation': ...}) from 7.0 on; accept both shapes.
    # NOTE(review): on 7.x+ the value may be a lower bound unless
    # track_total_hits is enabled -- confirm whether exact counts matter.
    total = res['hits']['total']
    if isinstance(total, dict):
        total = total['value']

    return {
        'uuid': uuid,
        'item_type': item_type,
        'embeds': total,
        'buckets': res['aggregations']['item_type']['buckets'],
    }
# Phase 1: walk the whole index once and remember each document's id and type.
# The list is fully built before any per-object search is issued.
all_hits = scan(es, query={'fields': []})
uuid_type = [(doc['_id'], doc['_type']) for doc in all_hits]

# Phase 2: one search per object, written to stdout as one JSON object per
# line.
for uuid, item_type in uuid_type:
    print(json.dumps(embeds_uuid(es, uuid, item_type)))