-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrel_reduce.py
43 lines (35 loc) · 1.26 KB
/
rel_reduce.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import sys
from itertools import groupby

# Width of the left-hand ("grandchild") column, separator included.
COLUMN_WIDTH = 16


def _format_row(left, right):
    """Return one table row: *left* padded to the column width, then *right*.

    Padding to ``COLUMN_WIDTH - 1`` and then appending one explicit space
    yields the same 16-character column for short names, while still keeping
    a separator when *left* is 16 characters or longer (plain padding would
    add nothing and glue the two names together).
    """
    return left.ljust(COLUMN_WIDTH - 1) + ' ' + right


def _iter_records(lines):
    """Yield ``(key, value)`` from tab-separated *lines*, skipping bad ones.

    A line is skipped when it has fewer than two fields or when its value
    field is empty; an empty value carries no tag character to dispatch on.
    Fields after the second are ignored.
    """
    for line in lines:
        fields = line.strip().split('\t')
        if len(fields) < 2 or not fields[1]:
            continue
        yield fields[0], fields[1]


def _unique(names):
    """Return *names* without duplicates, keeping first-occurrence order."""
    seen = set()
    ordered = []
    for name in names:
        if name not in seen:
            seen.add(name)
            ordered.append(name)
    return ordered


def _iter_pairs(records):
    """Yield ``(grandchild, grandparent)`` pairs from ``(key, value)`` records.

    Records are grouped by runs of consecutive equal keys, so the input has
    to arrive sorted by key for a key's values to be joined together. Within
    a group, a value tagged with a leading ``'p'`` contributes a grandparent
    name and any other value contributes a grandchild name; the name is the
    value with its first character removed. Every grandchild of a group is
    paired with every grandparent of the same group.
    """
    for _, group in groupby(records, key=lambda record: record[0]):
        grandparents = []
        grandchildren = []
        for _, value in group:
            bucket = grandparents if value[0] == 'p' else grandchildren
            bucket.append(value[1:])
        grandparents = _unique(grandparents)
        for grandchild in _unique(grandchildren):
            for grandparent in grandparents:
                yield grandchild, grandparent


def main(stdin=None, stdout=None):
    """Read tagged relation records and write the grandchild/grandparent table.

    Args:
        stdin: Iterable of input lines; defaults to ``sys.stdin``.
        stdout: Object with a ``write`` method; defaults to ``sys.stdout``.

    A two-line header is always written first, followed by one row per
    (grandchild, grandparent) pair, emitted as soon as each key group ends.
    """
    source = sys.stdin if stdin is None else stdin
    sink = sys.stdout if stdout is None else stdout

    sink.write(_format_row('grandchild', 'grandparent') + '\n')
    sink.write('-' * 29 + '\n')
    for grandchild, grandparent in _iter_pairs(_iter_records(source)):
        sink.write(_format_row(grandchild, grandparent) + '\n')


if __name__ == '__main__':
    main()
'''
Example Hadoop Streaming invocation (rel_map.py as the mapper, this script as the reducer):
hadoop jar /usr/local/hadoop/share/hadoop/tools/lib/hadoop-streaming-3.2.0.jar -file ./rel_map.py -file ./rel_reduce.py -input /relation -output /relation_output -mapper "python rel_map.py" -reducer "python rel_reduce.py"
'''