|
| 1 | +import os |
| 2 | +import logging |
| 3 | +from typing import List |
| 4 | + |
| 5 | +import pandas as pd |
| 6 | + |
logger = logging.getLogger(__name__)


def generate_types(frame: pd.DataFrame, **kwargs) -> List[str]:
    '''
    Given the pre-processed DataFrame from the schema
    strategy, generate one type definition per table.

    Each definition has the form
    ``type <table> { <delim><column><delim>...<delim> }<delim>``.
    Columns whose ``options`` value contains ``@reverse`` are emitted
    as ``<~column>``.

    Parameters (all optional, passed through ``kwargs``):
        output_dir: directory for the exported file (default ``'.'``).
        export_schema: when truthy, write the types to disk (default False).
        export_file: name of the exported file (default ``'types.txt'``).
        console: when truthy, print every type as it is built (default False).
        encoding: encoding of the exported file (default ``'utf-8'``).
        line_delimeter: separator placed between the pieces of a type
            definition (default ``'\\n'``).

    Returns:
        The type definitions; types without reverse edges come first,
        followed by the types that contain at least one reverse edge.

    Raises:
        ValueError: if ``frame`` is None or lacks one of the ``column``,
            ``type``, ``table`` or ``options`` columns. The message is the
            name of the missing item.
    '''
    if frame is None:
        raise ValueError('frame')
    # Checked in this order so the first missing column is the one reported.
    for required in ('column', 'type', 'table', 'options'):
        if required not in frame:
            raise ValueError(required)

    output_dir = kwargs.get('output_dir', '.')
    export_schema = kwargs.get('export_schema', False)
    export_file = kwargs.get('export_file', 'types.txt')
    console = kwargs.get('console', False)
    encoding = kwargs.get('encoding', 'utf-8')
    # The key used to be looked up with a trailing space ('line_delimeter '),
    # which made the keyword argument impossible to pass normally. The
    # misspelled key is still honoured as a fallback for old callers.
    line_delimeter = kwargs.get(
        'line_delimeter', kwargs.get('line_delimeter ', '\n')
    )

    all_types: List[str] = []
    all_types_reverse: List[str] = []

    # Group by the scalar label (not a one-element list) so that the group
    # key is the plain table name rather than a 1-tuple on newer pandas.
    for name, current_frame in frame.groupby('table'):
        logger.debug('Creating types for %s', name)

        # Literal substring match; missing options never count as reverse.
        # astype(str) keeps this working when the column holds no strings.
        reverse_edge_mask = current_frame['options'].astype(str).str.contains(
            '@reverse', regex=False, na=False
        )

        # Build the decorated names without writing into the group frame.
        plain_columns = current_frame['column']
        columns = plain_columns.where(
            ~reverse_edge_mask, '<~' + plain_columns + '>'
        )

        type_builder = ''.join([
            f'type {name}',
            ' { ',
            line_delimeter,
            line_delimeter.join(columns.unique().tolist()),
            line_delimeter,
            ' }',
            line_delimeter,
        ])

        # Split up types with reverse edges so we can guarantee they are
        # applied after other types. This is required because if dgraph live
        # encounters a reverse edge for a type defined later in the file
        # then dgraph live will fail.
        # NOTE: There might be a better solution here: we could build a
        # dependency tree based on the references (topological sort?).
        # Also, this won't detect circular dependencies.
        if reverse_edge_mask.any():
            all_types_reverse.append(type_builder)
        else:
            all_types.append(type_builder)

        if console:
            print(type_builder)
            print(line_delimeter)

    ordered_types = all_types + all_types_reverse

    if export_schema:
        export_path = os.path.join(output_dir, export_file)
        logger.debug('Writing to %s (%s)', export_path, encoding)
        with open(export_path, 'w', encoding=encoding) as f:
            for current_type in ordered_types:
                f.write(current_type)
                # Records are always newline-separated in the export,
                # regardless of line_delimeter (unchanged behaviour).
                f.write('\n')

    return ordered_types
0 commit comments