Skip to content

Commit 11837f7

Browse files
committedJan 9, 2025·
add stub of a dbml importer
1 parent 2ae9ba2 commit 11837f7

File tree

4 files changed

+124
-8
lines changed

4 files changed

+124
-8
lines changed
 

‎docs/packages/importers.rst

+7
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ NCI implements a JSON serialization of ISO-11197. You can import this JSON and c
7777
schemauto import-cadsr "cdes/*.json"
7878
7979
80+
Importing from DBML
81+
--------------------
82+
83+
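DBML (Database Markup Language) is a simple DSL for defining database schemas. It is not SQL itself: it describes tables, columns, indexes and relationships in a compact, readable syntax that can be translated to SQL DDL.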
84+
85+
86+
8087
Packages for importing
8188
----------------------
8289

‎poetry.lock

+18-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ click-default-group = "^1.2.4"
5353
linkml-runtime = "^1.7.2"
5454
duckdb = "^0.10.1"
5555
numpy = "<2.0"
56+
pydbml = "^1.1.2"
5657

5758
[tool.poetry.dev-dependencies]
5859
pytest = ">=7.1.1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
from schema_automator.importers.import_engine import ImportEngine
2+
from pydbml import PyDBML
3+
from linkml_runtime.linkml_model import SchemaDefinition, ClassDefinition, SlotDefinition
4+
from dataclasses import dataclass
5+
6+
@dataclass
class DbmlImportEngine(ImportEngine):
    """
    An ImportEngine that reads a DBML schema and builds a corresponding LinkML schema.

    Every DBML table becomes a LinkML class and every column becomes a
    schema-level slot that is listed on that class. Primary keys are mapped to
    LinkML identifiers and unique indexes are mapped to LinkML unique keys.
    """

    # Deliberately left un-annotated so that ``@dataclass`` treats it as a plain
    # class constant and not as an instance field.
    # Keys are lower-case DBML/SQL type names with any "(...)" arguments removed.
    _DBML_TO_LINKML_TYPES = {
        "int": "integer",
        "integer": "integer",
        "smallint": "integer",
        "bigint": "integer",
        "tinyint": "integer",
        "serial": "integer",
        "bigserial": "integer",
        "varchar": "string",
        "character varying": "string",
        "nvarchar": "string",
        "char": "string",
        "text": "string",
        "uuid": "string",
        "float": "float",
        "real": "float",
        "double": "double",
        "double precision": "double",
        "decimal": "decimal",
        "numeric": "decimal",
        "boolean": "boolean",
        "bool": "boolean",
        "date": "date",
        "datetime": "datetime",
        "timestamp": "datetime",
        "time": "time",
    }

    def convert(
        self,
        file: str,
        name: str = None,
        model_uri: str = None,
        identifier: str = None,
        **kwargs
    ) -> SchemaDefinition:
        """
        Converts a DBML schema file into a LinkML SchemaDefinition.

        :param file: Path to the DBML schema file (read as UTF-8).
        :param name: Optional name for the generated LinkML schema;
            defaults to ``"GeneratedSchema"``.
        :param model_uri: Optional URI used as the schema id; defaults to
            ``https://example.org/<schema name>``.
        :param identifier: Accepted for interface compatibility; currently
            not used, identifiers are derived from the DBML primary keys.
        :param kwargs: Accepted for interface compatibility; currently ignored.
        :return: SchemaDefinition object representing the DBML schema.
        """
        schema_name = name or "GeneratedSchema"
        schema = SchemaDefinition(
            name=schema_name,
            id=model_uri or f"https://example.org/{schema_name}",
        )

        with open(file, "r", encoding="utf-8") as f:
            parsed_dbml = PyDBML(f.read())

        for table in parsed_dbml.tables:
            schema.classes[table.name] = self._table_to_class(schema, table)

        return schema

    def _table_to_class(self, schema: SchemaDefinition, table) -> ClassDefinition:
        """
        Builds the ClassDefinition for one DBML table and registers its slots on ``schema``.

        :param schema: Schema that receives one SlotDefinition per column.
        :param table: The pydbml table object to translate.
        :return: The ClassDefinition for the table (not yet added to ``schema.classes``).
        """
        class_def = ClassDefinition(
            name=table.name,
            description=self._note_text(table.note)
            or f"Auto-generated class for table '{table.name}'",
            slots=[],
        )

        primary_key_names = [
            col.name for col in table.columns if self._is_primary_key(col)
        ]
        primary_key_set = set(primary_key_names)
        # LinkML allows a single identifier slot per class, so only a
        # single-column primary key can be expressed as an identifier.
        # A composite primary key is recorded as a unique key further down.
        single_identifier = (
            primary_key_names[0] if len(primary_key_names) == 1 else None
        )
        unique_only_names = []

        for column in table.columns:
            slot_name = column.name
            is_pk = slot_name in primary_key_set
            is_unique = bool(column.unique)
            if is_unique and not is_pk:
                unique_only_names.append(slot_name)
            # NOTE(review): slots are keyed by column name at schema level, so
            # two tables sharing a column name (e.g. "id") overwrite each
            # other's slot; kept because callers may rely on `schema.slots`.
            schema.slots[slot_name] = SlotDefinition(
                name=slot_name,
                range=self._map_dbml_type_to_linkml(column.type),
                description=self._note_text(column.note) or f"Column '{slot_name}'",
                required=is_pk or is_unique or bool(getattr(column, "not_null", False)),
                identifier=slot_name == single_identifier,
            )
            class_def.slots.append(slot_name)

        if len(primary_key_names) > 1:
            self._add_unique_key(
                class_def, f"{table.name}_primary_key", primary_key_names
            )

        # Unique (and composite primary-key) indexes become LinkML unique keys.
        for index in table.indexes or []:
            if not (index.unique or getattr(index, "pk", False)):
                continue
            slot_names = self._index_slot_names(index)
            if not slot_names:
                # The index refers to an expression, not plain columns, so it
                # cannot be expressed as a list of slots.
                continue
            key_name = (
                getattr(index, "name", None)
                or f"{table.name}_{'_'.join(slot_names)}_key"
            )
            self._add_unique_key(class_def, key_name, slot_names)

        # With no explicit primary key, a lone unique column is the best
        # available identifier for the class.
        if not primary_key_names and len(unique_only_names) == 1:
            promoted = schema.slots[unique_only_names[0]]
            promoted.identifier = True
            promoted.required = True

        return class_def

    @staticmethod
    def _is_primary_key(column) -> bool:
        """Returns True when the pydbml column is flagged as a primary key."""
        # pydbml exposes the flag as `pk`; `primary_key` is checked as well so
        # the importer tolerates objects that use the longer attribute name.
        return bool(
            getattr(column, "pk", False) or getattr(column, "primary_key", False)
        )

    @staticmethod
    def _note_text(note) -> str:
        """Returns the text of a pydbml note (or plain string); '' when absent."""
        if note is None:
            return ""
        return str(getattr(note, "text", note)).strip()

    @staticmethod
    def _index_slot_names(index) -> list:
        """
        Returns the column names covered by a pydbml index.

        An empty list is returned when any member of the index is not a plain
        column (for example an expression).
        """
        subjects = getattr(index, "subjects", None)
        if subjects is None:
            subjects = getattr(index, "columns", None) or []
        names = []
        for subject in subjects:
            if isinstance(subject, str):
                names.append(subject)
                continue
            column_name = getattr(subject, "name", None)
            if column_name is None:
                return []
            names.append(column_name)
        return names

    @staticmethod
    def _add_unique_key(class_def: ClassDefinition, key_name: str, slot_names: list) -> None:
        """Registers a named unique key over ``slot_names`` on ``class_def``."""
        # Imported locally: the module-level import list does not include UniqueKey.
        from linkml_runtime.linkml_model.meta import UniqueKey

        # ClassDefinition normalises `unique_keys` to a dict keyed by key name,
        # so entries are assigned by name; the dict has no `append`.
        class_def.unique_keys[key_name] = UniqueKey(
            unique_key_name=key_name,
            unique_key_slots=list(slot_names),
        )

    def _map_dbml_type_to_linkml(self, dbml_type: str) -> str:
        """
        Maps DBML data types to LinkML types.

        Matching is case-insensitive and ignores length/precision arguments,
        so ``VARCHAR(255)`` maps like ``varchar`` and ``decimal(10,2)`` like
        ``decimal``.

        :param dbml_type: The DBML column type. Usually a string; any other
            object (pydbml passes an enum object for enum-typed columns) is
            mapped to ``"string"``.
        :return: Corresponding LinkML type, ``"string"`` when the type is unknown.
        """
        if not isinstance(dbml_type, str):
            return "string"
        base_type = dbml_type.split("(", 1)[0].strip().lower()
        return self._DBML_TO_LINKML_TYPES.get(base_type, "string")

0 commit comments

Comments
 (0)
Please sign in to comment.