Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added SQLDatabase connectivity #432

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion src/sempy_labs/_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ def __init__(
(resource_id, resource_name) = resolve_item_name_and_id(
item=item, type=endpoint_type.capitalize(), workspace=workspace_id
)
if endpoint_type == "sqldatabase":
# SQLDatabase has a special case for resolving the name and id
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should be (resource_name, resource_id) = resolve_item_name_and_id(...)

(resource_id, resource_name) = resolve_item_name_and_id(
item=item, type="SQLDatabase", workspace=workspace_id
)
else:
(resource_id, resource_name) = resolve_lakehouse_name_and_id(
lakehouse=item, workspace=workspace_id
Expand All @@ -61,6 +66,8 @@ def __init__(

if endpoint_type == "warehouse":
tds_endpoint = response.json().get("properties", {}).get("connectionString")
if endpoint_type == "sqldatabase":
tds_endpoint = response.json().get("properties", {}).get("serverFqdn")
else:
tds_endpoint = (
response.json()
Expand All @@ -72,7 +79,10 @@ def __init__(
# Set up the connection string
access_token = SynapseTokenProvider()()
tokenstruct = _bytes2mswin_bstr(access_token.encode())
conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name};Encrypt=Yes;"
if endpoint_type == "sqldatabase":
conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name}-{resource_id};Encrypt=Yes;"
else:
conn_str = f"DRIVER={{ODBC Driver 18 for SQL Server}};SERVER={tds_endpoint};DATABASE={resource_name};Encrypt=Yes;"

if timeout is not None:
conn_str += f"Connect Timeout={timeout};"
Expand Down Expand Up @@ -166,3 +176,17 @@ def __init__(
timeout=timeout,
endpoint_type="lakehouse",
)

class ConnectSQLDatabase(ConnectBase):
    """
    Connection to a Fabric SQL database.

    Thin subclass of ``ConnectBase`` that forwards its arguments to the base
    initializer with ``endpoint_type="sqldatabase"``.

    Parameters
    ----------
    sqldatabase : str | uuid.UUID
        Name or ID of the Fabric SQL database.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID. Forwarded unchanged to ``ConnectBase``.
    timeout : int, default=None
        Connection timeout. Forwarded unchanged to ``ConnectBase``.
    """

    # NOTE(review): a reviewer asked for the parameter to be renamed to
    # `sql_database`. The existing name is kept here so that callers using
    # `ConnectSQLDatabase(sqldatabase=...)` keep working.
    def __init__(
        self,
        sqldatabase: Union[str, UUID],
        workspace: Optional[Union[str, UUID]] = None,
        timeout: Optional[int] = None,
    ):
        super().__init__(
            # The base initializer takes the target as `item`, not `name`.
            item=sqldatabase,
            workspace=workspace,
            timeout=timeout,
            endpoint_type="sqldatabase",
        )
218 changes: 218 additions & 0 deletions src/sempy_labs/_sqldatabase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
import sempy.fabric as fabric
from sempy_labs._helper_functions import (
resolve_workspace_name_and_id,
_base_api,
_create_dataframe,
_update_dataframe_datatypes,
)
import pandas as pd
from typing import Optional
import sempy_labs._icons as icons
from uuid import UUID

## Still debugging the creation of an sql database
# def create_warehouse(
# warehouse: str,
# description: Optional[str] = None,
# case_insensitive_collation: bool = False,
# workspace: Optional[str | UUID] = None,
# ):
# """
# Creates a Fabric warehouse.

# This is a wrapper function for the following API: `Items - Create Warehouse <https://learn.microsoft.com/rest/api/fabric/warehouse/items/create-warehouse>`_.

# Parameters
# ----------
# warehouse: str
# Name of the warehouse.
# description : str, default=None
# A description of the warehouse.
# case_insensitive_collation: bool, default=False
# If True, creates the warehouse with case-insensitive collation.
# workspace : str | uuid.UUID, default=None
# The Fabric workspace name or ID.
# Defaults to None which resolves to the workspace of the attached lakehouse
# or if no lakehouse attached, resolves to the workspace of the notebook.
# """

# (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

# payload = {"displayName": warehouse}

# if description:
# payload["description"] = description
# if case_insensitive_collation:
# payload.setdefault("creationPayload", {})
# payload["creationPayload"][
# "defaultCollation"
# ] = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

# _base_api(
# request=f"/v1/workspaces/{workspace_id}/warehouses",
# payload=payload,
# method="post",
# lro_return_status_code=True,
# status_codes=[201, 202],
# )

# print(
# f"{icons.green_dot} The '{warehouse}' warehouse has been created within the '{workspace_name}' workspace."
# )


def list_sqldatabses(workspace: Optional[str | UUID] = None) -> pd.DataFrame:
    """
    Shows the SQL databases within a workspace.

    Parameters
    ----------
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe showing the SQL databases within a workspace.
    """

    # NOTE(review): a reviewer asked for this function to be renamed to
    # `_list_sql_databases` (private until the API is public) and for the
    # output to carry a wider column set ("Connection Type", "Database Name",
    # "Server FQDN", "Provisioning Status", ...). The name and the declared
    # columns are kept as-is here; TODO confirm the API property keys before
    # adding the extra columns.
    columns = {
        "SQLDatabase Name": "string",
        "SQLDatabase Id": "string",
        "Description": "string",
        "Connection Info": "string",
        "Created Date": "datetime",
        "Last Updated Time": "datetime",
    }
    df = _create_dataframe(columns=columns)

    (_, workspace_id) = resolve_workspace_name_and_id(workspace)

    responses = _base_api(
        request=f"/v1/workspaces/{workspace_id}/sqldatabases", uses_pagination=True
    )

    # Collect every row first and concatenate once, instead of growing the
    # dataframe one row at a time.
    rows = []
    for r in responses:
        for v in r.get("value", []):
            prop = v.get("properties") or {}
            # Keys must match the entries of `columns` exactly.
            rows.append(
                {
                    "SQLDatabase Name": v.get("displayName"),
                    "SQLDatabase Id": v.get("id"),
                    "Description": v.get("description"),
                    "Connection Info": prop.get("connectionInfo"),
                    "Created Date": prop.get("createdDate"),
                    "Last Updated Time": prop.get("lastUpdatedTime"),
                }
            )

    if rows:
        df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

    _update_dataframe_datatypes(dataframe=df, column_map=columns)

    return df

## Still debugging the deletion of an sql database
# def delete_warehouse(name: str, workspace: Optional[str | UUID] = None):
# """
# Deletes a Fabric warehouse.

# This is a wrapper function for the following API: `Items - Delete Warehouse <https://learn.microsoft.com/rest/api/fabric/warehouse/items/delete-warehouse>`_.

# Parameters
# ----------
# name: str
# Name of the warehouse.
# workspace : str | uuid.UUID, default=None
# The Fabric workspace name or ID.
# Defaults to None which resolves to the workspace of the attached lakehouse
# or if no lakehouse attached, resolves to the workspace of the notebook.
# """

# (workspace_name, workspace_id) = resolve_workspace_name_and_id(workspace)

# item_id = fabric.resolve_item_id(
# item_name=name, type="Warehouse", workspace=workspace_id
# )

# _base_api(
# request=f"/v1/workspaces/{workspace_id}/warehouses/{item_id}", method="delete"
# )

# print(
# f"{icons.green_dot} The '{name}' warehouse within the '{workspace_name}' workspace has been deleted."
# )


def get_sqldatabase_tables(
    sqldatabase: str | UUID, workspace: Optional[str | UUID] = None
) -> pd.DataFrame:
    """
    Lists the base tables of a Fabric SQL database.

    The result comes from INFORMATION_SCHEMA.TABLES, restricted to rows whose
    TABLE_TYPE is 'BASE TABLE'.

    Parameters
    ----------
    sqldatabase : str | uuid.UUID
        Name or ID of the Fabric SQL database.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe with the columns 'Schema', 'Table Name' and 'Table Type'.
    """

    # NOTE(review): a reviewer asked for this function to be named
    # `get_sql_database_tables` with a `sql_database` parameter. The current
    # names are kept so existing callers keep working.
    from sempy_labs._sql import ConnectSQLDatabase

    tables_query = (
        "\n"
        "SELECT TABLE_SCHEMA AS [Schema], TABLE_NAME AS [Table Name], TABLE_TYPE AS [Table Type]\n"
        "FROM INFORMATION_SCHEMA.TABLES\n"
        "WHERE TABLE_TYPE = 'BASE TABLE'\n"
    )

    with ConnectSQLDatabase(sqldatabase=sqldatabase, workspace=workspace) as connection:
        tables = connection.query(tables_query)

    return tables


def get_sqldatabase_columns(
    sqldatabase: str | UUID, workspace: Optional[str | UUID] = None
) -> pd.DataFrame:
    """
    Lists the columns of every base table in a Fabric SQL database.

    The result comes from INFORMATION_SCHEMA.TABLES left-joined to
    INFORMATION_SCHEMA.COLUMNS on table name and schema, restricted to tables
    whose TABLE_TYPE is 'BASE TABLE'.

    Parameters
    ----------
    sqldatabase : str | uuid.UUID
        Name or ID of the Fabric SQL database.
    workspace : str | uuid.UUID, default=None
        The Fabric workspace name or ID.
        Defaults to None which resolves to the workspace of the attached lakehouse
        or if no lakehouse attached, resolves to the workspace of the notebook.

    Returns
    -------
    pandas.DataFrame
        A pandas dataframe with the columns 'Schema', 'Table Name', 'Column Name',
        'Data Type', 'Is Nullable' and 'Character Max Length'.
    """

    # NOTE(review): a reviewer asked for this function to be named
    # `get_sql_database_columns` with a `sql_database` parameter. The current
    # names are kept so existing callers keep working.
    from sempy_labs._sql import ConnectSQLDatabase

    columns_query = (
        "\n"
        "SELECT t.TABLE_SCHEMA AS [Schema], t.TABLE_NAME AS [Table Name], c.COLUMN_NAME AS [Column Name], c.DATA_TYPE AS [Data Type], c.IS_NULLABLE AS [Is Nullable], c.CHARACTER_MAXIMUM_LENGTH AS [Character Max Length]\n"
        "FROM INFORMATION_SCHEMA.TABLES AS t\n"
        "LEFT JOIN INFORMATION_SCHEMA.COLUMNS AS c\n"
        "ON t.TABLE_NAME = c.TABLE_NAME\n"
        "AND t.TABLE_SCHEMA = c.TABLE_SCHEMA\n"
        "WHERE t.TABLE_TYPE = 'BASE TABLE'\n"
    )

    with ConnectSQLDatabase(sqldatabase=sqldatabase, workspace=workspace) as connection:
        table_columns = connection.query(columns_query)

    return table_columns