Skip to content

Commit 5c5a641

Browse files
tanmoyioexrhizo
andauthored
Add Kusto DB and kql support (#659)
* Add Kusto DB and kql support * tenant id * tagging numpy temp * Plugins: Give more flexibility to the user how to init client * fix: errors in kustograph * Plugin runtime fixes and respond to comments * Plugins, open close session, kusto unwrap heuristic, * fix column types * Kusto query and query_graph doc strings * dynamic type handle * more dynamic type handling * remove pinned numpy * lint * mypy fixes * mypy fix * more mypy fixes * mypy fixes * mypy fix * update readme * Version and date to be added later --------- Co-authored-by: Alex Warren <[email protected]>
1 parent 7179f7b commit 5c5a641

File tree

12 files changed

+575
-141
lines changed

12 files changed

+575
-141
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,18 @@ All notable changes to the PyGraphistry are documented in this file. The PyGraph
55
The changelog format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
66
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) and all PyGraphistry-specific breaking changes are explicitly noted here.
77

8+
## [ - ]
9+
10+
### Feat
11+
* Kusto/Azure Data Explorer integration. `PyGraphistry.kusto()`, `kusto_query()`, `kusto_query_graph()`
12+
* Extra kusto install target `pip install graphistry[kusto]` installs azure-kusto-data, azure-identity
13+
14+
### Changed
15+
* Legacy `Plottable.spanner_init()` & `PyGraphistry.spanner_init()` helpers are no longer shipped; use `spanner()` instead
16+
17+
### Breaking
18+
* Kusto device authentication doesn't persist.
19+
820
## [0.37.0 - 2025-06-05]
921

1022
### Fixed

graphistry/Plottable.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
from graphistry.models.compute.dbscan import DBSCANEngine
88
from graphistry.models.compute.umap import UMAPEngineConcrete
99
from graphistry.plugins_types.cugraph_types import CuGraphKind
10-
from graphistry.Engine import Engine, EngineAbstract
10+
from graphistry.plugins_types.kusto_types import KustoConfig
11+
from graphistry.plugins_types.spanner_types import SpannerConfig
12+
from graphistry.Engine import EngineAbstract
1113
from graphistry.utils.json import JSONVal
1214

1315

@@ -65,7 +67,9 @@ class Plottable(object):
6567
_complex_encodings : dict
6668
_bolt_driver : Any
6769
_tigergraph : Any
68-
_spannergraph: Any
70+
71+
_spanner_config: Optional[SpannerConfig]
72+
_kusto_config: Optional[KustoConfig]
6973

7074
_dataset_id: Optional[str]
7175
_url: Optional[str]

graphistry/PlotterBase.py

Lines changed: 96 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
from .constants import SRC, DST, NODE
1212
from .plugins_types import CuGraphKind
13+
from .plugins_types.kusto_types import KustoConfig
14+
from .plugins_types.spanner_types import SpannerConfig
1315
from .plugins.igraph import (
1416
to_igraph as to_igraph_base, from_igraph as from_igraph_base,
1517
compute_igraph as compute_igraph_base,
@@ -177,8 +179,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
177179
# Integrations
178180
self._bolt_driver : Any = None
179181
self._tigergraph : Any = None
180-
self._spannergraph: Any
181-
182+
self._kusto_config : Optional[KustoConfig] = None
183+
self._spanner_config : Optional[SpannerConfig] = None
184+
182185
# feature engineering
183186
self._node_embedding = None
184187
self._node_encoder = None
@@ -2282,30 +2285,50 @@ def bolt(self, driver):
22822285
res._bolt_driver = to_bolt_driver(driver)
22832286
return res
22842287

2285-
def spanner_init(self: Plottable, spanner_config: Dict[str, str]) -> Plottable:
2288+
2289+
def spanner(self: Plottable, spanner_config: SpannerConfig) -> Plottable:
22862290
"""
2287-
Initializes a SpannerGraph object with the provided configuration and connects to the instance db
2291+
Set spanner configuration for this Plottable.
22882292
2289-
spanner_config dict must contain the include the following keys, credentials_file is optional:
2290-
- "project_id": The GCP project ID.
2293+
SpannerConfig
22912294
- "instance_id": The Spanner instance ID.
22922295
- "database_id": The Spanner database ID.
2296+
- "project_id": The GCP project ID.
22932297
- "credentials_file": json file API key for service accounts
2294-
2295-
:param spanner_config A dictionary containing the Spanner configuration.
2296-
:type (Dict[str, str])
2298+
2299+
If credentials_file is provided, it will be used to authenticate with the Spanner instance.
2300+
Otherwise, project_id and the spanner login process will be used to authenticate.
2301+
2302+
:param spanner_config: A dictionary containing the Spanner configuration.
2303+
:type spanner_config: SpannerConfig
22972304
:return: Plottable with the Spanner configuration set
22982305
:rtype: Plottable
2299-
:raises ValueError: If any of the required keys in `spanner_config` are missing or have invalid values.
2300-
23012306
"""
2302-
from .plugins.spannergraph import SpannerGraph
2307+
self._spanner_config = spanner_config
2308+
return self
2309+
23032310

2304-
res = copy.copy(self)
2311+
def kusto(self: Plottable, kusto_config: KustoConfig) -> Plottable:
2312+
"""
2313+
Set kusto configuration for this Plottable.
2314+
2315+
KustoConfig
2316+
- "cluster": The Kusto cluster name.
2317+
- "database": The Kusto database name.
2318+
For AAD authentication:
2319+
- "client_id": The Kusto client ID.
2320+
- "client_secret": The Kusto client secret.
2321+
- "tenant_id": The Kusto tenant ID.
2322+
Otherwise, the process will use a web browser to authenticate.
2323+
2324+
:param kusto_config: A dictionary containing the Kusto configuration.
2325+
:type kusto_config: KustoConfig
2326+
:returns: Plottable with the Kusto configuration set
2327+
:rtype: Plottable
2328+
"""
2329+
self._kusto_config = kusto_config
2330+
return self
23052331

2306-
res._spannergraph = SpannerGraph(res, spanner_config)
2307-
logger.debug("Created SpannerGraph object: {res._spannergraph}")
2308-
return res
23092332

23102333
def infer_labels(self):
23112334
"""
@@ -2534,22 +2557,11 @@ def spanner_gql_to_g(self: Plottable, query: str) -> Plottable:
25342557
g.plot()
25352558
25362559
"""
2537-
from .pygraphistry import PyGraphistry
2538-
from .plugins.spannergraph import SpannerGraph
2539-
2560+
from .plugins.spannergraph import SpannerGraphContext
25402561
res = copy.copy(self)
2562+
with SpannerGraphContext(res._spanner_config) as sg:
2563+
return sg.gql_to_graph(query, g=res)
25412564

2542-
if not hasattr(res, '_spannergraph'):
2543-
spanner_config = PyGraphistry._config.get("spanner", None)
2544-
2545-
if spanner_config is not None:
2546-
logger.debug(f"Spanner Config: {spanner_config}")
2547-
else:
2548-
raise ValueError('spanner_config not defined. Pass spanner_config via register() and retry query.')
2549-
2550-
res = res.spanner_init(spanner_config) # type: ignore[attr-defined]
2551-
2552-
return res._spannergraph.gql_to_graph(res, query)
25532565

25542566
def spanner_query_to_df(self: Plottable, query: str) -> pd.DataFrame:
25552567
"""
@@ -2586,22 +2598,65 @@ def spanner_query_to_df(self: Plottable, query: str) -> pd.DataFrame:
25862598
g.plot()
25872599
25882600
"""
2601+
from .plugins.spannergraph import SpannerGraphContext
2602+
with SpannerGraphContext(self._spanner_config) as sg:
2603+
return sg.query_to_df(query)
2604+
25892605

2590-
from .pygraphistry import PyGraphistry
2606+
def kusto_query(self: Plottable, query: str, unwrap_nested: Optional[bool] = None) -> List[pd.DataFrame]:
2607+
"""
2608+
Submit a Kusto/Azure Data Explorer *query* and return result tables.
25912609
2592-
res = copy.copy(self)
2593-
2594-
if not hasattr(res, '_spannergraph'):
2595-
spanner_config = PyGraphistry._config["spanner"]
2596-
if spanner_config is not None:
2597-
logger.debug(f"Spanner Config: {spanner_config}")
2598-
else:
2599-
logger.warning('PyGraphistry._config["spanner"] is None')
2610+
Because a Kusto request may emit multiple tables, a **list of
2611+
DataFrames** is always returned; most queries yield a single entry.
2612+
2613+
unwrap_nested
2614+
-------------
2615+
Controls auto-flattening of *dynamic* (JSON) columns:
2616+
• True - always try to flatten, raise if it fails
2617+
• None - default heuristic: flatten only if table looks nested
2618+
• False - leave results untouched
2619+
2620+
:param query: Kusto query string
2621+
:type query: str
2622+
:param unwrap_nested: flatten strategy above
2623+
:type unwrap_nested: bool | None
2624+
:returns: list of Pandas DataFrames
2625+
:rtype: List[pd.DataFrame]
2626+
2627+
**Example**
2628+
::
2629+
2630+
frames = graphistry.kusto_query("StormEvents | take 100")
2631+
df = frames[0]
2632+
"""
2633+
from .plugins.kustograph import KustoGraphContext
2634+
with KustoGraphContext(self._kusto_config) as kg:
2635+
return kg.query(query, unwrap_nested=unwrap_nested)
26002636

2601-
res = res.spanner_init(PyGraphistry._config["spanner"]) # type: ignore[attr-defined]
2637+
def kusto_query_graph(self: Plottable, graph_name: str, snap_name: Optional[str] = None) -> Plottable:
2638+
"""
2639+
Fetch a Kusto *graph* (and optional *snapshot*) as a Graphistry object.
26022640
2603-
return res._spannergraph.query_to_df(query)
2641+
Under the hood: `graph(..)` + `graph-to-table` to pull **nodes** and
2642+
**edges**, then binds them to *self*.
26042643
2644+
:param graph_name: name of Kusto graph entity
2645+
:type graph_name: str
2646+
:param snap_name: optional snapshot/version
2647+
:type snap_name: str | None
2648+
:returns: Plottable ready for `.plot()` or further transforms
2649+
:rtype: Plottable
2650+
2651+
**Example**
2652+
::
2653+
2654+
g = graphistry.kusto_query_graph("HoneypotNetwork").plot()
2655+
"""
2656+
from .plugins.kustograph import KustoGraphContext
2657+
with KustoGraphContext(self._kusto_config) as kg:
2658+
return kg.query_graph(graph_name, snap_name, g=self)
2659+
26052660

26062661
def nodexl(self, xls_or_url, source='default', engine=None, verbose=False):
26072662

graphistry/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,12 @@
3131
bolt,
3232
cypher,
3333
tigergraph,
34+
spanner,
3435
spanner_gql_to_g,
3536
spanner_query_to_df,
36-
spanner_init,
37+
kusto,
38+
kusto_query,
39+
kusto_query_graph,
3740
gsql,
3841
gsql_endpoint,
3942
cosmos,

0 commit comments

Comments
 (0)