Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { Button, Typography } from 'antd';
import React, { useState } from 'react';
import { useTranslation } from 'react-i18next';
import styled from 'styled-components';

import { DatasetAssertionLogicModal } from '@app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionLogicModal';

import { AssertionInfo } from '@types';

/** antd Button with its padding and margin zeroed out; used below for the "View Logic" link. */
const ViewLogicButton = styled(Button)`
padding: 0px;
margin: 0px;
`;

/** Props for CustomAssertionDescription. */
type Props = {
/** Assertion whose `description` and `customAssertion` (logic, type) are rendered. */
assertionInfo: AssertionInfo;
};

/**
 * A human-readable description of a Custom Assertion, surfacing its logic
 * (e.g. the SQL behind a Monte Carlo custom-SQL rule) behind a "View Logic" link.
 *
 * The description text is always rendered. When the assertion has no description,
 * the localized "no description" fallback is shown instead of an empty label, so a
 * custom assertion that only carries logic still gets a visible primary label.
 * The "View Logic" link and its modal are rendered only when `customAssertion.logic`
 * is present.
 *
 * @param assertionInfo - The assertion to describe.
 * @returns The description text, optionally followed by the "View Logic" link and modal.
 */
export const CustomAssertionDescription = ({ assertionInfo }: Props) => {
    const { t } = useTranslation('entity.profile.validations');
    const [isLogicVisible, setIsLogicVisible] = useState(false);

    const { description, customAssertion } = assertionInfo;
    const logic = customAssertion?.logic;

    return (
        <>
            {/* Fall back to the shared "no description" label rather than rendering empty text. */}
            <Typography.Text>{description || t('datasetDescription.fallback.noDescription')}</Typography.Text>
            {logic && (
                <>
                    <div>
                        <ViewLogicButton onClick={() => setIsLogicVisible(true)} type="link">
                            {t('datasetDescription.popover.viewLogic')}
                        </ViewLogicButton>
                    </div>
                    <DatasetAssertionLogicModal
                        logic={logic}
                        // Only a real description is used as the modal title; the fallback label is not.
                        title={description || undefined}
                        description={customAssertion?.type || undefined}
                        visible={isLogicVisible}
                        onClose={() => setIsLogicVisible(false)}
                    />
                </>
            )}
        </>
    );
};
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ import Query from '@app/entityV2/shared/tabs/Dataset/Queries/Query';

/** Props for DatasetAssertionLogicModal. */
type Props = {
/** Assertion logic shown in the modal body; passed to Query as its `query`. */
logic: string;
/** Optional title forwarded to Query. */
title?: string;
/** Optional description forwarded to Query. */
description?: string;
/** Passed to the Modal's `visible` prop. */
visible: boolean;
/** Invoked when the modal is cancelled or its footer close button is clicked. */
onClose: () => void;
};

export const DatasetAssertionLogicModal = ({ logic, visible, onClose }: Props) => {
export const DatasetAssertionLogicModal = ({ logic, title, description, visible, onClose }: Props) => {
const { t: tc } = useTranslation('common.actions');
return (
<Modal visible={visible} onCancel={onClose} footer={<Button onClick={onClose}>{tc('close')}</Button>}>
<Query query={logic} />
<Query query={logic} title={title} description={description} />
</Modal>
);
};
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import React from 'react';
import { useTranslation } from 'react-i18next';
import { useTheme } from 'styled-components';

import { CustomAssertionDescription } from '@app/entityV2/shared/tabs/Dataset/Validations/CustomAssertionDescription';
import { DatasetAssertionDescription } from '@app/entityV2/shared/tabs/Dataset/Validations/DatasetAssertionDescription';
import { FieldAssertionDescription } from '@app/entityV2/shared/tabs/Dataset/Validations/FieldAssertionDescription';
import {
Expand Down Expand Up @@ -365,7 +366,9 @@ export const useBuildAssertionPrimaryLabel = (
): JSX.Element => {
const { t } = useTranslation('entity.profile.validations');
let primaryLabel = <Typography.Text>{t('datasetDescription.fallback.noDescription')}</Typography.Text>;
if (assertionInfo?.description && assertionInfo?.type !== AssertionType.Field) {
if (assertionInfo?.type === AssertionType.Custom && assertionInfo?.customAssertion?.logic) {
primaryLabel = <CustomAssertionDescription assertionInfo={assertionInfo} />;
} else if (assertionInfo?.description && assertionInfo?.type !== AssertionType.Field) {
primaryLabel = <Typography.Text>{assertionInfo.description}</Typography.Text>;
} else {
switch (assertionInfo?.type) {
Expand Down
9 changes: 9 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/sources.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
[
{
"urn": "urn:li:dataPlatform:montecarlo",
"name": "montecarlo",
"displayName": "Monte Carlo",
"description": "Import monitors, custom rules, and alerts from Monte Carlo as DataHub assertions.",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/montecarlo",
"recipe": "source:\n type: montecarlo\n config:\n api_id: ${MCD_ID}\n api_token: ${MCD_TOKEN}\n connection_to_platform_map:\n \"<mc-warehouse-uuid>\":\n platform: snowflake\n platform_instance: prod\n env: PROD\n emit_assertions: true\n emit_alerts: true\n alerts_lookback_days: 30\n stateful_ingestion:\n enabled: true",
"category": "Observability"
},
{
"urn": "urn:li:dataPlatform:aerospike",
"name": "aerospike",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import metabaseLogo from '@images/metabaselogo.png';
import mlflowLogo2 from '@images/mlflowlogo2.png';
import modeLogo from '@images/modelogo.png';
import mongodbLogo from '@images/mongodblogo.png';
import montecarloLogo from '@images/montecarlologo.png';
import mssqlLogo from '@images/mssqllogo.png';
import mysqlLogo from '@images/mysqllogo-2.png';
import neo4j from '@images/neo4j.svg';
Expand Down Expand Up @@ -203,6 +204,8 @@ export const VERTEX_AI = 'vertexai';
export const VERTEXAI_URN = `urn:li:dataPlatform:${VERTEX_AI}`;
export const INFORMATICA = 'informatica';
export const INFORMATICA_URN = `urn:li:dataPlatform:${INFORMATICA}`;
export const MONTECARLO = 'montecarlo';
export const MONTECARLO_URN = `urn:li:dataPlatform:${MONTECARLO}`;
export const SNAPLOGIC = 'snaplogic';
export const SNAPLOGIC_URN = `urn:li:dataPlatform:${SNAPLOGIC}`;
export const DLT = 'dlt';
Expand Down Expand Up @@ -282,6 +285,7 @@ export const PLATFORM_URN_TO_LOGO = {
[NEO4J_URN]: neo4j,
[VERTEXAI_URN]: vertexAI,
[INFORMATICA_URN]: informaticaLogo,
[MONTECARLO_URN]: montecarloLogo,
[SNAPLOGIC_URN]: snaplogicLogo,
[FABRIC_URN]: fabricLogo,
[FABRIC_DATA_FACTORY_URN]: fabricDataFactoryLogo,
Expand Down
10 changes: 10 additions & 0 deletions datahub-web-react/src/app/ingestV2/source/builder/sources.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
[
{
"urn": "urn:li:dataPlatform:montecarlo",
"name": "montecarlo",
"displayName": "Monte Carlo",
"description": "Import monitors, custom rules, and alerts from Monte Carlo as DataHub assertions.",
"docsUrl": "https://docs.datahub.com/docs/generated/ingestion/sources/montecarlo",
"recipe": "source:\n type: montecarlo\n config:\n api_id: ${MCD_ID}\n api_token: ${MCD_TOKEN}\n connection_to_platform_map:\n \"<mc-warehouse-uuid>\":\n platform: snowflake\n platform_instance: prod\n env: PROD\n emit_assertions: true\n emit_alerts: true\n alerts_lookback_days: 30\n stateful_ingestion:\n enabled: true",
"category": "Observability",
"isPopular": false
},
{
"urn": "urn:li:dataPlatform:bigquery",
"name": "bigquery",
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 10 additions & 0 deletions metadata-ingestion/constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ databricks-sql-connector==2.9.6
# via acryl-datahub
dataclasses-json==0.6.7
# via
# pycarlo
# unstructured
# unstructured-ingest
dataflows-tabulator==1.54.3
Expand Down Expand Up @@ -554,6 +555,7 @@ graphql-core==3.2.8
# via
# acryl-datahub
# gql
# sgqlc
greenlet==3.3.2
# via
# acryl-datahub
Expand Down Expand Up @@ -1179,6 +1181,8 @@ pyasn1-modules==0.4.2
# python-ldap
pyathena==2.25.2
# via acryl-datahub
pycarlo==0.12.457
# via acryl-datahub
pycountry==26.2.16
# via schwifty
pycparser==3.0 ; implementation_name != 'PyPy'
Expand Down Expand Up @@ -1291,6 +1295,8 @@ pytest-rerunfailures==16.1
# via acryl-datahub
pytest-timeout==2.4.0
# via acryl-datahub
python-box==7.4.1
# via pycarlo
python-dateutil==2.9.0.post0
# via
# acryl-datahub
Expand Down Expand Up @@ -1430,6 +1436,7 @@ requests==2.32.5
# mlflow-skinny
# moto
# msal
# pycarlo
# pydruid
# pyiceberg
# redash-toolbelt
Expand Down Expand Up @@ -1478,6 +1485,7 @@ responses==0.26.0
# via
# acryl-datahub
# moto
# pycarlo
rfc3339-validator==0.1.4
# via
# jsonschema
Expand Down Expand Up @@ -1551,6 +1559,8 @@ setuptools==81.0.0
# redshift-connector
# spacy
# thinc
sgqlc==16.5
# via pycarlo
shellingham==1.5.4
# via typer
simple-salesforce==1.12.9
Expand Down
17 changes: 17 additions & 0 deletions metadata-ingestion/docs/sources/montecarlo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## Overview

[Monte Carlo](https://www.montecarlodata.com/) is a data observability platform that monitors warehouse and lake tables for freshness, volume, schema, and field-quality issues and raises alerts/incidents when a monitor detects a breach.

This connector ingests Monte Carlo **monitors**, **custom (SQL) rules** and **alerts/incidents** and models them as DataHub **Assertions**, so the native "Validation" tab on a dataset reflects Monte Carlo's observability coverage and incident history.

## Concept Mapping

| Monte Carlo Concept | DataHub Concept | Notes |
| ---------------------- | ----------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- |
| `"montecarlo"` | [Data Platform](https://docs.datahub.com/docs/generated/metamodel/entities/dataplatform/) | |
| Monitor | [Assertion](https://docs.datahub.com/docs/generated/metamodel/entities/assertion/) | One `CUSTOM` assertion per monitor; native type kept in custom props. |
| Custom (SQL) rule | [Assertion](https://docs.datahub.com/docs/generated/metamodel/entities/assertion/) | One `CUSTOM` assertion per rule; SQL captured in `customAssertion.logic`. |
| Monitored asset (MCON) | [Dataset](https://docs.datahub.com/docs/generated/metamodel/entities/dataset/) | Resolved via `getTable` and `connection_to_platform_map`. |
| Alert / Incident | Assertion Run Event | Emitted as an `AssertionRunEvent` failure on the corresponding assertion. |

Every monitor/rule is modeled as a `CUSTOM` assertion (matching the established connector pattern, e.g. Snowflake DMFs), with the Monte Carlo native type, resource id and data-quality dimension round-tripped via `customProperties`.
54 changes: 54 additions & 0 deletions metadata-ingestion/docs/sources/montecarlo/montecarlo_post.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
### Capabilities

Use the **Important Capabilities** table above as the source of truth for supported features.

#### Assertion modeling

Every Monte Carlo monitor and custom SQL rule is modeled as a `CUSTOM` assertion. The Monte Carlo
native type (e.g. `freshness`, `volume`, `field_quality`, `custom_sql`) is preserved in
`customProperties` alongside the resource identifier and data-quality dimension, so no information
is lost. For custom SQL rules, the raw SQL expression is captured in `customAssertion.logic`.

#### Alert and incident ingestion

Monte Carlo alerts and incidents are ingested as `AssertionRunEvent` failures on their
corresponding assertion. Each event carries a timestamp, the Monte Carlo alert ID, and a link back
to the Monte Carlo UI via `externalUrl`.

### Limitations

- **Failures only:** Monte Carlo's API does not expose a per-run "pass" stream, so the connector
emits only `FAILURE` run events (from alerts/incidents). Periodic `SUCCESS` events are not
synthesized.
- **MCON resolution:** Each monitored asset requires one `getTable` call to resolve its MCON to a
warehouse table (results are cached per MCON). Assets whose warehouse connection type is not in
`connection_to_platform_map` (and not auto-mappable) are skipped with a warning.
- **Assertion typing:** All monitors and rules are modeled as `CUSTOM` assertions. The Monte Carlo
native type is preserved in `customProperties` rather than coerced into DataHub's typed
freshness/volume/SQL/field assertion schemas.
- **Monte Carlo Cloud only:** Requires a Monte Carlo Cloud account and API key pair. Self-hosted
deployments are not supported.

### Troubleshooting

#### Monitored assets are skipped with a warning

If you see warnings like `Could not resolve MCON to a DataHub dataset URN`, the warehouse
connection type for that asset is not covered by `connection_to_platform_map`. Add a mapping entry
for the connection name shown in the warning.

#### Assertion URNs do not match your warehouse source

Assertion URNs are derived from the dataset URN resolved via `connection_to_platform_map`. If the
`platform`, `platform_instance`, or `env` values differ from those used by your warehouse source
connector, the assertions will not appear on the correct dataset. Align the values in
`connection_to_platform_map` with the config of your warehouse source.

#### No assertions appear after ingestion

Verify that:

1. The API key has read access to monitors, custom rules, and alerts in the Monte Carlo UI.
2. At least one monitor is active and has fired an alert (the connector ingests only monitors that
have associated assets and alerts).
3. The `connection_to_platform_map` covers the warehouse connections used by your monitored assets.
28 changes: 28 additions & 0 deletions metadata-ingestion/docs/sources/montecarlo/montecarlo_pre.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
### Overview

[Monte Carlo](https://www.montecarlodata.com/) is a data observability platform that monitors
warehouse and lake tables for freshness, volume, schema, and field-quality issues and raises
alerts/incidents when a monitor detects a breach.

This connector ingests Monte Carlo **monitors**, **custom (SQL) rules** and **alerts/incidents** and
models them as DataHub **Assertions**, so the native "Validation" tab on a dataset reflects Monte
Carlo's observability coverage and incident history.

### Prerequisites

In order to ingest metadata from Monte Carlo, you will need:

- A Monte Carlo Cloud account (this connector does not support self-hosted/on-prem variants).
- An API key pair (Monte Carlo's `mcd_id` and `mcd_token`, set in the recipe as `api_id` and
`api_token`) with read access to monitors, custom rules, alerts and the catalog. Create one in
the Monte Carlo UI under **Settings → API** (see the
[Monte Carlo API docs](https://docs.getmontecarlo.com/docs/using-the-api)).
- A `connection_to_platform_map` entry for each Monte Carlo warehouse you want ingested, so
monitored-asset URNs align with the URNs emitted by your warehouse sources.

#### Cross-platform URN mapping

A Monte Carlo MCON does not encode the DataHub platform. The connector resolves each MCON to a
concrete table via `getTable` and maps the warehouse connection type to a DataHub platform. Use
`connection_to_platform_map` to pin the `platform`, `platform_instance` and `env` for each Monte
Carlo warehouse so the resulting dataset URNs line up with the URNs emitted by your warehouse
sources (Snowflake, BigQuery, etc.).
24 changes: 24 additions & 0 deletions metadata-ingestion/docs/sources/montecarlo/montecarlo_recipe.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
source:
type: montecarlo
config:
api_id: ${MCD_ID} # Monte Carlo API key id (mcd_id)
api_token: ${MCD_TOKEN} # Monte Carlo API key token (mcd_token)

# Map each Monte Carlo warehouse/resource UUID to a DataHub platform so the
# monitored-asset URNs match those emitted by your warehouse sources.
connection_to_platform_map:
"<mc-warehouse-uuid>":
platform: snowflake
platform_instance: prod
env: PROD

emit_assertions: true # monitors + custom rules -> Assertion entities
alerts:
enabled: true # alerts/incidents -> AssertionRunEvent failures
start_time: "-30 days" # how far back to fetch alerts (default: 30 days)

# stateful_ingestion:
# enabled: true

sink:
# sink configs
Loading
Loading