Skip to content

DBT model instrumentation #11268

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions core/dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import jinja2.nodes
import jinja2.parser
import jinja2.sandbox
from opentelemetry import trace

from dbt.contracts.graph.nodes import GenericTestNode
from dbt.exceptions import (
Expand Down Expand Up @@ -57,6 +58,7 @@ def __init__(
super().__init__(macro, context)
self.node = node
self.stack = stack
self.macro_tracer = trace.get_tracer("dbt.runner")

# This adds the macro's unique id to the node's 'depends_on'
@contextmanager
Expand All @@ -78,8 +80,13 @@ def track_call(self):

# this makes MacroGenerator objects callable like functions
def __call__(self, *args, **kwargs):
with self.track_call():
return self.call_macro(*args, **kwargs)
if "run_hooks" == self.get_name() and len(*args) > 0:
span_name = kwargs["span_name"] if "span_name" in kwargs else "hook_span"
with self.track_call(), self.macro_tracer.start_as_current_span(span_name):
return self.call_macro(*args, **kwargs)
else:
with self.track_call():
return self.call_macro(*args, **kwargs)


class UnitTestMacroGenerator(MacroGenerator):
Expand Down
133 changes: 72 additions & 61 deletions core/dbt/task/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from multiprocessing.pool import ThreadPool
from typing import AbstractSet, Any, Dict, Iterable, List, Optional, Set, Tuple, Type

from opentelemetry import trace
from opentelemetry.trace import StatusCode

from dbt import tracking, utils
from dbt.adapters.base import BaseAdapter, BaseRelation
from dbt.adapters.capability import Capability
Expand Down Expand Up @@ -699,6 +702,7 @@ def __init__(
) -> None:
super().__init__(args, config, manifest)
self.batch_map = batch_map
self._dbt_tracer = trace.get_tracer("dbt.runner")

def raise_on_first_error(self) -> bool:
return False
Expand Down Expand Up @@ -887,77 +891,83 @@ def safe_run_hooks(
failed = False
num_hooks = len(ordered_hooks)

for idx, hook in enumerate(ordered_hooks, 1):
with log_contextvars(node_info=hook.node_info):
hook.index = idx
hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}"
execution_time = 0.0
timing: List[TimingInfo] = []
failures = 1

if not failed:
with collect_timing_info("compile", timing.append):
sql = self.get_hook_sql(
adapter, hook, hook.index, num_hooks, extra_context
if num_hooks == 0:
return status

with self._dbt_tracer.start_as_current_span(hook_type) as hook_span:
for idx, hook in enumerate(ordered_hooks, 1):
with log_contextvars(node_info=hook.node_info):
hook.index = idx
hook_name = f"{hook.package_name}.{hook_type}.{hook.index - 1}"
execution_time = 0.0
timing: List[TimingInfo] = []
failures = 1

if not failed:
with collect_timing_info("compile", timing.append):
sql = self.get_hook_sql(
adapter, hook, hook.index, num_hooks, extra_context
)

started_at = timing[0].started_at or datetime.utcnow()
hook.update_event_status(
started_at=started_at.isoformat(), node_status=RunningStatus.Started
)

fire_event(
LogHookStartLine(
statement=hook_name,
index=hook.index,
total=num_hooks,
node_info=hook.node_info,
)
)

started_at = timing[0].started_at or datetime.utcnow()
hook.update_event_status(
started_at=started_at.isoformat(), node_status=RunningStatus.Started
with collect_timing_info("execute", timing.append):
status, message = get_execution_status(sql, adapter)

finished_at = timing[1].completed_at or datetime.utcnow()
hook.update_event_status(finished_at=finished_at.isoformat())
execution_time = (finished_at - started_at).total_seconds()
failures = 0 if status == RunStatus.Success else 1

if status == RunStatus.Success:
message = f"{hook_name} passed"
else:
message = f"{hook_name} failed, error:\n {message}"
failed = True
hook_span.set_status(StatusCode.ERROR)
else:
status = RunStatus.Skipped
message = f"{hook_name} skipped"

hook.update_event_status(node_status=status)
hook_span.set_attribute("node.status", status.value)

self.node_results.append(
RunResult(
status=status,
thread_id="main",
timing=timing,
message=message,
adapter_response={},
execution_time=execution_time,
failures=failures,
node=hook,
)
)

fire_event(
LogHookStartLine(
LogHookEndLine(
statement=hook_name,
status=status,
index=hook.index,
total=num_hooks,
execution_time=execution_time,
node_info=hook.node_info,
)
)

with collect_timing_info("execute", timing.append):
status, message = get_execution_status(sql, adapter)

finished_at = timing[1].completed_at or datetime.utcnow()
hook.update_event_status(finished_at=finished_at.isoformat())
execution_time = (finished_at - started_at).total_seconds()
failures = 0 if status == RunStatus.Success else 1

if status == RunStatus.Success:
message = f"{hook_name} passed"
else:
message = f"{hook_name} failed, error:\n {message}"
failed = True
else:
status = RunStatus.Skipped
message = f"{hook_name} skipped"

hook.update_event_status(node_status=status)

self.node_results.append(
RunResult(
status=status,
thread_id="main",
timing=timing,
message=message,
adapter_response={},
execution_time=execution_time,
failures=failures,
node=hook,
)
)

fire_event(
LogHookEndLine(
statement=hook_name,
status=status,
index=hook.index,
total=num_hooks,
execution_time=execution_time,
node_info=hook.node_info,
)
)

if hook_type == RunHookType.Start and ordered_hooks:
fire_event(Formatting(""))

Expand Down Expand Up @@ -991,8 +1001,9 @@ def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> R
with adapter.connection_named("master"):
self.defer_to_manifest()
required_schemas = self.get_model_schemas(adapter, selected_uids)
self.create_schemas(adapter, required_schemas)
self.populate_adapter_cache(adapter, required_schemas)
with self._dbt_tracer.start_as_current_span("metadata.setup"):
self.create_schemas(adapter, required_schemas)
self.populate_adapter_cache(adapter, required_schemas)
self.populate_microbatch_batches(selected_uids)
group_lookup.init(self.manifest, selected_uids)
run_hooks_status = self.safe_run_hooks(adapter, RunHookType.Start, {})
Expand Down
39 changes: 33 additions & 6 deletions core/dbt/task/runnable.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from pathlib import Path
from typing import AbstractSet, Dict, Iterable, List, Optional, Set, Tuple, Type, Union

from opentelemetry import context, trace
from opentelemetry.trace import Link, SpanContext, StatusCode

import dbt.exceptions
import dbt.tracking
import dbt.utils
Expand Down Expand Up @@ -91,6 +94,8 @@ def __init__(self, args: Flags, config: RuntimeConfig, manifest: Manifest) -> No
self.previous_defer_state: Optional[PreviousState] = None
self.run_count: int = 0
self.started_at: float = 0
self._node_span_context_mapping: Dict[str, SpanContext] = {}
self._dbt_tracer = trace.get_tracer("dbt.runner")

if self.args.state:
self.previous_state = PreviousState(
Expand Down Expand Up @@ -222,14 +227,28 @@ def get_runner(self, node) -> BaseRunner:

return cls(self.config, adapter, node, run_count, num_nodes)

def call_runner(self, runner: BaseRunner) -> RunResult:
with log_contextvars(node_info=runner.node.node_info):
def call_runner(self, runner: BaseRunner, parent_context=None) -> RunResult:
node_info = runner.node.node_info
links = []
if hasattr(runner.node.depends_on, "nodes"):
for parent_node in runner.node.depends_on.nodes:
if parent_node in self._node_span_context_mapping:
links.append(
Link(
self._node_span_context_mapping[parent_node],
{"upstream.name": parent_node},
),
)
with log_contextvars(node_info=node_info), self._dbt_tracer.start_as_current_span(
node_info["unique_id"], context=parent_context, links=links
) as node_span:
self._node_span_context_mapping[node_info["unique_id"]] = node_span.get_span_context()
runner.node.update_event_status(
started_at=datetime.utcnow().isoformat(), node_status=RunningStatus.Started
)
fire_event(
NodeStart(
node_info=runner.node.node_info,
node_info=node_info,
)
)
try:
Expand All @@ -242,10 +261,16 @@ def call_runner(self, runner: BaseRunner) -> RunResult:
result = None
thread_exception = e
finally:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Java I know that the resource (in try-with-resource pattern) is closed before calling finally (would be the span in this case). Is this the case in Python as well?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but this try-catch-finally is different inner scope within with block correct?

if result.status in (NodeStatus.Error, NodeStatus.Fail, NodeStatus.PartialSuccess):
node_span.set_status(StatusCode.ERROR)
node_span.set_attribute("node.status", result.status.value)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add this as the description for the status?

node_span.set_attribute("node.materialization", node_info["materialized"])
node_span.set_attribute("node.database", node_info["node_relation"]["database"])
node_span.set_attribute("node.schema", node_info["node_relation"]["schema"])
if result is not None:
fire_event(
NodeFinished(
node_info=runner.node.node_info,
node_info=node_info,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to refer some attributes in node_info at different place and using full name seems lengthy and difficult to read hence shortened it.

run_result=result.to_msg_dict(),
)
)
Expand All @@ -256,7 +281,7 @@ def call_runner(self, runner: BaseRunner) -> RunResult:
GenericExceptionOnRun(
unique_id=runner.node.unique_id,
exc=str(thread_exception),
node_info=runner.node.node_info,
node_info=node_info,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto.

)
)

Expand Down Expand Up @@ -304,6 +329,7 @@ def _submit(self, pool, args, callback):

This does still go through the callback path for result collection.
"""
args.append(context.get_current())
if self.config.args.single_threaded:
callback(self.call_runner(*args))
else:
Expand Down Expand Up @@ -501,7 +527,8 @@ def populate_adapter_cache(
def before_run(self, adapter: BaseAdapter, selected_uids: AbstractSet[str]) -> RunStatus:
with adapter.connection_named("master"):
self.defer_to_manifest()
self.populate_adapter_cache(adapter)
with self._dbt_tracer.start_as_current_span("metadata.setup"):
self.populate_adapter_cache(adapter)
return RunStatus.Success

def after_run(self, adapter, results) -> None:
Expand Down
5 changes: 4 additions & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-adapters
git+https://github.com/sfc-gh-vguttha/dbt-adapters.git@vguttha-add-telemetry#subdirectory=dbt-adapters
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-tests-adapter
git+https://github.com/dbt-labs/dbt-common.git@main
git+https://github.com/dbt-labs/dbt-adapters.git@main#subdirectory=dbt-postgres
git+https://github.com/sfc-gh-vguttha/dbt-adapters.git@vguttha-add-telemetry#subdirectory=dbt-snowflake
# black must match what's in .pre-commit-config.yaml to be sure local env matches CI
Comment on lines +1 to +5
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI: will revert these changes once I am fully complete with my testing.

black==24.3.0
bumpversion
Expand Down Expand Up @@ -38,3 +39,5 @@ types-pytz
types-requests
types-setuptools
mocker
opentelemetry-api
opentelemetry-sdk
Loading
Loading