Commit 9794c26

Resolved linting errors
1 parent cee3a53 commit 9794c26

7 files changed: +93 -90 lines changed

Dockerfile

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-FROM databricksruntime/python:15.4-LTS
+FROM databricksruntime/python:15.4-LTS

 ARG PROJECT_DIR=/project

@@ -7,4 +7,4 @@ RUN pip install uv==0.4.20
 WORKDIR ${PROJECT_DIR}
 COPY dist/wine_quality-0.0.1-py3-none-any.whl ${PROJECT_DIR}/

-RUN uv pip install --python /databricks/python3 ${PROJECT_DIR}/wine_quality-0.0.1-py3-none-any.whl
+RUN uv pip install --python /databricks/python3 ${PROJECT_DIR}/wine_quality-0.0.1-py3-none-any.whl

notebooks/week3/01. feature_serving.py

Lines changed: 16 additions & 9 deletions
@@ -32,6 +32,7 @@
     OnlineTableSpecTriggeredSchedulingPolicy,
 )
 from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput
+from pyspark.dbutils import DBUtils
 from pyspark.sql import SparkSession

 from wine_quality.config import ProjectConfig
@@ -40,11 +41,12 @@

 # MAGIC %md
 # MAGIC ## Deploy and query a feature serving endpoint
-https://docs.databricks.com/en/machine-learning/feature-store/feature-serving-tutorial.html
+# MAGIC https://docs.databricks.com/en/machine-learning/feature-store/feature-serving-tutorial.html

 # COMMAND ----------

 spark = SparkSession.builder.getOrCreate()
+dbutils = DBUtils(spark)

 # Initialize Databricks clients
 workspace = WorkspaceClient()
@@ -122,14 +124,14 @@
     perform_full_copy=False,
 )

-#Create the online table in Databricks
+# Create the online table in Databricks
 try:
     online_table_pipeline = workspace.online_tables.create(name=online_table_name, spec=spec)
 except Exception as e:
-    if "already exists" in str(e):
-        pass
-    else:
-        raise e
+    if "already exists" in str(e):
+        pass
+    else:
+        raise e

 online_table_pipeline = workspace.online_tables.get(name=online_table_name)

@@ -139,7 +141,9 @@
 # Define features to look up from the feature table
 features = [
     FeatureLookup(
-        table_name=feature_table_name, lookup_key="id", feature_names=[ "volatile_acidity", "alcohol", "sulphates", "quality"]
+        table_name=feature_table_name,
+        lookup_key="id",
+        feature_names=["volatile_acidity", "alcohol", "sulphates", "quality"],
     )
 ]

@@ -175,7 +179,7 @@
     served_entities=[
         ServedEntityInput(
             entity_name=feature_spec_name, # feature spec name defined in the previous step
-            scale_to_zero_enabled=True, # Cost saving mechanism where the endpoint scales down to zero when not in use
+            scale_to_zero_enabled=True,  # Cost saving mechanism where the endpoint scales down to zero when not in use
             workload_size="Small", # Define the workload size (Small, Medium, Large)
         )
     ]
@@ -248,6 +252,7 @@
 headers = {"Authorization": f"Bearer {token}"}
 num_requests = 10

+
 # COMMAND ----------
 # Function to make a request and record latency
 def send_request():
@@ -261,6 +266,8 @@ def send_request():
     end_time = time.time()
     latency = end_time - start_time # Calculate latency for this request
     return response.status_code, latency
+
+
 # COMMAND ----------

 # Measure total execution time
@@ -282,4 +289,4 @@ def send_request():
 average_latency = sum(latencies) / len(latencies)

 print("\nTotal execution time:", total_execution_time, "seconds")
-print("Average latency per request:", average_latency, "seconds")
+print("Average latency per request:", average_latency, "seconds")

notebooks/week3/02.model_serving.py

Lines changed: 16 additions & 21 deletions
@@ -6,25 +6,24 @@

 # COMMAND ----------

+import random
 import time
-
-import requests
-import random
 from concurrent.futures import ThreadPoolExecutor, as_completed

+import requests
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.serving import (
     EndpointCoreConfigInput,
     ServedEntityInput,
-    TrafficConfig,
-    Route,
 )
+from pyspark.dbutils import DBUtils
+from pyspark.sql import SparkSession

 from wine_quality.config import ProjectConfig
-from pyspark.sql import SparkSession

 workspace = WorkspaceClient()
 spark = SparkSession.builder.getOrCreate()
+dbutils = DBUtils(spark)

 config = ProjectConfig.from_yaml(config_path="../../project_config.yml")

@@ -45,13 +44,13 @@
                     entity_version=3,
                 )
             ],
-            # # Optional if only 1 entity is served
-            # traffic_config=TrafficConfig(
-            #     routes=[
-            #         Route(served_model_name="wine_quality_model-3",
-            #               traffic_percentage=100)
-            #     ]
-            # ),
+            # # Optional if only 1 entity is served
+            # traffic_config=TrafficConfig(
+            #     routes=[
+            #         Route(served_model_name="wine_quality_model-3",
+            #               traffic_percentage=100)
+            #     ]
+            # ),
         ),
     )
 except Exception as e:
@@ -88,7 +87,7 @@
     "density",
     "pH",
     "sulphates",
-    "alcohol"
+    "alcohol",
 ]

 sampled_records = train_set[required_columns].sample(n=1000, replace=True).to_dict(orient="records")
@@ -118,9 +117,7 @@
 # COMMAND ----------
 start_time = time.time()

-model_serving_endpoint = (
-    f"https://{host}/serving-endpoints/wine-quality-model-serving/invocations"
-)
+model_serving_endpoint = f"https://{host}/serving-endpoints/wine-quality-model-serving/invocations"
 response = requests.post(
     f"{model_serving_endpoint}",
     headers={"Authorization": f"Bearer {token}"},
@@ -142,9 +139,7 @@
 # COMMAND ----------

 # Initialize variables
-model_serving_endpoint = (
-    f"https://{host}/serving-endpoints/wine-quality-model-serving/invocations"
-)
+model_serving_endpoint = f"https://{host}/serving-endpoints/wine-quality-model-serving/invocations"

 headers = {"Authorization": f"Bearer {token}"}
 num_requests = 1000
@@ -182,4 +177,4 @@ def send_request():
 average_latency = sum(latencies) / len(latencies)

 print("\nTotal execution time:", total_execution_time, "seconds")
-print("Average latency per request:", average_latency, "seconds")
+print("Average latency per request:", average_latency, "seconds")

notebooks/week3/03.model_serving_feature_lookup.py

Lines changed: 9 additions & 7 deletions
@@ -21,11 +21,13 @@
     OnlineTableSpecTriggeredSchedulingPolicy,
 )
 from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput
+from pyspark.dbutils import DBUtils
 from pyspark.sql import SparkSession

 from wine_quality.config import ProjectConfig

 spark = SparkSession.builder.getOrCreate()
+dbutils = DBUtils(spark)

 # Initialize Databricks clients
 workspace = WorkspaceClient()
@@ -47,14 +49,14 @@
     perform_full_copy=False,
 )

-#Create the online table in Databricks
+# Create the online table in Databricks
 try:
     online_table_pipeline = workspace.online_tables.create(name=online_table_name, spec=spec)
 except Exception as e:
-    if "already exists" in str(e):
-        pass
-    else:
-        raise e
+    if "already exists" in str(e):
+        pass
+    else:
+        raise e

 # COMMAND ----------

@@ -127,7 +129,7 @@

 # COMMAND ----------

-train_set.dtypes
+train_set_dtypes = train_set.dtypes

 # COMMAND ----------

@@ -157,4 +159,4 @@

 # COMMAND ----------

-wine_features.dtypes
+wine_features_dtypes = wine_features.dtypes
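With the feature-lookup setup above, the serving endpoint joins the looked-up columns from the online table at request time, so a scoring request only needs the lookup key plus the features that are not stored there. A minimal sketch of such a call; the endpoint name, column split and values are assumptions, not taken from this diff:

import requests

# Assumed endpoint name; host and token are defined earlier in the notebook
endpoint_url = f"https://{host}/serving-endpoints/wine-quality-model-serving-fe/invocations"

# Only the lookup key and the features not kept in the online table are sent;
# the online table supplies the remaining feature values server-side.
payload = {"dataframe_records": [{"id": "25", "fixed_acidity": 7.4, "citric_acid": 0.0}]}

response = requests.post(endpoint_url, headers={"Authorization": f"Bearer {token}"}, json=payload)
print(response.status_code, response.text)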

notebooks/week3/04.AB_test_model_serving.py

Lines changed: 19 additions & 29 deletions
@@ -5,24 +5,24 @@
 # MAGIC %restart_python

 # COMMAND ----------
+import hashlib
 import time

 import mlflow
 import pandas as pd
+import requests
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.serving import EndpointCoreConfigInput, ServedEntityInput
 from lightgbm import LGBMRegressor
 from mlflow import MlflowClient
 from mlflow.models import infer_signature
+from pyspark.dbutils import DBUtils
 from pyspark.sql import SparkSession
 from sklearn.compose import ColumnTransformer
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from sklearn.pipeline import Pipeline
-from sklearn.preprocessing import OneHotEncoder
 from sklearn.preprocessing import StandardScaler
-import hashlib
-import requests

 from wine_quality.config import ProjectConfig

@@ -58,6 +58,10 @@
     "max_depth": ab_test_params["max_depth_b"],
 }

+# COMMAND ----------
+spark = SparkSession.builder.getOrCreate()
+dbutils = DBUtils(spark)
+
 # COMMAND ----------

 # MAGIC %md
@@ -108,7 +112,7 @@

 # Train the model
 pipeline.fit(X_train, y_train)
-y_pred = pipeline.predict(X_test)
+y_pred = pipeline.predict(X_test)

 # Calculate performance metrics
 mse = mean_squared_error(y_test, y_pred)
@@ -124,9 +128,7 @@
 signature = infer_signature(model_input=X_train, model_output=y_pred)

 # Log the input dataset for tracking reproducibility
-dataset = mlflow.data.from_spark(train_set_spark,
-                                 table_name=f"{catalog_name}.{schema_name}.train_set",
-                                 version="0")
+dataset = mlflow.data.from_spark(train_set_spark, table_name=f"{catalog_name}.{schema_name}.train_set", version="0")
 mlflow.log_input(dataset, context="training")

 # Log the pipeline model in MLflow with a unique artifact path
@@ -178,8 +180,7 @@
 mlflow.log_metric("r2_score", r2)
 signature = infer_signature(model_input=X_train, model_output=y_pred)

-dataset = mlflow.data.from_spark(train_set_spark,
-                                 table_name=f"{catalog_name}.{schema_name}.train_set", version="0")
+dataset = mlflow.data.from_spark(train_set_spark, table_name=f"{catalog_name}.{schema_name}.train_set", version="0")
 mlflow.log_input(dataset, context="training")
 mlflow.sklearn.log_model(sk_model=pipeline, artifact_path="lightgbm-pipeline-model", signature=signature)

@@ -233,16 +234,14 @@ def predict(self, context, model_input):

 # COMMAND ----------
 X_train = train_set[num_features + ["id"]]
-X_test = test_set[num_features + ["id"]]
+X_test = test_set[num_features + ["id"]]


 # COMMAND ----------
 models = [model_A, model_B]
 wrapped_model = WineQualityModelWrapper(models)  # we pass the loaded models to the wrapper
 example_input = X_test.iloc[0:1]  # Select the first row for prediction as example
-example_prediction = wrapped_model.predict(
-    context=None,
-    model_input=example_input)
+example_prediction = wrapped_model.predict(context=None, model_input=example_input)
 print("Example Prediction:", example_prediction)

 # COMMAND ----------
@@ -251,22 +250,16 @@ def predict(self, context, model_input):

 with mlflow.start_run() as run:
     run_id = run.info.run_id
-    signature = infer_signature(model_input=X_train,
-                                model_output={"Prediction": 1234.5,
-                                              "model": "Model B"})
-    dataset = mlflow.data.from_spark(train_set_spark,
-                                     table_name=f"{catalog_name}.{schema_name}.train_set",
-                                     version="0")
+    signature = infer_signature(model_input=X_train, model_output={"Prediction": 1234.5, "model": "Model B"})
+    dataset = mlflow.data.from_spark(train_set_spark, table_name=f"{catalog_name}.{schema_name}.train_set", version="0")
     mlflow.log_input(dataset, context="training")
     mlflow.pyfunc.log_model(
-        python_model=wrapped_model,# passing wrapped model here instead sklearn model
+        python_model=wrapped_model,  # passing wrapped model here instead sklearn model
         artifact_path="pyfunc-wine-quality-model-ab",
-        signature=signature
+        signature=signature,
     )
     model_version = mlflow.register_model(
-        model_uri=f"runs:/{run_id}/pyfunc-wine-quality-model-ab",
-        name=model_name,
-        tags={"git_sha": f"{git_sha}"}
+        model_uri=f"runs:/{run_id}/pyfunc-wine-quality-model-ab", name=model_name, tags={"git_sha": f"{git_sha}"}
     )

 # COMMAND ----------
@@ -276,7 +269,7 @@ def predict(self, context, model_input):
 predictions = model.predict(X_test.iloc[0:1])

 # Display predictions
-predictions
+# predictions

 # COMMAND ----------

@@ -313,7 +306,6 @@ def predict(self, context, model_input):
 # MAGIC ### Call the endpoint

 # COMMAND ----------
-
 token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
 host = spark.conf.get("spark.databricks.workspaceUrl")

@@ -342,9 +334,7 @@ def predict(self, context, model_input):

 start_time = time.time()

-model_serving_endpoint = (
-    f"https://{host}/serving-endpoints/wine-quality-model-serving-ab-test/invocations"
-)
+model_serving_endpoint = f"https://{host}/serving-endpoints/wine-quality-model-serving-ab-test/invocations"

 response = requests.post(
     f"{model_serving_endpoint}",
