Skip to content

Commit f672566

Browse files
committed
log jobs and job defs in mlflow
1 parent 5e88318 commit f672566

File tree

10 files changed

+152
-28
lines changed

10 files changed

+152
-28
lines changed

jupyter_scheduler/executors.py

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
from typing import Dict
88

99
import fsspec
10+
import mlflow
1011
import nbconvert
1112
import nbformat
1213
from nbconvert.preprocessors import CellExecutionError, ExecutePreprocessor
1314

1415
from jupyter_scheduler.models import DescribeJob, JobFeature, Status
1516
from jupyter_scheduler.orm import Job, create_session
1617
from jupyter_scheduler.parameterize import add_parameters
18+
from jupyter_scheduler.scheduler import MLFLOW_SERVER_URI
1719
from jupyter_scheduler.utils import get_utc_timestamp
1820

1921

@@ -136,16 +138,22 @@ def execute(self):
136138
store_widget_state=True,
137139
)
138140

139-
try:
140-
ep.preprocess(nb)
141-
except CellExecutionError as e:
142-
raise e
143-
finally:
144-
for output_format in job.output_formats:
145-
cls = nbconvert.get_exporter(output_format)
146-
output, resources = cls().from_notebook_node(nb)
147-
with fsspec.open(self.staging_paths[output_format], "w", encoding="utf-8") as f:
148-
f.write(output)
141+
mlflow.set_tracking_uri(MLFLOW_SERVER_URI)
142+
with mlflow.start_run(run_id=job.mlflow_run_id):
143+
try:
144+
ep.preprocess(nb)
145+
if job.parameters:
146+
mlflow.log_params(job.parameters)
147+
except CellExecutionError as e:
148+
raise e
149+
finally:
150+
for output_format in job.output_formats:
151+
cls = nbconvert.get_exporter(output_format)
152+
output, resources = cls().from_notebook_node(nb)
153+
output_path = self.staging_paths[output_format]
154+
with fsspec.open(output_path, "w", encoding="utf-8") as f:
155+
f.write(output)
156+
mlflow.log_artifact(output_path)
149157

150158
def supported_features(cls) -> Dict[JobFeature, bool]:
151159
return {

jupyter_scheduler/models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ class CreateJob(BaseModel):
8585
name: str
8686
output_filename_template: Optional[str] = OUTPUT_FILENAME_TEMPLATE
8787
compute_type: Optional[str] = None
88+
mlflow_logging: Optional[bool] = None
89+
mlflow_experiment_id: Optional[str] = None
90+
mlflow_run_id: Optional[str] = None
8891

8992
@root_validator
9093
def compute_input_filename(cls, values) -> Dict:
@@ -145,6 +148,9 @@ class DescribeJob(BaseModel):
145148
status: Status = Status.CREATED
146149
status_message: Optional[str] = None
147150
downloaded: bool = False
151+
mlflow_logging: Optional[bool] = None
152+
mlflow_experiment_id: Optional[str] = None
153+
mlflow_run_id: Optional[str] = None
148154

149155
class Config:
150156
orm_mode = True
@@ -209,6 +215,8 @@ class CreateJobDefinition(BaseModel):
209215
compute_type: Optional[str] = None
210216
schedule: Optional[str] = None
211217
timezone: Optional[str] = None
218+
mlflow_logging: Optional[bool] = None
219+
mlflow_experiment_id: Optional[str] = None
212220

213221
@root_validator
214222
def compute_input_filename(cls, values) -> Dict:
@@ -234,6 +242,8 @@ class DescribeJobDefinition(BaseModel):
234242
create_time: int
235243
update_time: int
236244
active: bool
245+
mlflow_logging: Optional[bool] = None
246+
mlflow_experiment_id: Optional[str] = None
237247

238248
class Config:
239249
orm_mode = True

jupyter_scheduler/orm.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ class CommonColumns:
8585
output_filename_template = Column(String(256))
8686
update_time = Column(Integer, default=get_utc_timestamp, onupdate=get_utc_timestamp)
8787
create_time = Column(Integer, default=get_utc_timestamp)
88+
mlflow_logging = Column(Boolean)
89+
mlflow_experiment_id = Column(String(256), nullable=True)
8890

8991

9092
class Job(CommonColumns, Base):
@@ -98,6 +100,7 @@ class Job(CommonColumns, Base):
98100
url = Column(String(256), default=generate_jobs_url)
99101
pid = Column(Integer)
100102
idempotency_token = Column(String(256))
103+
mlflow_run_id = Column(String(256), nullable=True)
101104

102105

103106
class JobDefinition(CommonColumns, Base):

jupyter_scheduler/scheduler.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@
44
import shutil
55
import subprocess
66
from typing import Dict, Optional, Type, Union
7+
from uuid import uuid4
78

89
import fsspec
10+
import mlflow
911
import psutil
1012
from jupyter_core.paths import jupyter_data_dir
1113
from jupyter_server.transutils import _i18n
@@ -42,6 +44,10 @@
4244
from jupyter_scheduler.orm import Job, JobDefinition, create_session
4345
from jupyter_scheduler.utils import create_output_directory, create_output_filename
4446

47+
MLFLOW_SERVER_HOST = "127.0.0.1"
48+
MLFLOW_SERVER_PORT = "5000"
49+
MLFLOW_SERVER_URI = f"http://{MLFLOW_SERVER_HOST}:{MLFLOW_SERVER_PORT}"
50+
4551

4652
class BaseScheduler(LoggingConfigurable):
4753
"""Base class for schedulers. A default implementation
@@ -348,16 +354,13 @@ def start_mlflow_server(self):
348354
[
349355
"mlflow",
350356
"server",
351-
"--backend-store-uri",
352-
"./mlruns",
353-
"--default-artifact-root",
354-
"./mlartifacts",
355357
"--host",
356-
"0.0.0.0",
358+
MLFLOW_SERVER_HOST,
357359
"--port",
358-
"5000",
360+
MLFLOW_SERVER_PORT,
359361
]
360362
)
363+
mlflow.set_tracking_uri(MLFLOW_SERVER_URI)
361364

362365
def __init__(
363366
self,
@@ -415,6 +418,19 @@ def create_job(self, model: CreateJob) -> str:
415418
if not model.output_formats:
416419
model.output_formats = []
417420

421+
mlflow_client = mlflow.MlflowClient()
422+
423+
if model.job_definition_id and model.mlflow_experiment_id:
424+
experiment_id = model.mlflow_experiment_id
425+
else:
426+
experiment_id = mlflow_client.create_experiment(f"{model.name}-{uuid4()}")
427+
model.mlflow_experiment_id = experiment_id
428+
input_file_path = os.path.join(self.root_dir, model.input_uri)
429+
mlflow.log_artifact(input_file_path, "input")
430+
431+
mlflow_run = mlflow_client.create_run(experiment_id=experiment_id, run_name=model.name)
432+
model.mlflow_run_id = mlflow_run.info.run_id
433+
418434
job = Job(**model.dict(exclude_none=True, exclude={"input_uri"}))
419435
session.add(job)
420436
session.commit()
@@ -553,6 +569,12 @@ def create_job_definition(self, model: CreateJobDefinition) -> str:
553569
if not self.file_exists(model.input_uri):
554570
raise InputUriError(model.input_uri)
555571

572+
mlflow_client = mlflow.MlflowClient()
573+
experiment_id = mlflow_client.create_experiment(f"{model.name}-{uuid4()}")
574+
model.mlflow_experiment_id = experiment_id
575+
input_file_path = os.path.join(self.root_dir, model.input_uri)
576+
mlflow.log_artifact(input_file_path, "input")
577+
556578
job_definition = JobDefinition(**model.dict(exclude_none=True, exclude={"input_uri"}))
557579
session.add(job_definition)
558580
session.commit()

src/components/mlflow-checkbox.tsx

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,13 @@ import React, { ChangeEvent } from 'react';
22

33
import { Checkbox, FormControlLabel, FormGroup } from '@mui/material';
44

5-
export type MLFlowCheckboxProps = {
5+
export function MLFlowLoggingControl(props: {
66
onChange: (event: ChangeEvent<HTMLInputElement>) => void;
7-
};
8-
9-
export function MLFlowCheckbox(props: MLFlowCheckboxProps): JSX.Element {
7+
}): JSX.Element {
108
return (
119
<FormGroup>
1210
<FormControlLabel
13-
control={<Checkbox onChange={props.onChange} value={'mlflowLogging'} />}
11+
control={<Checkbox onChange={props.onChange} name={'mlflowLogging'} />}
1412
label="Log with MLFlow"
1513
/>
1614
</FormGroup>

src/handler.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,8 @@ export namespace Scheduler {
363363
compute_type?: string;
364364
schedule?: string;
365365
timezone?: string;
366+
mlflow_logging?: boolean;
367+
mlflow_experiment_id?: string;
366368
}
367369

368370
export interface IUpdateJobDefinition {
@@ -389,6 +391,8 @@ export namespace Scheduler {
389391
create_time: number;
390392
update_time: number;
391393
active: boolean;
394+
mlflow_logging: boolean;
395+
mlflow_experiment_id?: string;
392396
}
393397

394398
export interface IEmailNotifications {
@@ -415,6 +419,9 @@ export namespace Scheduler {
415419
output_filename_template?: string;
416420
output_formats?: string[];
417421
compute_type?: string;
422+
mlflow_logging?: boolean;
423+
mlflow_experiment_id?: string;
424+
mlflow_run_id?: string;
418425
}
419426

420427
export interface ICreateJobFromDefinition {
@@ -463,6 +470,9 @@ export namespace Scheduler {
463470
start_time?: number;
464471
end_time?: number;
465472
downloaded: boolean;
473+
mlflow_logging?: boolean;
474+
mlflow_experiment_id?: string;
475+
mlflow_run_id?: string;
466476
}
467477

468478
export interface ICreateJobResponse {

src/mainviews/create-job.tsx

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ import {
4242

4343
import { Box, Stack } from '@mui/system';
4444
import { getErrorMessage } from '../util/errors';
45-
import { MLFlowCheckbox } from '../components/mlflow-checkbox';
45+
import { MLFlowLoggingControl } from '../components/mlflow-checkbox';
4646

4747
export interface ICreateJobProps {
4848
model: ICreateJobModel;
@@ -175,7 +175,6 @@ export function CreateJob(props: ICreateJobProps): JSX.Element {
175175

176176
const handleInputChange = (event: ChangeEvent<HTMLInputElement>) => {
177177
const target = event.target;
178-
179178
const parameterNameIdx = parameterNameMatch(target.name);
180179
const parameterValueIdx = parameterValueMatch(target.name);
181180
const newParams = props.model.parameters || [];
@@ -321,7 +320,10 @@ export function CreateJob(props: ICreateJobProps): JSX.Element {
321320
compute_type: props.model.computeType,
322321
idempotency_token: props.model.idempotencyToken,
323322
tags: props.model.tags,
324-
runtime_environment_parameters: props.model.runtimeEnvironmentParameters
323+
runtime_environment_parameters: props.model.runtimeEnvironmentParameters,
324+
mlflow_logging: props.model.mlflowLogging,
325+
mlflow_experiment_id: props.model.mlflowExperimentId,
326+
mlflow_run_id: props.model.mlflowRunId
325327
};
326328

327329
if (props.model.parameters !== undefined) {
@@ -369,7 +371,9 @@ export function CreateJob(props: ICreateJobProps): JSX.Element {
369371
tags: props.model.tags,
370372
runtime_environment_parameters: props.model.runtimeEnvironmentParameters,
371373
schedule: props.model.schedule,
372-
timezone: props.model.timezone
374+
timezone: props.model.timezone,
375+
mlflow_logging: props.model.mlflowLogging,
376+
mlflow_experiment_id: props.model.mlflowExperimentId
373377
};
374378

375379
if (props.model.parameters !== undefined) {
@@ -505,7 +509,7 @@ export function CreateJob(props: ICreateJobProps): JSX.Element {
505509
environmentList={environmentList}
506510
value={props.model.environment}
507511
/>
508-
<MLFlowCheckbox onChange={handleInputChange} />
512+
<MLFlowLoggingControl onChange={handleInputChange} />
509513
<OutputFormatPicker
510514
label={trans.__('Output formats')}
511515
name="outputFormat"

src/mainviews/detail-view/job-definition.tsx

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import { Scheduler as SchedulerTokens } from '../../tokens';
3232

3333
import { timestampLocalize } from './job-detail';
3434
import { getErrorMessage } from '../../util/errors';
35+
import { OpenInNew } from '@mui/icons-material';
3536

3637
export interface IJobDefinitionProps {
3738
app: JupyterFrontEnd;
@@ -175,6 +176,18 @@ export function JobDefinition(props: IJobDefinitionProps): JSX.Element {
175176
>
176177
{trans.__('Edit Job Definition')}
177178
</Button>
179+
{model.mlflowLogging === true && (
180+
<Button
181+
variant="outlined"
182+
onClick={() => {
183+
const mlFlowUrl = `http://127.0.0.1:5000/#/experiments/${props.model?.mlflowExperimentId}`;
184+
window.open(mlFlowUrl);
185+
}}
186+
endIcon={<OpenInNew />}
187+
>
188+
{trans.__('Open in MLFlow')}
189+
</Button>
190+
)}
178191
<ConfirmDialogDeleteButton
179192
handleDelete={async () => {
180193
log('job-definition-detail.delete');
@@ -230,6 +243,16 @@ export function JobDefinition(props: IJobDefinitionProps): JSX.Element {
230243
value: model.timezone ?? '',
231244
label: trans.__('Time zone')
232245
}
246+
],
247+
[
248+
{
249+
value: model.mlflowLogging ? trans.__('Yes') : trans.__('No'),
250+
label: trans.__('MLFlow Logging')
251+
},
252+
{
253+
value: props.model.mlflowExperimentId,
254+
label: trans.__('MLFLow Experiment Id')
255+
}
233256
]
234257
];
235258

src/mainviews/detail-view/job-detail.tsx

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ import {
3939
LabeledValue
4040
} from '../../components/labeled-value';
4141
import { getErrorMessage } from '../../util/errors';
42+
import { OpenInNew } from '@mui/icons-material';
43+
44+
const MLFLOW_SERVER_HOST = '127.0.0.1';
45+
const MLFLOW_SERVER_PORT = '5000';
46+
const MLFLOW_SERVER_URI = `http://${MLFLOW_SERVER_HOST}:${MLFLOW_SERVER_PORT}`;
4247

4348
export interface IJobDetailProps {
4449
app: JupyterFrontEnd;
@@ -167,6 +172,18 @@ export function JobDetail(props: IJobDetailProps): JSX.Element {
167172
{trans.__('Download Job Files')}
168173
</Button>
169174
)}
175+
{props.model?.mlflowLogging === true && (
176+
<Button
177+
variant="outlined"
178+
onClick={() => {
179+
const mlFlowUrl = `${MLFLOW_SERVER_URI}/#/experiments/${props.model?.mlflowExperimentId}/runs/${props.model?.mlflowRunId}`;
180+
window.open(mlFlowUrl);
181+
}}
182+
endIcon={<OpenInNew />}
183+
>
184+
{trans.__('Open in MLFlow')}
185+
</Button>
186+
)}
170187
{props.model !== null && props.model.status === 'IN_PROGRESS' && (
171188
<ConfirmDialogStopButton
172189
handleStop={handleStopJob}
@@ -249,6 +266,22 @@ export function JobDetail(props: IJobDetailProps): JSX.Element {
249266
{
250267
value: timestampLocalize(props.model.endTime ?? ''),
251268
label: trans.__('End time')
269+
},
270+
{
271+
value: props.model.mlflowLogging ? trans.__('Yes') : trans.__('No'),
272+
273+
label: trans.__('MLFlow Logging')
274+
}
275+
],
276+
[
277+
{
278+
value: props.model.mlflowExperimentId,
279+
label: trans.__('MLFLow Experiment Id')
280+
},
281+
{
282+
value: props.model.mlflowRunId,
283+
284+
label: trans.__('MLFlow Run Id')
252285
}
253286
]
254287
];

0 commit comments

Comments
 (0)