
Commit 20244cf

Authored by singankit, w-javed, and Copilot

Users/singankit/upload evaluation run 1rp (#40771)

* Upload results for 1DP project
* Calling onedp method for 1dp project
* Update sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py
  Co-authored-by: Copilot <[email protected]>
* Updating get token to work with autogenerated client
* Fixing failed tests

Co-authored-by: Waqas Javed <[email protected]>
Co-authored-by: Copilot <[email protected]>

1 parent 26aedf7 · commit 20244cf
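
In practice, the change routes result logging on the type of azure_ai_project passed to evaluate(): a plain string is treated as a 1DP project endpoint and logged through the new _log_metrics_and_instance_results_onedp helper, while a project-scope dict keeps the existing trace-destination path. A minimal sketch of hitting each branch; the data file, endpoint URL, and scope values are placeholders, not taken from this commit:

# Placeholders only: data file, endpoint URL, and project scope values are illustrative.
from azure.ai.evaluation import evaluate, F1ScoreEvaluator

# 1DP project: a string endpoint triggers the new one-DP logging path.
evaluate(
    data="data.jsonl",
    evaluators={"f1": F1ScoreEvaluator()},
    azure_ai_project="https://<account>.services.ai.azure.com/api/projects/<project>",
)

# Hub-based project: a scope dict keeps the existing MLflow/trace-destination path.
evaluate(
    data="data.jsonl",
    evaluators={"f1": F1ScoreEvaluator()},
    azure_ai_project={
        "subscription_id": "<subscription-id>",
        "resource_group_name": "<resource-group>",
        "project_name": "<project-name>",
    },
)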

File tree: 9 files changed (+115, -22 lines)

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_azure/_clients.py

Lines changed: 1 addition & 1 deletion

@@ -201,4 +201,4 @@ def _generate_path(self, *paths: str) -> str:
         return url
 
     def _get_headers(self) -> Dict[str, str]:
-        return {"Authorization": f"Bearer {self.get_token()}", "Content-Type": "application/json"}
+        return {"Authorization": f"Bearer {self.get_token().token}", "Content-Type": "application/json"}

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_azure/_token_manager.py

Lines changed: 4 additions & 4 deletions

@@ -5,7 +5,7 @@
 import logging
 import time
 import inspect
-from typing import cast, Optional, Union
+from typing import cast, Optional, Union, Any
 
 from azure.core.credentials import TokenCredential, AccessToken
 from azure.identity import AzureCliCredential, DefaultAzureCredential, ManagedIdentityCredential
@@ -71,7 +71,7 @@ def get_aad_credential(self) -> Union[DefaultAzureCredential, ManagedIdentityCre
         # Fall back to using the parent implementation
         return super().get_aad_credential()
 
-    def get_token(self) -> str:
+    def get_token(self, *scopes: str, claims: Union[str, None] = None, tenant_id: Union[str, None] = None, enable_cae: bool = False, **kwargs: Any) -> AccessToken:
         """Get the API token. If the token is not available or has expired, refresh the token.
 
         :return: API token
@@ -82,7 +82,7 @@ def get_token(self) -> str:
         access_token = credential.get_token(self.token_scope)
         self._update_token(access_token)
 
-        return cast(str, self.token)  # check for none is hidden in the _token_needs_update method
+        return self.token  # check for none is hidden in the _token_needs_update method
 
     async def get_token_async(self) -> str:
         """Get the API token asynchronously. If the token is not available or has expired, refresh it.
@@ -112,7 +112,7 @@ def _token_needs_update(self) -> bool:
         )
 
     def _update_token(self, access_token: AccessToken) -> None:
-        self.token = cast(str, access_token.token)
+        self.token = access_token
         self.token_expiry_time = access_token.expires_on
         self.last_refresh_time = time.time()
         self.logger.info("Refreshed Azure management token.")
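
Because get_token now matches the azure-core TokenCredential protocol (an AccessToken return value plus the *scopes/claims/tenant_id/enable_cae parameters), the token manager can be handed directly to clients that expect a credential, and callers that need the raw string read the .token attribute (see the _clients.py and _eval_run.py diffs). A minimal sketch of the new calling pattern; the constructor arguments mirror the usage added in _utils.py below, and the logger is a placeholder:

# Sketch only: the logger is a placeholder; constructor arguments mirror _utils.py in this commit.
import logging
from azure.ai.evaluation._azure._token_manager import AzureMLTokenManager
from azure.ai.evaluation._constants import TokenScope

manager = AzureMLTokenManager(
    TokenScope.COGNITIVE_SERVICES.value,  # scope string from the new TokenScope enum
    logging.getLogger(__name__),
)

access_token = manager.get_token()     # now an azure.core.credentials.AccessToken, not a bare str
bearer = access_token.token            # the raw string callers previously received
expires_on = access_token.expires_on   # POSIX expiry used by _token_needs_update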

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/__init__.py

Lines changed: 5 additions & 0 deletions

@@ -8,9 +8,14 @@
 from . import constants
 from .rai_service import evaluate_with_rai_service
 from .utils import get_harm_severity_level
+from .evaluation_onedp_client import EvaluationServiceOneDPClient
+from .onedp.models import EvaluationUpload, EvaluationResult
 
 __all__ = [
     "get_harm_severity_level",
     "evaluate_with_rai_service",
     "constants",
+    "EvaluationServiceOneDPClient",
+    "EvaluationResult",
+    "EvaluationUpload",
 ]
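
With the re-exports in place, the 1DP client and its models become importable from the package's _common namespace:

# Import path added by this change; the symbols are defined in the onedp submodules.
from azure.ai.evaluation._common import (
    EvaluationServiceOneDPClient,
    EvaluationResult,
    EvaluationUpload,
)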

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_common/evaluation_onedp_client.py

Lines changed: 7 additions & 4 deletions

@@ -3,7 +3,7 @@
 # ---------------------------------------------------------
 
 import logging
-from typing import Union, Any
+from typing import Union, Any, Dict
 from azure.core.credentials import AzureKeyCredential, TokenCredential
 from azure.ai.evaluation._common.onedp import AIProjectClient as RestEvaluationServiceClient
 from azure.ai.evaluation._common.onedp.models import (PendingUploadRequest, PendingUploadType, EvaluationResult,
@@ -22,7 +22,7 @@ def __init__(self, endpoint: str, credential: Union[AzureKeyCredential, "TokenCr
             **kwargs,
         )
 
-    def create_evaluation_result(self, *, name: str, path: str, version=1, **kwargs) -> None:
+    def create_evaluation_result(self, *, name: str, path: str, version=1, metrics: Dict[str, int] = None, **kwargs) -> EvaluationResult:
         """Create and upload evaluation results to Azure evaluation service.
 
         This method uploads evaluation results from a local path to Azure Blob Storage
@@ -38,6 +38,8 @@ def create_evaluation_result(self, *, name: str, path: str, version=1, **kwargs)
         :type path: str
         :param version: The version number for the evaluation results, defaults to 1
         :type version: int, optional
+        :param metrics: Metrics to be added to the evaluation result
+        :type metrics: Dict[str, int], optional
         :param kwargs: Additional keyword arguments to pass to the underlying API calls
         :return: The response from creating the evaluation result version
         :rtype: EvaluationResult
@@ -58,12 +60,13 @@ def create_evaluation_result(self, *, name: str, path: str, version=1, **kwargs)
             upload(path=path, container_client=container_client, logger=LOGGER)
 
         LOGGER.debug(f"Creating evaluation result version for {name} with version {version}")
-        create_version_response = self.rest_client.evaluation_results.create_version(
+        create_version_response = self.rest_client.evaluation_results.create_or_update_version(
             body=EvaluationResult(
                 blob_uri=start_pending_upload_response.blob_reference_for_consumption.blob_uri,
                 result_type=ResultType.EVALUATION,
                 name=name,
-                version=version
+                version=version,
+                metrics=metrics,
             ),
             name=name,
             version=version,
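
For illustration, a minimal sketch of calling the extended method with the new metrics argument and return value; the endpoint, credential, and metric names are placeholders, not values from this commit:

# Sketch only: endpoint, credential, and metric names are placeholders.
from azure.identity import DefaultAzureCredential
from azure.ai.evaluation._common import EvaluationServiceOneDPClient

client = EvaluationServiceOneDPClient(
    endpoint="https://<your-project-endpoint>",  # hypothetical 1DP project endpoint
    credential=DefaultAzureCredential(),
)

result = client.create_evaluation_result(
    name="my-eval-result",
    path="./evaluation_results",  # local file or folder to upload
    version=1,
    metrics={"passed": 1},        # Dict[str, int] per the updated signature
)
print(result.id)  # referenced as evaluationResultId by _log_metrics_and_instance_results_onedp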

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_constants.py

Lines changed: 6 additions & 0 deletions

@@ -80,6 +80,12 @@ class _AggregationType(enum.Enum):
     SUM = "sum"
     CUSTOM = "custom"
 
+class TokenScope(enum.Enum):
+    """Defines the scope of the token used to access Azure resources."""
+
+    DEFAULT_AZURE_MANAGEMENT = "https://management.azure.com/.default"
+    COGNITIVE_SERVICES = "https://cognitiveservices.azure.com/.default"
+
 
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_eval_run.py

Lines changed: 1 addition & 1 deletion

@@ -295,7 +295,7 @@ def get_metrics_url(self):
         return f"https://{self._url_base}" "/mlflow/v2.0" f"{self._get_scope()}" f"/api/2.0/mlflow/runs/log-metric"
 
     def _get_token(self) -> str:
-        return self._management_client.get_token()
+        return self._management_client.get_token().token
 
     def request_with_retry(
         self, url: str, method: str, json_dict: Dict[str, Any], headers: Optional[Dict[str, str]] = None

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_evaluate.py

Lines changed: 13 additions & 10 deletions

@@ -43,7 +43,7 @@
     _log_metrics_and_instance_results,
     _trace_destination_from_project_scope,
     _write_output,
-    DataLoaderFactory,
+    DataLoaderFactory, _log_metrics_and_instance_results_onedp,
 )
 from ._batch_run.batch_clients import BatchClient, BatchClientRun
 
@@ -920,15 +920,19 @@ def _evaluate(  # pylint: disable=too-many-locals,too-many-statements
         raise e
 
     # Done with all evaluations, message outputs into final forms, and log results if needed.
-
-    # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
-    trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
-    studio_url = None
-    if trace_destination:
-        name_map = _map_names_to_builtins(evaluators, graders)
-        studio_url = _log_metrics_and_instance_results(
-            metrics, results_df, trace_destination, None, evaluation_name, name_map, **kwargs
+    name_map = _map_names_to_builtins(evaluators, graders)
+    if isinstance(azure_ai_project, str):
+        studio_url = _log_metrics_and_instance_results_onedp(
+            metrics, results_df, azure_ai_project, evaluation_name, name_map, **kwargs
         )
+    else:
+        # Since tracing is disabled, pass None for target_run so a dummy evaluation run will be created each time.
+        trace_destination = _trace_destination_from_project_scope(azure_ai_project) if azure_ai_project else None
+        studio_url = None
+        if trace_destination:
+            studio_url = _log_metrics_and_instance_results(
+                metrics, results_df, trace_destination, None, evaluation_name, name_map, **kwargs
+            )
 
     result_df_dict = results_df.to_dict("records")
     result: EvaluationResult = {"rows": result_df_dict, "metrics": metrics, "studio_url": studio_url}  # type: ignore
@@ -1125,7 +1129,6 @@ def _run_callable_evaluators(
     # will be marked as outputs already so we do not need to rename them.
 
     input_data_df = _rename_columns_conditionally(validated_data["input_data_df"])
-
     eval_result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
     eval_metrics = _aggregate_metrics(evaluators_result_df, evaluators)
     eval_metrics.update(evaluators_metric)

sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_evaluate/_utils.py

Lines changed: 76 additions & 0 deletions

@@ -126,6 +126,82 @@ def process_message_content(content, images_folder_path):
                 f.write(image_data_binary)
     return None
 
+def _log_metrics_and_instance_results_onedp(
+    metrics: Dict[str, Any],
+    instance_results: pd.DataFrame,
+    project_url: str,
+    evaluation_name: Optional[str],
+    name_map: Dict[str, str],
+    **kwargs,
+) -> Optional[str]:
+
+    # One RP Client
+    from azure.ai.evaluation._azure._token_manager import AzureMLTokenManager
+    from azure.ai.evaluation._constants import TokenScope
+    from azure.ai.evaluation._common import EvaluationServiceOneDPClient, EvaluationUpload
+
+    credentials = AzureMLTokenManager(
+        TokenScope.COGNITIVE_SERVICES.value, LOGGER, credential=kwargs.get("credential")
+    )
+    client = EvaluationServiceOneDPClient(
+        endpoint=project_url,
+        credential=credentials
+    )
+
+    # Massaging before artifacts are put on disk
+    # Adding line_number as index column this is needed by UI to form link to individual instance run
+    instance_results["line_number"] = instance_results.index.values
+
+    artifact_name = "instance_results.jsonl"
+
+    with tempfile.TemporaryDirectory() as tmpdir:
+        # storing multi_modal images if exists
+        col_name = "inputs.conversation"
+        if col_name in instance_results.columns:
+            for item in instance_results[col_name].items():
+                value = item[1]
+                if "messages" in value:
+                    _store_multimodal_content(value["messages"], tmpdir)
+
+        # storing artifact result
+        tmp_path = os.path.join(tmpdir, artifact_name)
+
+        with open(tmp_path, "w", encoding=DefaultOpenEncoding.WRITE) as f:
+            f.write(instance_results.to_json(orient="records", lines=True))
+
+        properties = {
+            EvaluationRunProperties.RUN_TYPE: "eval_run",
+            EvaluationRunProperties.EVALUATION_RUN: "promptflow.BatchRun",
+            EvaluationRunProperties.EVALUATION_SDK: f"azure-ai-evaluation:{VERSION}",
+            EvaluationRunProperties.NAME_MAP: json.dumps(name_map),
+            "_azureml.evaluate_artifacts": json.dumps([{"path": artifact_name, "type": "table"}]),
+        }
+
+        create_evaluation_result_response = client.create_evaluation_result(
+            name=uuid.uuid4(),
+            path=tmp_path,
+            metrics=metrics
+        )
+
+        upload_run_response = client.start_evaluation_run(
+            evaluation=EvaluationUpload(
+                display_name=evaluation_name,
+            )
+        )
+
+        update_run_response = client.update_evaluation_run(
+            name=upload_run_response.id,
+            evaluation=EvaluationUpload(
+                display_name=evaluation_name,
+                status="Completed",
+                outputs={
+                    'evaluationResultId': create_evaluation_result_response.id,
+                },
+                properties=properties,
+            )
+        )
+
+    return update_run_response.properties.get("AiStudioEvaluationUri")
+
 
 def _log_metrics_and_instance_results(
     metrics: Dict[str, Any],
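
For reference, a hedged sketch of what a direct call to the new helper looks like; in the SDK it is invoked from _evaluate, and the DataFrame columns, metrics, and project URL below are illustrative only:

# Sketch only: normally called from _evaluate; all values below are placeholders.
import pandas as pd
from azure.ai.evaluation._evaluate._utils import _log_metrics_and_instance_results_onedp

instance_results = pd.DataFrame(
    {"inputs.question": ["q1", "q2"], "outputs.f1.f1_score": [1.0, 0.5]}
)

studio_url = _log_metrics_and_instance_results_onedp(
    metrics={"f1.f1_score": 0.75},
    instance_results=instance_results,
    project_url="https://<account>.services.ai.azure.com/api/projects/<project>",
    evaluation_name="my-eval-run",
    name_map={},
)
print(studio_url)  # the "AiStudioEvaluationUri" property returned by the service, if present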

sdk/evaluation/azure-ai-evaluation/tests/e2etests/test_lite_management_client.py

Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 from typing import Any, Mapping
 import pytest
 import logging
-from azure.core.credentials import AzureSasCredential, TokenCredential
+from azure.core.credentials import AzureSasCredential, TokenCredential, AccessToken
 from azure.ai.evaluation._azure._clients import LiteMLClient
 
 
@@ -31,7 +31,7 @@ def test_get_token(self, project_scope, azure_cred):
         )
 
         token = client.get_token()
-        assert isinstance(token, str) and len(token) > 0
+        assert isinstance(token, AccessToken) and len(token) > 0
 
     @pytest.mark.azuretest
     @pytest.mark.parametrize("include_credentials", [False, True])
