
chore: Fix test_evaluation_result replay test #5594

Open · wants to merge 1 commit into main
1 change: 1 addition & 0 deletions tests/unit/vertexai/genai/replays/test_evaluate.py
@@ -19,6 +19,7 @@
import pandas as pd


# test change
def test_evaluation_result(client):
"""Tests that evaluate() produces a correctly structured EvaluationResult."""
prompts_df = pd.DataFrame({"prompt": ["What is Taylor Swift's most recent album?"]})
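The hunk above shows only the added "# test change" comment plus surrounding context; the rest of the test body is collapsed in this view. For orientation, a minimal sketch of how a replay test of this shape might look follows. It assumes an evals surface on the client (client.evals.evaluate), a metrics argument, and an eval_case_results field on the returned EvaluationResult; none of these names are confirmed by this PR.

import pandas as pd


def test_evaluation_result_sketch(client):
    """Hypothetical sketch only: checks that evaluate() returns a structured result."""
    prompts_df = pd.DataFrame({"prompt": ["What is Taylor Swift's most recent album?"]})

    # Assumed API surface: run an evaluation over the prompts with a named metric.
    result = client.evals.evaluate(
        dataset=prompts_df,
        metrics=["text_quality"],  # assumed to reference the metric file added below
    )

    # Assumed result shape: one per-case entry for the single prompt.
    assert result is not None
    assert len(result.eval_case_results) == 1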
@@ -0,0 +1,92 @@
name: text_quality
prompt_template: '

# Instruction

You are an expert evaluator. Your task is to evaluate the quality of the responses
generated by AI models.

We will provide you with the user input and an AI-generated response.

You should first read the user input carefully for analyzing the task, and then
evaluate the quality of the responses based on the Criteria provided in the Evaluation
section below.


You will assign the response a rating following the Rating Rubric and Evaluation
Steps. Give step-by-step explanations for your rating, and only choose ratings from
the Rating Rubric.


# Evaluation

## Metric Definition

You will be assessing Text Quality, which measures how effectively the text conveys
clear, accurate, and engaging information that directly addresses the user''s prompt,
considering factors like fluency, coherence, relevance, and conciseness.


## Criteria

Coherence: The response presents ideas in a logical and organized manner, with clear
transitions and a consistent focus, making it easy to follow and understand.

Fluency: The text flows smoothly and naturally, adhering to grammatical rules and
using appropriate vocabulary.

Instruction following: The response demonstrates a clear understanding of the task
instructions, satisfying all of the instruction''s requirements.

Groundedness: The response contains information included only in the context. The
response does not reference any outside information.

Verbosity: The response is appropriately concise, providing sufficient detail without
using complex language to thoroughly address the prompt without being overly wordy
or excessively brief.


## Rating Rubric

5: (Very good). Exceptionally clear, coherent, fluent, and concise. Fully adheres
to instructions and stays grounded.

4: (Good). Well-written, coherent, and fluent. Mostly adheres to instructions and
stays grounded. Minor room for improvement.

3: (Ok). Adequate writing with decent coherence and fluency. Partially fulfills
instructions and may contain minor ungrounded information. Could be more concise.

2: (Bad). Poorly written, lacking coherence and fluency. Struggles to adhere to
instructions and may include ungrounded information. Issues with conciseness.

1: (Very bad). Very poorly written, incoherent, and non-fluent. Fails to follow
instructions and contains substantial ungrounded information. Severely lacking in
conciseness.



## Evaluation Steps

STEP 1: Assess the response in aspects of all criteria provided. Provide assessment
according to each criterion.

STEP 2: Score based on the rating rubric. Give a brief rationale to explain your
evaluation considering each individual criterion.


# User Inputs and AI-generated Response

## User Inputs

### Prompt

{prompt}


## AI-generated Response

{response}

'
version: v1
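The new file defines the text_quality judge metric as YAML: a name, a single-quoted prompt_template containing {prompt} and {response} placeholders, and a version. As a rough, hedged illustration of the template's structure, the sketch below loads the YAML with PyYAML and fills the placeholders with str.format; the file name text_quality.yaml is illustrative, and the SDK's actual rendering path is not shown in this PR.

import yaml

# Load the metric definition (file name is illustrative, not taken from the PR).
with open("text_quality.yaml", encoding="utf-8") as f:
    metric = yaml.safe_load(f)

# The single-quoted YAML scalar parses into one long string with {prompt} and
# {response} placeholders; fill them the way a caller might before judging.
rendered = metric["prompt_template"].format(
    prompt="What is Taylor Swift's most recent album?",
    response="Example model answer goes here.",
)

print(metric["name"], metric["version"])  # text_quality v1
print(rendered)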