Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions lmms_eval/tasks/vsisuper/count/_default_vsc_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Shared settings for the VSI-SUPER Count tasks. The per-duration task files
# (vsc_10mins.yaml, vsc_30mins.yaml, ...) pull this in via
# `include: _default_vsc_yaml` and add only a task name, a filter and tags.
dataset_path: nyu-visionx/VSI-SUPER-Count
dataset_kwargs:
token: True
# Same directory name as CACHE_DIR in vsc_utils.py.
cache_dir: vsisuper_count
video: True
test_split: test
output_type: generate_until
# Video lookup and prompt construction are delegated to vsc_utils.
doc_to_visual: !function vsc_utils.doc_to_visual
doc_to_text: !function vsc_utils.doc_to_text
doc_to_target: "answer"
# Sampling is disabled (do_sample: false, temperature 0, single beam) and the
# output budget is 16 new tokens.
generation_kwargs:
max_new_tokens: 16
temperature: 0
top_p: 1.0
num_beams: 1
do_sample: false
# vsc_utils.process_results returns a per-document {"mra": ...} score, which is
# averaged across documents.
process_results: !function vsc_utils.process_results
metric_list:
- metric: mra
aggregation: mean
higher_is_better: true
# NOTE(review): vsc_utils.doc_to_text accepts lmms_eval_specific_kwargs but
# does not read it, so these two values currently have no effect on the prompt.
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
metadata:
- version: 0.1
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/count/vsc_10mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count, 10-minute subset: inherits the shared settings and keeps
# only rows whose `split` is "10mins" (see vsc_utils.process_docs_10mins).
include: _default_vsc_yaml
task: vsisuper_count_10mins
process_docs: !function vsc_utils.process_docs_10mins
tag:
- vsisuper
- vsisuper_count
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/count/vsc_120mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count, 120-minute subset: inherits the shared settings and keeps
# only rows whose `split` is "120mins" (see vsc_utils.process_docs_120mins).
include: _default_vsc_yaml
task: vsisuper_count_120mins
process_docs: !function vsc_utils.process_docs_120mins
tag:
- vsisuper
- vsisuper_count
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/count/vsc_30mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count, 30-minute subset: inherits the shared settings and keeps
# only rows whose `split` is "30mins" (see vsc_utils.process_docs_30mins).
include: _default_vsc_yaml
task: vsisuper_count_30mins
process_docs: !function vsc_utils.process_docs_30mins
tag:
- vsisuper
- vsisuper_count
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/count/vsc_60mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count, 60-minute subset: inherits the shared settings and keeps
# only rows whose `split` is "60mins" (see vsc_utils.process_docs_60mins).
include: _default_vsc_yaml
task: vsisuper_count_60mins
process_docs: !function vsc_utils.process_docs_60mins
tag:
- vsisuper
- vsisuper_count
64 changes: 64 additions & 0 deletions lmms_eval/tasks/vsisuper/count/vsc_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import re

import datasets
import numpy as np

from lmms_eval.tasks._task_utils.media_resolver import resolve_media_reference

CACHE_DIR = "vsisuper_count"
_NUMBER_RE = re.compile(r"-?\d+(?:\.\d+)?")


def doc_to_visual(doc):
    """Return the document's resolved video reference as a one-element list.

    The ``video_path`` field is passed to ``resolve_media_reference`` together
    with this module's cache directory and the ``VSISUPER_VIDEO_DIR``
    environment-variable name; whatever that helper returns is wrapped in a
    list.
    """
    resolved = resolve_media_reference(
        doc["video_path"],
        media_type="video",
        cache_dir=CACHE_DIR,
        env_vars=("VSISUPER_VIDEO_DIR",),
    )
    return [resolved]


def doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the counting prompt for one document.

    The question is stringified and stripped, then placed between a fixed
    lead-in line and a fixed answer-format instruction.
    ``lmms_eval_specific_kwargs`` is accepted but not consulted.
    """
    stripped_question = str(doc["question"]).strip()
    return f"These are frames of a video.\n{stripped_question}\nPlease answer the question using a single word or phrase."


def process_docs_10mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"10mins"``."""

    def _is_10mins(row):
        return row["split"] == "10mins"

    return dataset.filter(_is_10mins)


def process_docs_30mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"30mins"``."""

    def _is_30mins(row):
        return row["split"] == "30mins"

    return dataset.filter(_is_30mins)


def process_docs_60mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"60mins"``."""

    def _is_60mins(row):
        return row["split"] == "60mins"

    return dataset.filter(_is_60mins)


def process_docs_120mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"120mins"``."""

    def _is_120mins(row):
        return row["split"] == "120mins"

    return dataset.filter(_is_120mins)


def _extract_number(text):
    """Return the first number matched by ``_NUMBER_RE`` in ``text`` as a float.

    ``text`` is stringified before searching. ``0.0`` is returned when ``text``
    is ``None`` or contains no match.
    """
    found = None if text is None else _NUMBER_RE.search(str(text))
    return float(found.group(0)) if found else 0.0


def _mean_relative_accuracy(pred, target, start=0.5, end=0.95, interval=0.05):
target = float(target)
pred = float(pred)

if target == 0.0:
return 1.0 if pred == 0.0 else 0.0

relative_error = abs(pred - target) / abs(target)
num_pts = int((end - start) / interval) + 2
conf_intervals = np.linspace(start, end, num_pts)
return float((relative_error <= (1.0 - conf_intervals)).mean())


def process_results(doc, results):
    """Score one document's generation and return it under the ``mra`` key.

    The first entry of ``results`` (or an empty string when there is none) is
    reduced to a number with ``_extract_number`` and compared against the
    document's ``answer`` field, which falls back to ``0.0`` when missing or
    falsy.
    """
    raw_output = results[0] if results else ""
    gold = float(doc.get("answer", 0.0) or 0.0)
    score = _mean_relative_accuracy(_extract_number(raw_output), gold)
    return {"mra": score}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Shared settings for the VSI-SUPER Count streaming tasks. The per-duration
# task files pull this in via `include: _default_vsc_streaming_yaml`.
dataset_path: nyu-visionx/VSI-SUPER-Count
dataset_kwargs:
token: True
# Same directory name as CACHE_DIR in vsc_streaming_utils.py.
cache_dir: vsisuper_count
video: True
test_split: test
output_type: generate_until
# Video lookup and prompt construction are delegated to vsc_streaming_utils.
doc_to_visual: !function vsc_streaming_utils.doc_to_visual
doc_to_text: !function vsc_streaming_utils.doc_to_text
# Plural: each document carries a list of answers, which
# vsc_streaming_utils.process_results pairs with the predicted list.
doc_to_target: "answers"
# Sampling is disabled (do_sample: false, temperature 0, single beam).
# NOTE(review): process_results parses the generation as a JSON list; confirm
# that 16 new tokens is enough to hold a complete list for every document.
generation_kwargs:
max_new_tokens: 16
temperature: 0
top_p: 1.0
num_beams: 1
do_sample: false
# vsc_streaming_utils.process_results returns a per-document {"mra": ...}
# score, which is averaged across documents.
process_results: !function vsc_streaming_utils.process_results
metric_list:
- metric: mra
aggregation: mean
higher_is_better: true
# NOTE(review): vsc_streaming_utils.doc_to_text accepts
# lmms_eval_specific_kwargs but does not read it, so these two values currently
# have no effect on the prompt.
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
metadata:
- version: 0.1
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count streaming, 10-minute subset: inherits the shared settings
# and keeps only rows whose `split` is "10mins_streaming"
# (see vsc_streaming_utils.process_docs_streaming_10mins).
include: _default_vsc_streaming_yaml
task: vsisuper_count_streaming_10mins
process_docs: !function vsc_streaming_utils.process_docs_streaming_10mins
tag:
- vsisuper
- vsisuper_count_streaming
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count streaming, 120-minute subset: inherits the shared settings
# and keeps only rows whose `split` is "120mins_streaming"
# (see vsc_streaming_utils.process_docs_streaming_120mins).
include: _default_vsc_streaming_yaml
task: vsisuper_count_streaming_120mins
process_docs: !function vsc_streaming_utils.process_docs_streaming_120mins
tag:
- vsisuper
- vsisuper_count_streaming
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count streaming, 30-minute subset: inherits the shared settings
# and keeps only rows whose `split` is "30mins_streaming"
# (see vsc_streaming_utils.process_docs_streaming_30mins).
include: _default_vsc_streaming_yaml
task: vsisuper_count_streaming_30mins
process_docs: !function vsc_streaming_utils.process_docs_streaming_30mins
tag:
- vsisuper
- vsisuper_count_streaming
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Count streaming, 60-minute subset: inherits the shared settings
# and keeps only rows whose `split` is "60mins_streaming"
# (see vsc_streaming_utils.process_docs_streaming_60mins).
include: _default_vsc_streaming_yaml
task: vsisuper_count_streaming_60mins
process_docs: !function vsc_streaming_utils.process_docs_streaming_60mins
tag:
- vsisuper
- vsisuper_count_streaming
63 changes: 63 additions & 0 deletions lmms_eval/tasks/vsisuper/count_streaming/vsc_streaming_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json

import datasets
import numpy as np

from lmms_eval.tasks._task_utils.media_resolver import resolve_media_reference

CACHE_DIR = "vsisuper_count"


def doc_to_visual(doc):
    """Return the document's resolved video reference as a one-element list.

    ``doc["video_path"]`` is handed to ``resolve_media_reference`` along with
    this module's cache directory and the ``VSISUPER_VIDEO_DIR``
    environment-variable name.
    """
    lookup_options = {
        "media_type": "video",
        "cache_dir": CACHE_DIR,
        "env_vars": ("VSISUPER_VIDEO_DIR",),
    }
    return [resolve_media_reference(doc["video_path"], **lookup_options)]


def doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the streaming-count prompt for one document.

    The stripped, stringified question is placed between a fixed lead-in line
    and a fixed answer-format instruction. ``lmms_eval_specific_kwargs`` is
    accepted but not consulted.
    """
    pieces = (
        "These are frames of a video.\n",
        str(doc["question"]).strip(),
        "\nPlease answer the question using a single word or phrase.",
    )
    return "".join(pieces)


def process_docs_streaming_10mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"10mins_streaming"``."""

    def _matches(row):
        return row["split"] == "10mins_streaming"

    return dataset.filter(_matches)


def process_docs_streaming_30mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"30mins_streaming"``."""

    def _matches(row):
        return row["split"] == "30mins_streaming"

    return dataset.filter(_matches)


def process_docs_streaming_60mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"60mins_streaming"``."""

    def _matches(row):
        return row["split"] == "60mins_streaming"

    return dataset.filter(_matches)


def process_docs_streaming_120mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``split`` field equals ``"120mins_streaming"``."""

    def _matches(row):
        return row["split"] == "120mins_streaming"

    return dataset.filter(_matches)


def _abs_dist_norm(pred, target):
try:
return abs(pred - target) / target
except BaseException:
return 0.0


def _mean_relative_accuracy(pred, target, start=0.5, end=0.95, interval=0.05):
num_pts = (end - start) / interval + 2
conf_intervs = np.linspace(start, end, int(num_pts))
accuracy = _abs_dist_norm(pred, target) <= 1 - conf_intervs
return float(np.mean(accuracy))


def _parse_prediction_list(text):
return json.loads(text)


def process_results(doc, results):
    """Score one document's streaming predictions against its answer list.

    The first entry of ``results`` (or ``"[]"`` when there is none) is parsed
    into a list and paired position-by-position with ``doc["answers"]``. Each
    pair is scored with ``_mean_relative_accuracy``.

    Answers that have no corresponding prediction count as ``0.0`` so that a
    short output cannot raise the mean by omitting checkpoints. Predictions
    beyond the number of answers are ignored.

    Returns:
        ``{"mra": <mean score>}``, with ``0.0`` when there is nothing to score.
    """
    prediction = results[0] if results else "[]"
    parsed_results = _parse_prediction_list(prediction)
    answers = doc["answers"]

    mra_scores = [
        _mean_relative_accuracy(streaming_output, answer)
        for streaming_output, answer in zip(parsed_results, answers)
    ]
    # zip stops at the shorter sequence; pad so every answer contributes.
    mra_scores.extend([0.0] * (len(answers) - len(mra_scores)))

    return {"mra": float(np.mean(mra_scores)) if mra_scores else 0.0}
27 changes: 27 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/_default_vsr_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Shared settings for the VSI-SUPER Recall tasks. The per-duration task files
# (vsr_10mins.yaml ... vsr_240mins.yaml) pull this in via
# `include: _default_vsr_yaml`.
dataset_path: nyu-visionx/VSI-SUPER-Recall
dataset_kwargs:
token: True
# Same directory name as CACHE_DIR in vsr_utils.py.
cache_dir: vsisuper_recall
video: True
test_split: test
output_type: generate_until
# Video lookup and prompt construction are delegated to vsr_utils.
doc_to_visual: !function vsr_utils.doc_to_visual
doc_to_text: !function vsr_utils.doc_to_text
doc_to_target: "answer"
# Sampling is disabled (do_sample: false, temperature 0, single beam) and the
# output budget is 16 new tokens.
generation_kwargs:
max_new_tokens: 16
temperature: 0
top_p: 1.0
num_beams: 1
do_sample: false
# vsr_utils.process_results returns a per-document {"accuracy": 0.0 | 1.0}
# score, which is averaged across documents.
process_results: !function vsr_utils.process_results
metric_list:
- metric: accuracy
aggregation: mean
higher_is_better: true
# NOTE(review): vsr_utils.doc_to_text accepts lmms_eval_specific_kwargs but
# does not read it, so these two values currently have no effect on the prompt.
lmms_eval_specific_kwargs:
default:
pre_prompt: ""
post_prompt: ""
metadata:
- version: 0.1
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/vsr_10mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Recall, 10-minute subset: inherits the shared settings and keeps
# only rows whose `type` is "10mins" (see vsr_utils.process_docs_10mins).
include: _default_vsr_yaml
task: vsisuper_recall_10mins
process_docs: !function vsr_utils.process_docs_10mins
tag:
- vsisuper
- vsisuper_recall
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/vsr_120mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Recall, 120-minute subset: inherits the shared settings and keeps
# only rows whose `type` is "120mins" (see vsr_utils.process_docs_120mins).
include: _default_vsr_yaml
task: vsisuper_recall_120mins
process_docs: !function vsr_utils.process_docs_120mins
tag:
- vsisuper
- vsisuper_recall
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/vsr_240mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Recall, 240-minute subset: inherits the shared settings and keeps
# only rows whose `type` is "240mins" (see vsr_utils.process_docs_240mins).
include: _default_vsr_yaml
task: vsisuper_recall_240mins
process_docs: !function vsr_utils.process_docs_240mins
tag:
- vsisuper
- vsisuper_recall
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/vsr_30mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Recall, 30-minute subset: inherits the shared settings and keeps
# only rows whose `type` is "30mins" (see vsr_utils.process_docs_30mins).
include: _default_vsr_yaml
task: vsisuper_recall_30mins
process_docs: !function vsr_utils.process_docs_30mins
tag:
- vsisuper
- vsisuper_recall
6 changes: 6 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/vsr_60mins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# VSI-SUPER Recall, 60-minute subset: inherits the shared settings and keeps
# only rows whose `type` is "60mins" (see vsr_utils.process_docs_60mins).
include: _default_vsr_yaml
task: vsisuper_recall_60mins
process_docs: !function vsr_utils.process_docs_60mins
tag:
- vsisuper
- vsisuper_recall
56 changes: 56 additions & 0 deletions lmms_eval/tasks/vsisuper/recall/vsr_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import re
from collections import OrderedDict

import datasets
from loguru import logger as eval_logger

from lmms_eval.tasks._task_utils.media_resolver import resolve_media_reference

CACHE_DIR = "vsisuper_recall"
_OPTION_RE = re.compile(r"[A-D]")


def doc_to_visual(doc):
    """Return the document's resolved video reference as a one-element list.

    ``doc["video_path"]`` is passed to ``resolve_media_reference`` with this
    module's cache directory and the ``VSISUPER_VIDEO_DIR``
    environment-variable name.
    """
    source_reference = doc["video_path"]
    resolved = resolve_media_reference(
        source_reference,
        media_type="video",
        cache_dir=CACHE_DIR,
        env_vars=("VSISUPER_VIDEO_DIR",),
    )
    return [resolved]


def doc_to_text(doc, lmms_eval_specific_kwargs=None):
    """Build the multiple-choice prompt for one document.

    The stripped question is followed by an ``Options:`` section listing each
    entry of ``doc["options"]`` on its own line (empty when the field is
    missing or falsy) and a fixed instruction to answer with the option
    letter. ``lmms_eval_specific_kwargs`` is accepted but not consulted.
    """
    stem = str(doc["question"]).strip()
    choices = doc.get("options") or []
    choice_lines = "\n".join([str(choice) for choice in choices])
    return f"{stem}\nOptions:\n{choice_lines}\nAnswer with the option's letter from the given choices directly."


def process_docs_10mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``type`` field equals ``"10mins"``."""

    def _is_10mins(row):
        return row["type"] == "10mins"

    return dataset.filter(_is_10mins)


def process_docs_30mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``type`` field equals ``"30mins"``."""

    def _is_30mins(row):
        return row["type"] == "30mins"

    return dataset.filter(_is_30mins)


def process_docs_60mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``type`` field equals ``"60mins"``."""

    def _is_60mins(row):
        return row["type"] == "60mins"

    return dataset.filter(_is_60mins)


def process_docs_120mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``type`` field equals ``"120mins"``."""

    def _is_120mins(row):
        return row["type"] == "120mins"

    return dataset.filter(_is_120mins)


def process_docs_240mins(dataset: datasets.Dataset) -> datasets.Dataset:
    """Keep only the rows whose ``type`` field equals ``"240mins"``."""

    def _is_240mins(row):
        return row["type"] == "240mins"

    return dataset.filter(_is_240mins)


def _normalize_option(text):
if text is None:
return ""
match = _OPTION_RE.search(str(text).upper())
return match.group(0) if match else ""


def process_results(doc, results):
    """Score one multiple-choice document and return it under ``accuracy``.

    Both the first entry of ``results`` (or ``""`` when there is none) and the
    document's ``answer`` field are reduced to an option letter with
    ``_normalize_option``. The score is ``1.0`` only when a letter was actually
    extracted from the prediction and it equals the target letter.

    Returns:
        ``{"accuracy": 1.0}`` for a match, ``{"accuracy": 0.0}`` otherwise.
    """
    prediction = _normalize_option(results[0] if results else "")
    target = _normalize_option(doc.get("answer", ""))
    # An empty prediction must never count as correct, even if the target
    # also failed to normalise to a letter.
    is_correct = bool(prediction) and prediction == target
    return {"accuracy": 1.0 if is_correct else 0.0}
Loading