Skip to content

Commit 701f9c0

Browse files
[Minor] Support cmd vlmutil scan
1 parent 2f1f6c8 commit 701f9c0

File tree

2 files changed

+80
-2
lines changed

2 files changed

+80
-2
lines changed

vlmeval/tools.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from vlmeval.smp import *
55

66
# Define valid modes
7-
MODES = ('dlist', 'mlist', 'missing', 'circular', 'localize', 'check', 'run', 'eval', 'merge_pkl')
7+
MODES = ('dlist', 'mlist', 'missing', 'circular', 'localize', 'check', 'run', 'eval', 'merge_pkl', 'scan')
88

99
CLI_HELP_MSG = \
1010
f"""
@@ -35,7 +35,8 @@
3535
vlmutil eval [dataset_name] [prediction_file]
3636
9. Merge pkl files:
3737
vlmutil merge_pkl [pkl_dir] [world_size]
38-
38+
10. Scan evaluation results and detect api failure
39+
vlmutil scan --model [model_list.txt or model_names] --data [dataset_names] --root [root_dir]
3940
GitHub: https://github.com/open-compass/VLMEvalKit
4041
""" # noqa: E501
4142

@@ -395,6 +396,15 @@ def parse_args_eval():
395396
return args
396397

397398

399+
def parse_args_scan():
    """Parse the command-line options of ``vlmutil scan``.

    Options are read from ``sys.argv``. Anything the parser does not
    recognise (for example the leading ``scan`` sub-command token) is
    returned separately instead of raising an error.

    Returns:
        tuple: ``(args, unknownargs)`` where ``args`` is the parsed
        ``argparse.Namespace`` with attributes ``model`` (list of str),
        ``data`` (list of str) and ``root`` (str or None), and
        ``unknownargs`` is the list of unrecognised argument strings.

    Raises:
        SystemExit: raised by argparse when ``--model`` or ``--data`` is
        missing, so the user gets a usage message instead of a later
        ``TypeError`` on a ``None`` value.
    """
    parser = argparse.ArgumentParser()
    # Both lists are iterated unconditionally by the scan command, so they
    # must be supplied explicitly.
    parser.add_argument(
        '--model', type=str, nargs='+', required=True,
        help='Model names, or .txt files listing one model name per line')
    parser.add_argument(
        '--data', type=str, nargs='+', required=True,
        help='Dataset names to scan')
    parser.add_argument(
        '--root', type=str, default=None,
        help='Root directory that holds one sub-directory per model')
    args, unknownargs = parser.parse_known_args()
    return args, unknownargs
406+
407+
398408
def MERGE_PKL(pkl_dir, world_size=1):
399409
prefs = []
400410
for ws in list(range(1, 9)):
@@ -416,6 +426,53 @@ def MERGE_PKL(pkl_dir, world_size=1):
416426
dump(res_all[k], f'{pkl_dir}/{pf}{k}')
417427
print(f'Merged {len(res_all[k])} records into {pkl_dir}/{dump_prefs[0]}{k}')
418428

429+
430+
def SCAN(root, model, dataset):
    """Report API failures in the saved results of one model / dataset pair.

    The inference file ``{root}/{model}/{model}_{dataset}.xlsx`` is checked
    first: every prediction containing the API failure message is counted.
    Afterwards each judge-evaluation spreadsheet found in the same directory
    (file names containing ``{dataset}_openai`` or ``{dataset}_gpt`` and
    ending in ``.xlsx``) is checked for failed records. Findings are printed;
    nothing is returned.

    Args:
        root: Directory that contains one sub-directory per model.
        model: Model name (also the sub-directory and file-name prefix).
        dataset: Dataset name used in the result file names.
    """
    from termcolor import colored
    FAIL_MSG = 'Failed to obtain answer via API.'
    root = osp.join(root, model)
    pth = osp.join(root, f'{model}_{dataset}.xlsx')

    # Guard clause: without the inference file there is nothing to scan.
    if not osp.exists(pth):
        print(colored(f'Model {model} x Dataset {dataset} Inference Result Missing! ', 'red'))
        return

    data = load(pth)
    # Detect inference failures
    assert 'prediction' in data
    predictions = [str(x) for x in data['prediction']]
    nfail = sum(FAIL_MSG in x for x in predictions)
    if nfail:
        ntot = len(predictions)
        print(colored(
            f'Model {model} x Dataset {dataset} Inference: {nfail} out of {ntot} failed. {nfail / ntot * 100: .2f}%. ',  # noqa: E501
            'light_red'))

    # Detect evaluation (judge) failures
    judge_tags = [f'{dataset}_openai', f'{dataset}_gpt']
    eval_files = [
        x for x in ls(root, match=f'{model}_{dataset}_')
        if listinstr(judge_tags, x) and x.endswith('.xlsx')
    ]

    for eval_file in eval_files:
        data = load(eval_file)

        # Each dataset family records a failed judge call differently.
        if 'MMVet' in dataset:
            nbad = sum('All 5 retries failed.' in str(x) for x in data['log'])
        elif 'MathVista' in dataset:
            nbad = sum(FAIL_MSG in str(x) for x in data['res'])
        elif dataset == 'LLaVABench':
            # A score of -1 marks a failed record; one filter pass is enough.
            nbad = len(data[data['gpt4_score'] == -1])
        else:
            nbad = sum(FAIL_MSG in str(x) for x in data['log'])

        if nbad:
            print(f'Evaluation ({eval_file}): {nbad} out of {len(data)} failed.')
475+
419476

420477
def cli():
421478
logger = get_logger('VLMEvalKit Tools')
@@ -491,6 +548,25 @@ def extract_dataset(file_name):
491548
args[2] = int(args[2])
492549
assert args[2] in [1, 2, 4, 8]
493550
MERGE_PKL(args[1], args[2])
551+
elif args[0].lower() == 'scan':
552+
args, unknownargs = parse_args_scan()
553+
# The default value is only for the maintainer usage
554+
root = args.root if args.root is not None else osp.join(osp.expanduser('~'), 'mmeval')
555+
models = []
556+
for m in args.model:
557+
if osp.exists(m) and m.endswith('.txt'):
558+
lines = mrlines(m)
559+
models.extend([x.split()[0] for x in lines if len(x.split()) >= 1])
560+
else:
561+
models.append(m)
562+
datasets = args.data
563+
assert len(datasets)
564+
for m in models:
565+
if not osp.exists(osp.join(root, m)):
566+
warnings.warn(f'Model {m} not found in {root}')
567+
continue
568+
for d in datasets:
569+
SCAN(root, m, d)
494570
else:
495571
logger.error('WARNING: command error!')
496572
logger.info(CLI_HELP_MSG)

vlmeval/vlm/vita.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ class VITAQwen2(BaseModel):
1717
IMAGE_TOKEN_INDEX = -200
1818

1919
def __init__(self, model_path='VITA/vita', root=None, **kwargs):
20+
if root is None:
21+
root = os.environ.get('VITA_ROOT', '')
2022
sys.path.append(root)
2123
assert model_path is not None
2224
try:

0 commit comments

Comments
 (0)