Skip to content

Commit 55f85ef

Browse files
akolesnikoff, Lucas Beyer, xiaohuazhai
committed
image-text proj update (#55)
Co-authored-by: Lucas Beyer <[email protected]>
Co-authored-by: Xiaohua Zhai <[email protected]>
1 parent 538b8a8 commit 55f85ef

22 files changed

+237
-258
lines changed

big_vision/configs/proj/image_text/common.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Big Vision Authors.
1+
# Copyright 2023 Big Vision Authors.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@ def _drop_no_imagenet(f):
6161
'class_names_dataset_name': 'imagenet2012',
6262
'pp_img': lambda sz: (
6363
_square875(sz) + '|pad_to_shape(inkey="real_label", outkey="label", shape=[10], pad_value=-1)|keep("label", "image")'), # pylint: disable=line-too-long
64-
'filter_fn': _drop_no_real_label,
64+
'pre_filter_fn': _drop_no_real_label,
6565
},
6666
'imagenet_v2': {'class_names': 'clip'},
6767
'imagenet_a': {
@@ -75,16 +75,16 @@ def _drop_no_imagenet(f):
7575
}
7676

7777

78-
def get_disclf(sz, *, log_steps, pp_txt=None, dataset_names=('imagenet2012',)):
78+
def get_disclf(sz, *, pp_txt=None, dataset_names=('imagenet2012',), **kw):
7979
"""Returns config for discriminative_classifier of specified datasets."""
8080
config = ml_collections.ConfigDict(dict(
8181
dataset_names=list(dataset_names),
8282
type='proj.image_text.discriminative_classifier',
8383
prefix='z/0shot/',
8484
pp_img=_square875(sz),
8585
dataset_overrides={},
86-
log_steps=log_steps,
8786
cache_final=True,
87+
**kw,
8888
))
8989
if pp_txt:
9090
config.pp_txt = pp_txt
@@ -100,18 +100,18 @@ def get_disclf(sz, *, log_steps, pp_txt=None, dataset_names=('imagenet2012',)):
100100

101101
def get_coco(
102102
*,
103-
log_steps,
104103
pp_img='resize(224)|value_range(-1, 1)',
105104
pp_txt='tokenize(max_len=16, inkey="texts", eos="sticky", pad_value=1)',
106-
prefix='z/retr/coco_'):
105+
prefix='z/retr/coco_',
106+
**kw):
107107
"""Returns config for mscoco retrieval zero-shot.
108108
109109
Args:
110-
log_steps: How often the evaluators should be run.
111110
pp_img: Pre-processing string for "image" feature.
112111
pp_txt: Pre-processing string for texts (expected to tokenize "texts" to
113112
"labels").
114113
prefix: Prefix to use for metrics.
114+
**kw: Other config settings, most notably log_{steps,percent,...}.
115115
116116
Returns:
117117
`ConfigDict` that can be used as a retrieval evaluator configuration.
@@ -123,5 +123,5 @@ def get_coco(
123123
'prefix': prefix,
124124
'dataset': 'coco_captions',
125125
'txt_name': ('captions', 'text'),
126-
'log_steps': log_steps,
126+
**kw,
127127
})

big_vision/configs/proj/image_text/lit.ipynb

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

big_vision/configs/proj/image_text/lit_coco.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Big Vision Authors.
1+
# Copyright 2023 Big Vision Authors.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -129,14 +129,11 @@ def get_config(arg=None):
129129
]
130130

131131
config.grad_clip_norm = 1.0
132-
# Gather representations across TPU cores for larger batch size for loss.
133-
# See Figure 9 from https://arxiv.org/abs/2111.07991
134-
config.loss_use_global_batch = True
135132

136133
# Eval section (Both few-shot and zero-shot)
137134
eval_common = dict(
138135
type='proj.image_text.contrastive',
139-
use_global_batch=config.loss_use_global_batch,
136+
use_global_batch=True,
140137
log_steps=500 if not arg.runlocal else 5,
141138
)
142139
config.evals = {}

big_vision/evaluators/proj/image_text/contrastive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright 2022 Big Vision Authors.
1+
# Copyright 2023 Big Vision Authors.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.

0 commit comments

Comments
 (0)