Skip to content

Commit edb27cb

Browse files
authored
Merge pull request #241 from weaviate/add-support-for-multi2vec-jina
Add factory and unit tests for multi2vecjina module
2 parents d197556 + 3a6760a commit edb27cb

File tree

4 files changed

+129
-5
lines changed

4 files changed

+129
-5
lines changed

src/collections/config/types/vectorizer.ts

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ export type Vectorizer =
2424
| 'multi2vec-bind'
2525
| Multi2VecPalmVectorizer
2626
| 'multi2vec-google'
27+
| 'multi2vec-jinaai'
2728
| 'multi2vec-voyageai'
2829
| 'ref2vec-centroid'
2930
| 'text2vec-aws'
@@ -170,7 +171,7 @@ export type Multi2VecGoogleConfig = {
170171
videoFields?: string[];
171172
/** The model ID in use. */
172173
modelId?: string;
173-
/** The number of dimensions in use. */
174+
/** The dimensionality of the vector once embedded. */
174175
dimensions?: number;
175176
/** Whether the collection name is vectorized. */
176177
vectorizeCollectionName?: boolean;
@@ -185,6 +186,32 @@ export type Multi2VecGoogleConfig = {
185186
};
186187
};
187188

189+
/** The configuration for multi-media vectorization using the Jina module.
190+
*
191+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
192+
*/
193+
export type Multi2VecJinaAIConfig = {
194+
/** The base URL to use where API requests should go. */
195+
baseURL?: string;
196+
/** The dimensionality of the vector once embedded. */
197+
dimensions?: number;
198+
/** The image fields used when vectorizing. */
199+
imageFields?: string[];
200+
/** The model to use. */
201+
model?: string;
202+
/** The text fields used when vectorizing. */
203+
textFields?: string[];
204+
/** Whether the collection name is vectorized. */
205+
vectorizeCollectionName?: boolean;
206+
/** The weights of the fields used for vectorization. */
207+
weights?: {
208+
/** The weights of the image fields. */
209+
imageFields?: number[];
210+
/** The weights of the text fields. */
211+
textFields?: number[];
212+
};
213+
};
214+
188215
/** The configuration for multi-media vectorization using the VoyageAI module.
189216
*
190217
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/voyageai/embeddings-multimodal) for detailed usage.
@@ -359,7 +386,7 @@ export type Text2VecOllamaConfig = {
359386
export type Text2VecOpenAIConfig = {
360387
/** The base URL to use where API requests should go. */
361388
baseURL?: string;
362-
/** The dimensions to use. */
389+
/** The dimensionality of the vector once embedded. */
363390
dimensions?: number;
364391
/** The model to use. */
365392
model?: 'text-embedding-3-small' | 'text-embedding-3-large' | 'text-embedding-ada-002' | string;
@@ -434,7 +461,7 @@ export type Text2VecVoyageAIConfig = {
434461
export type Text2VecWeaviateConfig = {
435462
/** The base URL to use where API requests should go. */
436463
baseURL?: string;
437-
/** The dimensions to use. */
464+
/** The dimensionality of the vector once embedded. */
438465
dimensions?: number;
439466
/** The model to use. */
440467
model?: 'Snowflake/snowflake-arctic-embed-m-v1.5' | string;
@@ -449,6 +476,7 @@ export type VectorizerConfig =
449476
| Multi2VecClipConfig
450477
| Multi2VecBindConfig
451478
| Multi2VecGoogleConfig
479+
| Multi2VecJinaAIConfig
452480
| Multi2VecPalmConfig
453481
| Multi2VecVoyageAIConfig
454482
| Ref2VecCentroidConfig
@@ -478,6 +506,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
478506
? Multi2VecBindConfig | undefined
479507
: V extends 'multi2vec-google'
480508
? Multi2VecGoogleConfig
509+
: V extends 'multi2vec-jinaai'
510+
? Multi2VecJinaAIConfig | undefined
481511
: V extends Multi2VecPalmVectorizer
482512
? Multi2VecPalmConfig
483513
: V extends 'multi2vec-voyageai'

src/collections/configure/types/vectorizer.ts

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,19 @@ export type Multi2VecCohereConfigCreate = {
127127
vectorizeCollectionName?: boolean;
128128
};
129129

130+
export type Multi2VecJinaAIConfigCreate = {
131+
/** The base URL to use where API requests should go. */
132+
baseURL?: string;
133+
/** The dimensionality of the vector once embedded. */
134+
dimensions?: number;
135+
/** The image fields to use in vectorization. Can be strings or `Multi2VecField` objects. If a string, weight 0 will be assumed. */
136+
imageFields?: string[] | Multi2VecField[];
137+
/** The text fields to use in vectorization. Can be strings or `Multi2VecField` objects. If a string, weight 0 will be assumed. */
138+
textFields?: string[] | Multi2VecField[];
139+
/** Whether to vectorize the collection name. */
140+
vectorizeCollectionName?: boolean;
141+
};
142+
130143
/** @deprecated Use `Multi2VecGoogleConfigCreate` instead.*/
131144
export type Multi2VecPalmConfigCreate = Multi2VecGoogleConfigCreate;
132145

@@ -144,7 +157,7 @@ export type Multi2VecGoogleConfigCreate = {
144157
videoFields?: string[] | Multi2VecField[];
145158
/** The model ID to use. */
146159
modelId?: string;
147-
/** The number of dimensions to use. */
160+
/** The dimensionality of the vector once embedded. */
148161
dimensions?: number;
149162
/** Whether to vectorize the collection name. */
150163
vectorizeCollectionName?: boolean;
@@ -200,6 +213,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
200213
? Multi2VecCohereConfigCreate | undefined
201214
: V extends 'multi2vec-bind'
202215
? Multi2VecBindConfigCreate | undefined
216+
: V extends 'multi2vec-jinaai'
217+
? Multi2VecJinaAIConfigCreate | undefined
203218
: V extends 'multi2vec-palm'
204219
? Multi2VecPalmConfigCreate
205220
: V extends 'multi2vec-google'

src/collections/configure/unit.test.ts

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,53 @@ describe('Unit testing of the vectorizer factory class', () => {
621621
},
622622
});
623623
});
624-
624+
it('should create the correct Multi2VecJinaAIConfig type with defaults', () => {
625+
const config = configure.vectorizer.multi2VecJinaAI();
626+
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-jinaai'>>({
627+
name: undefined,
628+
vectorIndex: {
629+
name: 'hnsw',
630+
config: undefined,
631+
},
632+
vectorizer: {
633+
name: 'multi2vec-jinaai',
634+
config: undefined,
635+
},
636+
});
637+
});
638+
it('should create the correct Multi2VecJinaAIConfig type with all values and weights', () => {
639+
const config = configure.vectorizer.multi2VecJinaAI({
640+
name: 'test',
641+
imageFields: [
642+
{ name: 'field1', weight: 0.1 },
643+
{ name: 'field2', weight: 0.2 },
644+
],
645+
textFields: [
646+
{ name: 'field3', weight: 0.3 },
647+
{ name: 'field4', weight: 0.4 },
648+
],
649+
vectorizeCollectionName: true,
650+
});
651+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-jinaai'>>({
652+
name: 'test',
653+
vectorIndex: {
654+
name: 'hnsw',
655+
config: undefined,
656+
},
657+
vectorizer: {
658+
name: 'multi2vec-jinaai',
659+
config: {
660+
imageFields: ['field1', 'field2'],
661+
textFields: ['field3', 'field4'],
662+
vectorizeCollectionName: true,
663+
weights: {
664+
imageFields: [0.1, 0.2],
665+
textFields: [0.3, 0.4],
666+
},
667+
},
668+
},
669+
});
670+
});
625671
it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => {
626672
const config = configure.vectorizer.multi2VecPalm({
627673
projectId: 'project-id',

src/collections/configure/vectorizer.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,39 @@ export const vectorizer = {
196196
},
197197
});
198198
},
199+
/**
200+
* Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-jinaai'`.
201+
*
202+
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings-multimodal) for detailed usage.
203+
*
204+
* @param {ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-jinaai'>} [opts] The configuration options for the `multi2vec-jinaai` vectorizer.
205+
* @returns {VectorConfigCreate<never, N, I, 'multi2vec-jinaai'>} The configuration object.
206+
*/
207+
multi2VecJinaAI: <N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
208+
opts?: ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-jinaai'>
209+
): VectorConfigCreate<never, N, I, 'multi2vec-jinaai'> => {
210+
const { name, vectorIndexConfig, ...config } = opts || {};
211+
const imageFields = config.imageFields?.map(mapMulti2VecField);
212+
const textFields = config.textFields?.map(mapMulti2VecField);
213+
let weights: Multi2VecBindConfig['weights'] = {};
214+
weights = formatMulti2VecFields(weights, 'imageFields', imageFields);
215+
weights = formatMulti2VecFields(weights, 'textFields', textFields);
216+
return makeVectorizer(name, {
217+
vectorIndexConfig,
218+
vectorizerConfig: {
219+
name: 'multi2vec-jinaai',
220+
config:
221+
Object.keys(config).length === 0
222+
? undefined
223+
: {
224+
...config,
225+
imageFields: imageFields?.map((f) => f.name),
226+
textFields: textFields?.map((f) => f.name),
227+
weights: Object.keys(weights).length === 0 ? undefined : weights,
228+
},
229+
},
230+
});
231+
},
199232
/**
200233
* Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-palm'`.
201234
*

0 commit comments

Comments
 (0)