Skip to content

Commit 95c1563

Browse files
authored
Merge pull request #242 from weaviate/modules/fix-jina-and-voyage
Fix inputs and mappings for JinaAI and VoyageAI modules
2 parents 8078936 + f228770 commit 95c1563

File tree

4 files changed

+103
-31
lines changed

4 files changed

+103
-31
lines changed

src/collections/config/types/vectorizer.ts

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ export type Vectorizer =
3434
| 'text2vec-databricks'
3535
| 'text2vec-gpt4all'
3636
| 'text2vec-huggingface'
37-
| 'text2vec-jina'
37+
| 'text2vec-jinaai'
3838
| 'text2vec-mistral'
3939
| 'text2vec-ollama'
4040
| 'text2vec-openai'
@@ -217,10 +217,18 @@ export type Multi2VecJinaAIConfig = {
217217
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage.
218218
*/
219219
export type Multi2VecVoyageAIConfig = {
220+
/** The base URL to use where API requests should go. */
221+
baseURL?: string;
220222
/** The image fields used when vectorizing. */
221223
imageFields?: string[];
224+
/** The model to use. */
225+
model?: string;
222226
/** The text fields used when vectorizing. */
223227
textFields?: string[];
228+
/** Whether the input should be truncated to fit in the context window. */
229+
truncate?: boolean;
230+
/** Whether the collection name is vectorized. */
231+
vectorizeCollectionName?: boolean;
224232
/** The weights of the fields used for vectorization. */
225233
weights?: {
226234
/** The weights of the image fields. */
@@ -282,7 +290,7 @@ export type Text2VecCohereConfig = {
282290
baseURL?: string;
283291
/** The model to use. */
284292
model?: string;
285-
/** The truncation strategy to use. */
293+
/** Whether to truncate the input texts to fit within the context length. */
286294
truncate?: boolean;
287295
/** Whether to vectorize the collection name. */
288296
vectorizeCollectionName?: boolean;
@@ -345,13 +353,16 @@ export type Text2VecHuggingFaceConfig = {
345353
*
346354
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage.
347355
*/
348-
export type Text2VecJinaConfig = {
356+
export type Text2VecJinaAIConfig = {
349357
/** The model to use. */
350358
model?: 'jina-embeddings-v2-base-en' | 'jina-embeddings-v2-small-en' | string;
351359
/** Whether to vectorize the collection name. */
352360
vectorizeCollectionName?: boolean;
353361
};
354362

363+
/** @deprecated Use `Text2VecJinaAIConfig` instead. */
364+
export type Text2VecJinaConfig = Text2VecJinaAIConfig;
365+
355366
/**
356367
* The configuration for text vectorization using the Mistral module.
357368
*
@@ -488,7 +499,7 @@ export type VectorizerConfig =
488499
| Text2VecGoogleConfig
489500
| Text2VecGPT4AllConfig
490501
| Text2VecHuggingFaceConfig
491-
| Text2VecJinaConfig
502+
| Text2VecJinaAIConfig
492503
| Text2VecOpenAIConfig
493504
| Text2VecPalmConfig
494505
| Text2VecTransformersConfig
@@ -528,8 +539,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
528539
? Text2VecGPT4AllConfig | undefined
529540
: V extends 'text2vec-huggingface'
530541
? Text2VecHuggingFaceConfig | undefined
531-
: V extends 'text2vec-jina'
532-
? Text2VecJinaConfig | undefined
542+
: V extends 'text2vec-jinaai'
543+
? Text2VecJinaAIConfig | undefined
533544
: V extends 'text2vec-mistral'
534545
? Text2VecMistralConfig | undefined
535546
: V extends 'text2vec-ollama'

src/collections/configure/types/vectorizer.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ import {
1111
Text2VecGPT4AllConfig,
1212
Text2VecGoogleConfig,
1313
Text2VecHuggingFaceConfig,
14-
Text2VecJinaConfig,
14+
Text2VecJinaAIConfig,
1515
Text2VecMistralConfig,
1616
Text2VecOllamaConfig,
1717
Text2VecOpenAIConfig,
@@ -132,6 +132,8 @@ export type Multi2VecJinaAIConfigCreate = {
132132
baseURL?: string;
133133
/** The dimensionality of the vector once embedded. */
134134
dimensions?: number;
135+
/** The model to use. */
136+
model?: string;
135137
/** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
136138
imageFields?: string[] | Multi2VecField[];
137139
/** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
@@ -164,10 +166,18 @@ export type Multi2VecGoogleConfigCreate = {
164166
};
165167

166168
export type Multi2VecVoyageAIConfigCreate = {
169+
/** The base URL to use where API requests should go. */
170+
baseURL?: string;
167171
/** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
168172
imageFields?: string[] | Multi2VecField[];
173+
/** The model to use. */
174+
model?: string;
169175
/** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
170176
textFields?: string[] | Multi2VecField[];
177+
/** Whether the input should be truncated to fit the context window. */
178+
truncate?: boolean;
179+
/** Whether to vectorize the collection name. */
180+
vectorizeCollectionName?: boolean;
171181
};
172182

173183
export type Ref2VecCentroidConfigCreate = Ref2VecCentroidConfig;
@@ -186,7 +196,7 @@ export type Text2VecGPT4AllConfigCreate = Text2VecGPT4AllConfig;
186196

187197
export type Text2VecHuggingFaceConfigCreate = Text2VecHuggingFaceConfig;
188198

189-
export type Text2VecJinaConfigCreate = Text2VecJinaConfig;
199+
export type Text2VecJinaAIConfigCreate = Text2VecJinaAIConfig;
190200

191201
export type Text2VecMistralConfigCreate = Text2VecMistralConfig;
192202

@@ -235,8 +245,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
235245
? Text2VecGPT4AllConfigCreate | undefined
236246
: V extends 'text2vec-huggingface'
237247
? Text2VecHuggingFaceConfigCreate | undefined
238-
: V extends 'text2vec-jina'
239-
? Text2VecJinaConfigCreate | undefined
248+
: V extends 'text2vec-jinaai'
249+
? Text2VecJinaAIConfigCreate | undefined
240250
: V extends 'text2vec-mistral'
241251
? Text2VecMistralConfigCreate | undefined
242252
: V extends 'text2vec-ollama'

src/collections/configure/unit.test.ts

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -621,6 +621,7 @@ describe('Unit testing of the vectorizer factory class', () => {
621621
},
622622
});
623623
});
624+
624625
it('should create the correct Multi2VecJinaAIConfig type with defaults', () => {
625626
const config = configure.vectorizer.multi2VecJinaAI();
626627
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-jinaai'>>({
@@ -635,6 +636,7 @@ describe('Unit testing of the vectorizer factory class', () => {
635636
},
636637
});
637638
});
639+
638640
it('should create the correct Multi2VecJinaAIConfig type with all values and weights', () => {
639641
const config = configure.vectorizer.multi2VecJinaAI({
640642
name: 'test',
@@ -668,6 +670,7 @@ describe('Unit testing of the vectorizer factory class', () => {
668670
},
669671
});
670672
});
673+
671674
it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => {
672675
const config = configure.vectorizer.multi2VecPalm({
673676
projectId: 'project-id',
@@ -771,6 +774,51 @@ describe('Unit testing of the vectorizer factory class', () => {
771774
});
772775
});
773776

777+
it('should create the correct Multi2VecVoyageAIConfig type with defaults', () => {
778+
const config = configure.vectorizer.multi2VecVoyageAI();
779+
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-voyageai'>>({
780+
name: undefined,
781+
vectorIndex: {
782+
name: 'hnsw',
783+
config: undefined,
784+
},
785+
vectorizer: {
786+
name: 'multi2vec-voyageai',
787+
config: undefined,
788+
},
789+
});
790+
});
791+
792+
it('should create the correct Multi2VecVoyageAIConfig type with all values', () => {
793+
const config = configure.vectorizer.multi2VecVoyageAI({
794+
baseURL: 'base-url',
795+
model: 'model',
796+
name: 'test',
797+
truncate: true,
798+
imageFields: ['field1', 'field2'],
799+
textFields: ['field3', 'field4'],
800+
vectorizeCollectionName: true,
801+
});
802+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-voyageai'>>({
803+
name: 'test',
804+
vectorIndex: {
805+
name: 'hnsw',
806+
config: undefined,
807+
},
808+
vectorizer: {
809+
name: 'multi2vec-voyageai',
810+
config: {
811+
baseURL: 'base-url',
812+
model: 'model',
813+
truncate: true,
814+
imageFields: ['field1', 'field2'],
815+
textFields: ['field3', 'field4'],
816+
vectorizeCollectionName: true,
817+
},
818+
},
819+
});
820+
});
821+
774822
it('should create the correct Text2VecAWSConfig type with defaults', () => {
775823
const config = configure.vectorizer.text2VecAWS({
776824
region: 'region',
@@ -1071,35 +1119,35 @@ describe('Unit testing of the vectorizer factory class', () => {
10711119
});
10721120
});
10731121

1074-
it('should create the correct Text2VecJinaConfig type with defaults', () => {
1075-
const config = configure.vectorizer.text2VecJina();
1076-
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-jina'>>({
1122+
it('should create the correct Text2VecJinaAIConfig type with defaults', () => {
1123+
const config = configure.vectorizer.text2VecJinaAI();
1124+
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-jinaai'>>({
10771125
name: undefined,
10781126
vectorIndex: {
10791127
name: 'hnsw',
10801128
config: undefined,
10811129
},
10821130
vectorizer: {
1083-
name: 'text2vec-jina',
1131+
name: 'text2vec-jinaai',
10841132
config: undefined,
10851133
},
10861134
});
10871135
});
10881136

1089-
it('should create the correct Text2VecJinaConfig type with all values', () => {
1090-
const config = configure.vectorizer.text2VecJina({
1137+
it('should create the correct Text2VecJinaAIConfig type with all values', () => {
1138+
const config = configure.vectorizer.text2VecJinaAI({
10911139
name: 'test',
10921140
model: 'model',
10931141
vectorizeCollectionName: true,
10941142
});
1095-
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-jina'>>({
1143+
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-jinaai'>>({
10961144
name: 'test',
10971145
vectorIndex: {
10981146
name: 'hnsw',
10991147
config: undefined,
11001148
},
11011149
vectorizer: {
1102-
name: 'text2vec-jina',
1150+
name: 'text2vec-jinaai',
11031151
config: {
11041152
model: 'model',
11051153
vectorizeCollectionName: true,

src/collections/configure/vectorizer.ts

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -318,12 +318,15 @@ export const vectorizer = {
318318
vectorIndexConfig,
319319
vectorizerConfig: {
320320
name: 'multi2vec-voyageai',
321-
config: {
322-
...config,
323-
imageFields: imageFields?.map((f) => f.name),
324-
textFields: textFields?.map((f) => f.name),
325-
weights: Object.keys(weights).length === 0 ? undefined : weights,
326-
},
321+
config:
322+
Object.keys(config).length === 0
323+
? undefined
324+
: {
325+
...config,
326+
imageFields: imageFields?.map((f) => f.name),
327+
textFields: textFields?.map((f) => f.name),
328+
weights: Object.keys(weights).length === 0 ? undefined : weights,
329+
},
327330
},
328331
});
329332
},
@@ -495,22 +498,22 @@ export const vectorizer = {
495498
});
496499
},
497500
/**
498-
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-jina'`.
501+
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-jinaai'`.
499502
*
500503
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage.
501504
*
502-
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jina'>} [opts] The configuration for the `text2vec-jina` vectorizer.
503-
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jina'>} The configuration object.
505+
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jinaai'>} [opts] The configuration for the `text2vec-jinaai` vectorizer.
506+
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jinaai'>} The configuration object.
504507
*/
505-
text2VecJina: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
506-
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jina'>
507-
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jina'> => {
508+
text2VecJinaAI: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
509+
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jinaai'>
510+
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jinaai'> => {
508511
const { name, sourceProperties, vectorIndexConfig, ...config } = opts || {};
509512
return makeVectorizer(name, {
510513
sourceProperties,
511514
vectorIndexConfig,
512515
vectorizerConfig: {
513-
name: 'text2vec-jina',
516+
name: 'text2vec-jinaai',
514517
config: Object.keys(config).length === 0 ? undefined : config,
515518
},
516519
});

0 commit comments

Comments
 (0)