Skip to content

Commit

Permalink
Merge pull request #242 from weaviate/modules/fix-jina-and-voyage
Browse files Browse the repository at this point in the history
Fix inputs and mappings for JinaAI and VoyageAI modules
  • Loading branch information
tsmith023 authored Jan 3, 2025
2 parents 8078936 + f228770 commit 95c1563
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 31 deletions.
23 changes: 17 additions & 6 deletions src/collections/config/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ export type Vectorizer =
| 'text2vec-databricks'
| 'text2vec-gpt4all'
| 'text2vec-huggingface'
| 'text2vec-jina'
| 'text2vec-jinaai'
| 'text2vec-mistral'
| 'text2vec-ollama'
| 'text2vec-openai'
Expand Down Expand Up @@ -217,10 +217,18 @@ export type Multi2VecJinaAIConfig = {
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/transformers/embeddings-multimodal) for detailed usage.
*/
export type Multi2VecVoyageAIConfig = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The image fields used when vectorizing. */
imageFields?: string[];
/** The model to use. */
model?: string;
/** The text fields used when vectorizing. */
textFields?: string[];
/** Whether the input should be truncated to fit in the context window. */
truncate?: boolean;
/** Whether the collection name is vectorized. */
vectorizeCollectionName?: boolean;
/** The weights of the fields used for vectorization. */
weights?: {
/** The weights of the image fields. */
Expand Down Expand Up @@ -282,7 +290,7 @@ export type Text2VecCohereConfig = {
baseURL?: string;
/** The model to use. */
model?: string;
/** The truncation strategy to use. */
/** Whether to truncate the input texts to fit within the context length. */
truncate?: boolean;
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
Expand Down Expand Up @@ -345,13 +353,16 @@ export type Text2VecHuggingFaceConfig = {
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage.
*/
export type Text2VecJinaConfig = {
export type Text2VecJinaAIConfig = {
/**
 * The model to use.
 *
 * The two literal names are listed as known examples only: because the union also
 * includes `string`, any model name type-checks.
 */
model?: 'jina-embeddings-v2-base-en' | 'jina-embeddings-v2-small-en' | string;
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
};

/**
 * Alias of `Text2VecJinaAIConfig`, exported under the previous type name.
 *
 * @deprecated Use `Text2VecJinaAIConfig` instead.
 */
export type Text2VecJinaConfig = Text2VecJinaAIConfig;

/**
* The configuration for text vectorization using the Mistral module.
*
Expand Down Expand Up @@ -488,7 +499,7 @@ export type VectorizerConfig =
| Text2VecGoogleConfig
| Text2VecGPT4AllConfig
| Text2VecHuggingFaceConfig
| Text2VecJinaConfig
| Text2VecJinaAIConfig
| Text2VecOpenAIConfig
| Text2VecPalmConfig
| Text2VecTransformersConfig
Expand Down Expand Up @@ -528,8 +539,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
? Text2VecGPT4AllConfig | undefined
: V extends 'text2vec-huggingface'
? Text2VecHuggingFaceConfig | undefined
: V extends 'text2vec-jina'
? Text2VecJinaConfig | undefined
: V extends 'text2vec-jinaai'
? Text2VecJinaAIConfig | undefined
: V extends 'text2vec-mistral'
? Text2VecMistralConfig | undefined
: V extends 'text2vec-ollama'
Expand Down
18 changes: 14 additions & 4 deletions src/collections/configure/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
Text2VecGPT4AllConfig,
Text2VecGoogleConfig,
Text2VecHuggingFaceConfig,
Text2VecJinaConfig,
Text2VecJinaAIConfig,
Text2VecMistralConfig,
Text2VecOllamaConfig,
Text2VecOpenAIConfig,
Expand Down Expand Up @@ -132,6 +132,8 @@ export type Multi2VecJinaAIConfigCreate = {
baseURL?: string;
/** The dimensionality of the vector once embedded. */
dimensions?: number;
/** The model to use. */
model?: string;
/** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
imageFields?: string[] | Multi2VecField[];
/** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
Expand Down Expand Up @@ -164,10 +166,18 @@ export type Multi2VecGoogleConfigCreate = {
};

/**
 * The options accepted when configuring the `multi2vec-voyageai` vectorizer.
 *
 * Every field is optional. Image and text fields may be given either as plain
 * property names or as `Multi2VecField` values.
 */
export type Multi2VecVoyageAIConfigCreate = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The image fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
imageFields?: string[] | Multi2VecField[];
/** The model to use. */
model?: string;
/** The text fields to use in vectorization. Can be string or `Multi2VecField` type. If string, weight 0 will be assumed. */
textFields?: string[] | Multi2VecField[];
/** Whether the input should be truncated to fit the context window. */
truncate?: boolean;
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
};

/** Creation-time alias of `Ref2VecCentroidConfig`; the two types are identical. */
export type Ref2VecCentroidConfigCreate = Ref2VecCentroidConfig;
Expand All @@ -186,7 +196,7 @@ export type Text2VecGPT4AllConfigCreate = Text2VecGPT4AllConfig;

/** Creation-time alias of `Text2VecHuggingFaceConfig`; the two types are identical. */
export type Text2VecHuggingFaceConfigCreate = Text2VecHuggingFaceConfig;

export type Text2VecJinaConfigCreate = Text2VecJinaConfig;
/** Creation-time alias of `Text2VecJinaAIConfig`; the two types are identical. */
export type Text2VecJinaAIConfigCreate = Text2VecJinaAIConfig;

/** Creation-time alias of `Text2VecMistralConfig`; the two types are identical. */
export type Text2VecMistralConfigCreate = Text2VecMistralConfig;

Expand Down Expand Up @@ -235,8 +245,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
? Text2VecGPT4AllConfigCreate | undefined
: V extends 'text2vec-huggingface'
? Text2VecHuggingFaceConfigCreate | undefined
: V extends 'text2vec-jina'
? Text2VecJinaConfigCreate | undefined
: V extends 'text2vec-jinaai'
? Text2VecJinaAIConfigCreate | undefined
: V extends 'text2vec-mistral'
? Text2VecMistralConfigCreate | undefined
: V extends 'text2vec-ollama'
Expand Down
64 changes: 56 additions & 8 deletions src/collections/configure/unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ describe('Unit testing of the vectorizer factory class', () => {
},
});
});

it('should create the correct Multi2VecJinaAIConfig type with defaults', () => {
const config = configure.vectorizer.multi2VecJinaAI();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-jinaai'>>({
Expand All @@ -635,6 +636,7 @@ describe('Unit testing of the vectorizer factory class', () => {
},
});
});

it('should create the correct Multi2VecJinaAIConfig type with all values and weights', () => {
const config = configure.vectorizer.multi2VecJinaAI({
name: 'test',
Expand Down Expand Up @@ -668,6 +670,7 @@ describe('Unit testing of the vectorizer factory class', () => {
},
});
});

it('should create the correct Multi2VecPalmConfig type using deprecated method with defaults', () => {
const config = configure.vectorizer.multi2VecPalm({
projectId: 'project-id',
Expand Down Expand Up @@ -771,6 +774,51 @@ describe('Unit testing of the vectorizer factory class', () => {
});
});

// Factory called with no options: the result should be an unnamed vector on the
// 'hnsw' index whose vectorizer `config` is `undefined` rather than an empty object.
it('should create the correct Multi2VecVoyageAIConfig type with defaults', () => {
const config = configure.vectorizer.multi2VecVoyageAI();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-voyageai'>>({
name: undefined,
vectorIndex: {
name: 'hnsw',
config: undefined,
},
vectorizer: {
name: 'multi2vec-voyageai',
// No options were supplied, so no vectorizer config object is expected at all.
config: undefined,
},
});
});

// Factory called with every supported option: each vectorizer option should appear
// unchanged under `vectorizer.config`, while `name` ends up on the top-level object.
it('should create the correct Multi2VecVoyageAIConfig type with all values', () => {
const config = configure.vectorizer.multi2VecVoyageAI({
baseURL: 'base-url',
model: 'model',
name: 'test',
truncate: true,
// Fields are given as plain strings here, not as `Multi2VecField` objects.
imageFields: ['field1', 'field2'],
textFields: ['field3', 'field4'],
vectorizeCollectionName: true,
});
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-voyageai'>>({
// `name` names the vector itself and is not part of the vectorizer config below.
name: 'test',
vectorIndex: {
name: 'hnsw',
config: undefined,
},
vectorizer: {
name: 'multi2vec-voyageai',
config: {
baseURL: 'base-url',
model: 'model',
truncate: true,
imageFields: ['field1', 'field2'],
textFields: ['field3', 'field4'],
vectorizeCollectionName: true,
},
},
});
});

it('should create the correct Text2VecAWSConfig type with defaults', () => {
const config = configure.vectorizer.text2VecAWS({
region: 'region',
Expand Down Expand Up @@ -1071,35 +1119,35 @@ describe('Unit testing of the vectorizer factory class', () => {
});
});

it('should create the correct Text2VecJinaConfig type with defaults', () => {
const config = configure.vectorizer.text2VecJina();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-jina'>>({
it('should create the correct Text2VecJinaAIConfig type with defaults', () => {
const config = configure.vectorizer.text2VecJinaAI();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'text2vec-jinaai'>>({
name: undefined,
vectorIndex: {
name: 'hnsw',
config: undefined,
},
vectorizer: {
name: 'text2vec-jina',
name: 'text2vec-jinaai',
config: undefined,
},
});
});

it('should create the correct Text2VecJinaConfig type with all values', () => {
const config = configure.vectorizer.text2VecJina({
it('should create the correct Text2VecJinaAIConfig type with all values', () => {
const config = configure.vectorizer.text2VecJinaAI({
name: 'test',
model: 'model',
vectorizeCollectionName: true,
});
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-jina'>>({
expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'text2vec-jinaai'>>({
name: 'test',
vectorIndex: {
name: 'hnsw',
config: undefined,
},
vectorizer: {
name: 'text2vec-jina',
name: 'text2vec-jinaai',
config: {
model: 'model',
vectorizeCollectionName: true,
Expand Down
29 changes: 16 additions & 13 deletions src/collections/configure/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,15 @@ export const vectorizer = {
vectorIndexConfig,
vectorizerConfig: {
name: 'multi2vec-voyageai',
config: {
...config,
imageFields: imageFields?.map((f) => f.name),
textFields: textFields?.map((f) => f.name),
weights: Object.keys(weights).length === 0 ? undefined : weights,
},
config:
Object.keys(config).length === 0
? undefined
: {
...config,
imageFields: imageFields?.map((f) => f.name),
textFields: textFields?.map((f) => f.name),
weights: Object.keys(weights).length === 0 ? undefined : weights,
},
},
});
},
Expand Down Expand Up @@ -495,22 +498,22 @@ export const vectorizer = {
});
},
/**
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-jina'`.
* Create a `VectorConfigCreate` object with the vectorizer set to `'text2vec-jinaai'`.
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/jinaai/embeddings) for detailed usage.
*
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jina'>} [opts] The configuration for the `text2vec-jina` vectorizer.
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jina'>} The configuration object.
* @param {ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jinaai'>} [opts] The configuration for the `text2vec-jinaai` vectorizer.
* @returns {VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jinaai'>} The configuration object.
*/
text2VecJina: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jina'>
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jina'> => {
text2VecJinaAI: <T, N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
opts?: ConfigureTextVectorizerOptions<T, N, I, 'text2vec-jinaai'>
): VectorConfigCreate<PrimitiveKeys<T>, N, I, 'text2vec-jinaai'> => {
const { name, sourceProperties, vectorIndexConfig, ...config } = opts || {};
return makeVectorizer(name, {
sourceProperties,
vectorIndexConfig,
vectorizerConfig: {
name: 'text2vec-jina',
name: 'text2vec-jinaai',
config: Object.keys(config).length === 0 ? undefined : config,
},
});
Expand Down

0 comments on commit 95c1563

Please sign in to comment.