Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add configure.multi2VecCohere factory method and types for the module #227

Merged
merged 1 commit into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/collections/config/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Text2VecPalmVectorizer = 'text2vec-palm';
export type Vectorizer =
| 'img2vec-neural'
| 'multi2vec-clip'
| 'multi2vec-cohere'
| 'multi2vec-bind'
| Multi2VecPalmVectorizer
| 'multi2vec-google'
Expand Down Expand Up @@ -81,6 +82,33 @@ export type Multi2VecClipConfig = {
};
};

/**
* The configuration for multi-media vectorization using the Cohere module.
*
* Every property is optional.
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal) for detailed usage.
*/
export type Multi2VecCohereConfig = {
/** The base URL to which API requests are sent. */
baseURL?: string;
/** The names of the image fields used when vectorizing. */
imageFields?: string[];
/** The specific model to use. */
model?: string;
/** The names of the text fields used when vectorizing. */
textFields?: string[];
/** The truncation strategy to use. NOTE(review): typed as a free-form string; the accepted values are not visible here. */
truncate?: string;
/** Whether the collection name is vectorized. */
vectorizeCollectionName?: boolean;
/** The weights of the fields used for vectorization. */
weights?: {
/** The weights of the image fields; presumably one entry per `imageFields` entry, in the same order — confirm against the module documentation. */
imageFields?: number[];
/** The weights of the text fields; presumably one entry per `textFields` entry, in the same order — confirm against the module documentation. */
textFields?: number[];
};
};

/** The configuration for multi-media vectorization using the Bind module.
*
* See the [documentation](https://weaviate.io/developers/weaviate/model-providers/imagebind/embeddings-multimodal) for detailed usage.
Expand Down Expand Up @@ -421,6 +449,8 @@ export type VectorizerConfigType<V> = V extends 'img2vec-neural'
? Img2VecNeuralConfig | undefined
: V extends 'multi2vec-clip'
? Multi2VecClipConfig | undefined
: V extends 'multi2vec-cohere'
? Multi2VecCohereConfig | undefined
: V extends 'multi2vec-bind'
? Multi2VecBindConfig | undefined
: V extends 'multi2vec-google'
Expand Down
18 changes: 18 additions & 0 deletions src/collections/configure/types/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,22 @@ export type Multi2VecBindConfigCreate = {
vectorizeCollectionName?: boolean;
};

/**
* The configuration for the `multi2vec-cohere` vectorizer.
*
* Every property is optional.
*/
export type Multi2VecCohereConfigCreate = {
/** The base URL to use where API requests should go. */
baseURL?: string;
/** The image fields to use in vectorization. Either an array of strings or an array of `Multi2VecField` objects. If string, weight 0 will be assumed. */
imageFields?: string[] | Multi2VecField[];
/** The specific model to use. */
model?: string;
/** The text fields to use in vectorization. Either an array of strings or an array of `Multi2VecField` objects. If string, weight 0 will be assumed. */
textFields?: string[] | Multi2VecField[];
/** The truncation strategy to use. */
truncate?: string;
/** Whether to vectorize the collection name. */
vectorizeCollectionName?: boolean;
};

/**
* Alias of `Multi2VecGoogleConfigCreate` under the older Palm name.
*
* @deprecated Use `Multi2VecGoogleConfigCreate` instead.
*/
export type Multi2VecPalmConfigCreate = Multi2VecGoogleConfigCreate;

Expand Down Expand Up @@ -173,6 +189,8 @@ export type VectorizerConfigCreateType<V> = V extends 'img2vec-neural'
? Img2VecNeuralConfigCreate | undefined
: V extends 'multi2vec-clip'
? Multi2VecClipConfigCreate | undefined
: V extends 'multi2vec-cohere'
? Multi2VecCohereConfigCreate | undefined
: V extends 'multi2vec-bind'
? Multi2VecBindConfigCreate | undefined
: V extends 'multi2vec-palm'
Expand Down
73 changes: 73 additions & 0 deletions src/collections/configure/unit.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,79 @@ describe('Unit testing of the vectorizer factory class', () => {
});
});

// With no options, the factory must produce an unnamed 'hnsw' vector config whose
// 'multi2vec-cohere' vectorizer carries no module configuration.
it('should create the correct Multi2VecCohereConfig type with defaults', () => {
  const expected: VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-cohere'> = {
    name: undefined,
    vectorIndex: { name: 'hnsw', config: undefined },
    vectorizer: { name: 'multi2vec-cohere', config: undefined },
  };

  const actual = configure.vectorizer.multi2VecCohere();

  expect(actual).toEqual(expected);
});

// Exercises every scalar option of the `multi2vec-cohere` factory (`baseURL`, `model`,
// `truncate`, `vectorizeCollectionName`) and checks that each is passed through unchanged.
// Field lists and their weights are covered by the separate
// 'with all values and weights' test.
it('should create the correct Multi2VecCohereConfig type with all values', () => {
  const config = configure.vectorizer.multi2VecCohere({
    name: 'test',
    baseURL: 'base-url',
    model: 'model',
    truncate: 'END',
    vectorizeCollectionName: true,
  });
  expect(config).toEqual<VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-cohere'>>({
    name: 'test',
    vectorIndex: {
      name: 'hnsw',
      config: undefined,
    },
    vectorizer: {
      name: 'multi2vec-cohere',
      // `toEqual` ignores properties whose value is `undefined`, so the unset
      // `imageFields`, `textFields` and `weights` keys need not be listed here.
      config: {
        baseURL: 'base-url',
        model: 'model',
        truncate: 'END',
        vectorizeCollectionName: true,
      },
    },
  });
});

// Field lists supplied as `{ name, weight }` objects must come out as parallel arrays:
// the names under `imageFields` / `textFields` and the weights under `weights`.
it('should create the correct Multi2VecCohereConfig type with all values and weights', () => {
  const expected: VectorConfigCreate<never, 'test', 'hnsw', 'multi2vec-cohere'> = {
    name: 'test',
    vectorIndex: { name: 'hnsw', config: undefined },
    vectorizer: {
      name: 'multi2vec-cohere',
      config: {
        model: 'model',
        imageFields: ['field1', 'field2'],
        textFields: ['field3', 'field4'],
        vectorizeCollectionName: true,
        weights: {
          imageFields: [0.1, 0.2],
          textFields: [0.3, 0.4],
        },
      },
    },
  };

  const actual = configure.vectorizer.multi2VecCohere({
    name: 'test',
    model: 'model',
    imageFields: [
      { name: 'field1', weight: 0.1 },
      { name: 'field2', weight: 0.2 },
    ],
    textFields: [
      { name: 'field3', weight: 0.3 },
      { name: 'field4', weight: 0.4 },
    ],
    vectorizeCollectionName: true,
  });

  expect(actual).toEqual(expected);
});

it('should create the correct Multi2VecClipConfig type with defaults', () => {
const config = configure.vectorizer.multi2VecClip();
expect(config).toEqual<VectorConfigCreate<never, undefined, 'hnsw', 'multi2vec-clip'>>({
Expand Down
33 changes: 33 additions & 0 deletions src/collections/configure/vectorizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,39 @@ export const vectorizer = {
},
});
},
/**
 * Build a `VectorConfigCreate` object whose vectorizer is `'multi2vec-cohere'`.
 *
 * Image and text fields are passed through `mapMulti2VecField`; their names are emitted as
 * `imageFields` / `textFields`, and the value built by `formatMulti2VecFields` is emitted as
 * `weights` (or `undefined` when it has no keys). When no module option is supplied at all,
 * the vectorizer `config` is `undefined`.
 *
 * See the [documentation](https://weaviate.io/developers/weaviate/model-providers/cohere/embeddings-multimodal) for detailed usage.
 *
 * @param {ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-cohere'>} [opts] The configuration options for the `multi2vec-cohere` vectorizer.
 * @returns {VectorConfigCreate<never, N, I, 'multi2vec-cohere'>} The configuration object.
 */
multi2VecCohere: <N extends string | undefined = undefined, I extends VectorIndexType = 'hnsw'>(
  opts?: ConfigureNonTextVectorizerOptions<N, I, 'multi2vec-cohere'>
): VectorConfigCreate<never, N, I, 'multi2vec-cohere'> => {
  // Split the vector-level options from the options that belong to the module itself.
  const { name, vectorIndexConfig, ...moduleOptions } = opts || {};

  const imageEntries = moduleOptions.imageFields?.map(mapMulti2VecField);
  const textEntries = moduleOptions.textFields?.map(mapMulti2VecField);
  const imageNames = imageEntries?.map(({ name: fieldName }) => fieldName);
  const textNames = textEntries?.map(({ name: fieldName }) => fieldName);

  // NOTE(review): typed via `Multi2VecBindConfig['weights']` although this is the Cohere factory.
  let fieldWeights: Multi2VecBindConfig['weights'] = {};
  fieldWeights = formatMulti2VecFields(fieldWeights, 'imageFields', imageEntries);
  fieldWeights = formatMulti2VecFields(fieldWeights, 'textFields', textEntries);

  return makeVectorizer(name, {
    vectorIndexConfig,
    vectorizerConfig: {
      name: 'multi2vec-cohere',
      config:
        Object.keys(moduleOptions).length > 0
          ? {
              ...moduleOptions,
              imageFields: imageNames,
              textFields: textNames,
              weights: Object.keys(fieldWeights).length > 0 ? fieldWeights : undefined,
            }
          : undefined,
    },
  });
},
/**
* Create a `VectorConfigCreate` object with the vectorizer set to `'multi2vec-clip'`.
*
Expand Down
Loading