Skip to content

Commit fc64882

Browse files
author
Joanna Grycz
committed
feat: generativeaionvertexai_embedding_batch
1 parent 5bd0c38 commit fc64882

File tree

2 files changed

+189
-0
lines changed

2 files changed

+189
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
/**
 * Submits a Vertex AI batch prediction job that generates text embeddings.
 *
 * @param {string} projectId Google Cloud project that owns the job.
 * @param {string} inputUri URI of the JSONL input, passed to a GcsSource.
 * @param {string} outputUri URI prefix for the output, passed to a GcsDestination.
 * @param {string} jobName Display name assigned to the batch prediction job.
 */
async function main(projectId, inputUri, outputUri, jobName) {
  // [START generativeaionvertexai_embedding_batch]
  // Load the aiplatform library and its v1 protobuf message classes
  const aiplatformLib = require('@google-cloud/aiplatform');
  const aiplatform = aiplatformLib.protos.google.cloud.aiplatform.v1;

  /**
   * TODO(developer): Uncomment/update these variables before running the sample.
   */
  // projectId = 'YOUR_PROJECT_ID';

  // URI of the input dataset.
  // This sample wraps it in a GcsSource and reads it as JSONL,
  // e.g. "gs://[BUCKET]/[DATASET].jsonl"
  // inputUri =
  //   'gs://cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl';

  // URI prefix where the output will be stored.
  // This sample wraps it in a GcsDestination and writes JSONL,
  // e.g. "gs://[BUCKET]/[OUTPUT_PREFIX]"
  // outputUri = 'gs://your_bucket/embedding_batch_output';

  // The display name of the job
  // jobName = `Batch embedding job: ${Date.now()}`;

  const location = 'us-central1';
  const textEmbeddingModel = 'text-embedding-005';

  // Resource names: the regional parent, and the publisher model beneath it
  const parent = `projects/${projectId}/locations/${location}`;
  const modelName = `${parent}/publishers/google/models/${textEmbeddingModel}`;

  // The client must talk to the regional API endpoint that matches `location`
  const jobServiceClient = new aiplatformLib.JobServiceClient({
    apiEndpoint: `${location}-aiplatform.googleapis.com`,
  });

  // Generates embeddings from text using batch processing.
  // Read more: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/batch-prediction-genai-embeddings
  const submitBatchEmbeddingJob = async () => {
    // Where the job reads its instances from
    const inputConfig = new aiplatform.BatchPredictionJob.InputConfig({
      gcsSource: new aiplatform.GcsSource({uris: [inputUri]}),
      instancesFormat: 'jsonl',
    });

    // Where the job writes its predictions to
    const outputConfig = new aiplatform.BatchPredictionJob.OutputConfig({
      gcsDestination: new aiplatform.GcsDestination({
        outputUriPrefix: outputUri,
      }),
      predictionsFormat: 'jsonl',
    });

    const batchPredictionJob = new aiplatform.BatchPredictionJob({
      displayName: jobName,
      model: modelName,
      inputConfig,
      outputConfig,
    });

    // Create batch prediction job request
    const [response] = await jobServiceClient.createBatchPredictionJob({
      parent,
      batchPredictionJob,
    });

    console.log('Raw response: ', JSON.stringify(response, null, 2));
  };

  await submitBatchEmbeddingJob();
  // [END generativeaionvertexai_embedding_batch]
}
101+
102+
// Run the sample with the command-line arguments that follow the script path
// (projectId, inputUri, outputUri, jobName). On failure, print the error
// message and mark the process as failed without forcing an immediate exit.
const sampleArgs = process.argv.slice(2);

main(...sampleArgs).catch(error => {
  console.error(error.message);
  process.exitCode = 1;
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright 2024 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
'use strict';
18+
19+
const {assert} = require('chai');
20+
const {after, before, describe, it} = require('mocha');
21+
const uuid = require('uuid').v4;
22+
const cp = require('child_process');
23+
const {JobServiceClient} = require('@google-cloud/aiplatform');
24+
const {Storage} = require('@google-cloud/storage');
25+
26+
/**
 * Runs a shell command synchronously and returns its stdout as a UTF-8 string.
 *
 * @param {string} command Command line handed to `child_process.execSync`.
 * @returns {string} Text the command wrote to stdout.
 */
function execSync(command) {
  return cp.execSync(command, {encoding: 'utf-8'});
}
27+
28+
// System test for create-batch-embedding.js: runs the sample as a child
// process against a temporary output bucket, then cleans up the job and bucket.
// The describe callback is synchronous because Mocha does not wait for a
// promise returned from a suite callback.
describe('Batch embedding', () => {
  const displayName = `batch-embedding-job-${uuid()}`;
  const location = 'us-central1';
  const inputUri =
    'gs://cloud-samples-data/generative-ai/embeddings/embeddings_input.jsonl';
  const jobServiceClient = new JobServiceClient({
    apiEndpoint: `${location}-aiplatform.googleapis.com`,
  });
  const projectId = process.env.CAIP_PROJECT_ID;
  const storage = new Storage({
    projectId,
  });
  // Assigned in `before`, once the temporary bucket exists.
  let outputUri;
  let batchPredictionJobId;
  let bucket;

  /**
   * Extracts the batch prediction job ID from the sample's stdout.
   *
   * The sample prints the response as pretty-printed JSON, so the resource
   * name is followed by `",`. The capture stops at the first quote, slash,
   * comma or whitespace so that only the bare ID is returned.
   *
   * @param {string} output Stdout produced by the sample.
   * @returns {string|undefined} The job ID, or undefined if none was found.
   */
  const extractBatchPredictionJobId = output => {
    const jobNamePattern = new RegExp(
      `/locations/${location}/batchPredictionJobs/([^/"\\s,]+)`
    );
    const match = output.match(jobNamePattern);
    return match ? match[1] : undefined;
  };

  before(async () => {
    const bucketName = `test-bucket-${uuid()}`;
    // Create a temporary Google Cloud Storage bucket for the job output
    [bucket] = await storage.createBucket(bucketName);
    outputUri = `gs://${bucketName}/embedding_batch_output`;
  });

  after(async () => {
    try {
      // Only attempt job cleanup when the test actually created a job.
      if (batchPredictionJobId) {
        const name = jobServiceClient.batchPredictionJobPath(
          projectId,
          location,
          batchPredictionJobId
        );

        // Await both calls so cleanup is issued before the hook returns
        // instead of being left as a floating promise.
        await jobServiceClient.cancelBatchPredictionJob({name});
        await jobServiceClient.deleteBatchPredictionJob({name});
      }
    } catch (err) {
      // Job cleanup is best-effort: report the failure rather than hiding it
      // or failing the whole suite.
      console.warn(
        `Failed to clean up batch prediction job ${batchPredictionJobId}: ${err.message}`
      );
    } finally {
      // Delete the temporary bucket even if job cleanup failed. `bucket` is
      // undefined when the `before` hook itself failed.
      if (bucket) {
        await bucket.delete();
      }
    }
  });

  it('should create batch prediction job', () => {
    const response = execSync(
      `node ./create-batch-embedding.js ${projectId} ${inputUri} ${outputUri} ${displayName}`
    );

    assert.match(response, new RegExp(displayName));
    batchPredictionJobId = extractBatchPredictionJobId(response);
    assert.exists(
      batchPredictionJobId,
      'sample output should contain the batch prediction job resource name'
    );
  });
});

0 commit comments

Comments
 (0)