Commit e6153a9

refactor: update genai samples (#3588)
* refactor: filling out multimodal with image sample and clean up
* refactor: filling out multimodal video sample
* refactor: adding tests for multimodal samples
* fix: resolving linting errors
1 parent c89fde7 commit e6153a9

13 files changed: 224 additions, 32 deletions

generative-ai/snippets/countTokens.js

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@ async function countTokens(
   */
  // const projectId = 'your-project-id';
  // const location = 'us-central1';
- // const model = 'gemini-pro';
+ // const model = 'chosen-genai-model';

  // Initialize Vertex with your Cloud project and location
  const vertex_ai = new VertexAI({project: projectId, location: location});
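
With the model now a caller-supplied parameter instead of a hard-coded gemini-pro, the snippet takes three positional arguments. Assuming it follows the same process.argv pattern as the other samples in this commit, an invocation looks like this (the argument values are illustrative):

    node countTokens.js your-project-id us-central1 gemini-pro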

generative-ai/snippets/nonStreamingChat.js

Lines changed: 3 additions & 11 deletions
@@ -14,26 +14,18 @@

 const {VertexAI} = require('@google-cloud/vertexai');

-function wait(time) {
-  return new Promise(resolve => {
-    setTimeout(resolve, time);
-  });
-}
-
 async function createNonStreamingChat(
   projectId = 'PROJECT_ID',
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
-  // TODO: Find better method. Setting delay to give api time to respond, otherwise it will 404
-  // await wait(10);
-
-  // [START aiplatform_gemini_multiturn_chat]
+  // [START aiplatform_gemini_multiturn_chat_nonstreaming]
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';

   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -64,7 +56,7 @@ async function createNonStreamingChat(
   const response3 = result3.response.candidates[0].content.parts[0].text;
   console.log('Chat bot: ', response3);

-  // [END aiplatform_gemini_multiturn_chat]
+  // [END aiplatform_gemini_multiturn_chat_nonstreaming]
 }

 createNonStreamingChat(...process.argv.slice(2)).catch(err => {
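
The deleted wait() helper existed only to paper over intermittent 404s by sleeping before the first request. If that flakiness returns, a bounded retry with exponential backoff is a less brittle workaround. This is a hypothetical sketch, not code from the commit:

    // Hypothetical helper: retry an async operation with exponential backoff.
    async function withRetries(fn, attempts = 3, delayMs = 1000) {
      for (let attempt = 0; attempt < attempts; attempt++) {
        try {
          return await fn();
        } catch (err) {
          if (attempt === attempts - 1) throw err;
          // Back off: 1s, 2s, 4s, ...
          await new Promise(resolve => setTimeout(resolve, delayMs * 2 ** attempt));
        }
      }
    }

    // Example use with the chat API shown in this sample:
    // const result1 = await withRetries(() => chat.sendMessage(chatInput1));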

generative-ai/snippets/nonStreamingContent.js

Lines changed: 3 additions & 2 deletions
@@ -19,13 +19,14 @@ async function createNonStreamingContent(
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
-  // [START aiplatform_gemini_function_calling]
+  // [START aiplatform_gemini_content_nonstreaming]

   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';

   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -55,7 +56,7 @@ async function createNonStreamingContent(

   console.log(fullTextResponse);

-  // [END aiplatform_gemini_function_calling]
+  // [END aiplatform_gemini_content_nonstreaming]
 }

 createNonStreamingContent(...process.argv.slice(2)).catch(err => {

generative-ai/snippets/nonStreamingMultipartContent.js

Lines changed: 4 additions & 4 deletions
@@ -41,9 +41,9 @@ async function createNonStreamingMultipartContent(

   // For images, the SDK supports both Google Cloud Storage URI and base64 strings
   const filePart = {
-    file_data: {
-      file_uri: image,
-      mime_type: mimeType,
+    fileData: {
+      fileUri: image,
+      mimeType: mimeType,
     },
   };

@@ -57,8 +57,8 @@ async function createNonStreamingMultipartContent(

   console.log('Prompt Text:');
   console.log(request.contents[0].parts[0].text);
-  console.log('Non-Streaming Response Text:');

+  console.log('Non-Streaming Response Text:');
   // Create the response stream
   const responseStream =
     await generativeVisionModel.generateContentStream(request);
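
Beyond the snake_case-to-camelCase rename of the fileData part, the comment above it notes that base64 strings are also supported. The inline equivalent of the URI-based part uses inlineData, matching the shape in the new image sample later in this commit (base64Image here is a placeholder, not a variable from this file):

    // Inline alternative to the Cloud Storage URI part, using base64 bytes.
    const inlinePart = {
      inlineData: {
        data: base64Image, // placeholder: a base64-encoded image string
        mimeType: 'image/jpeg',
      },
    };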

generative-ai/snippets/package.json

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@
   "dependencies": {
     "@google-cloud/aiplatform": "^3.0.0",
     "@google-cloud/vertexai": "github:googleapis/nodejs-vertexai",
+    "axios": "^1.6.2",
     "supertest": "^6.3.3"
   },
   "devDependencies": {

generative-ai/snippets/safetySettings.js

Lines changed: 9 additions & 5 deletions
@@ -18,14 +18,18 @@ const {
   HarmBlockThreshold,
 } = require('@google-cloud/vertexai');

-async function createStreamContent() {
+async function setSafetySettings(
+  projectId = 'PROJECT_ID',
+  location = 'LOCATION_ID',
+  model = 'MODEL'
+) {
   // [START aiplatform_gemini_safety_settings]
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
-  const projectId = 'cloud-llm-preview1';
-  const location = 'us-central1';
-  const model = 'gemini-pro';
+  // const projectId = 'your-project-id';
+  // const location = 'us-central1';
+  // const model = 'chosen-genai-model';

   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -66,7 +70,7 @@ async function createStreamContent() {
   // [END aiplatform_gemini_safety_settings]
 }

-createStreamContent(...process.argv.slice(3)).catch(err => {
+setSafetySettings(...process.argv.slice(3)).catch(err => {
   console.error(err.message);
   process.exitCode = 1;
 });
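
These hunks show only the signature cleanup, but the HarmBlockThreshold import hints at what the unchanged body does: it passes per-category harm thresholds when constructing the model. A representative configuration might look like the following; the field name, enum values, and the HarmCategory import are assumptions, since the function body is not part of this diff:

    // Assumed shape: block responses at or above a harm threshold per category.
    const generativeModel = vertexAI.preview.getGenerativeModel({
      model: model,
      safety_settings: [
        {
          category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
          threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        },
      ],
    });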

generative-ai/snippets/sendMultiModalPromptWithImage.js

Lines changed: 77 additions & 0 deletions
@@ -13,13 +13,90 @@
 // limitations under the License.

 const {VertexAI} = require('@google-cloud/vertexai');
+const axios = require('axios');
+
+async function getBase64(url) {
+  const image = await axios.get(url, {responseType: 'arraybuffer'});
+  return Buffer.from(image.data).toString('base64');
+}

 async function sendMultiModalPromptWithImage(
   projectId = 'PROJECT_ID',
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
   // [START aiplatform_gemini_single_turn_multi_image]
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.
+   */
+  // const projectId = 'your-project-id';
+  // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
+
+  // For images, the SDK supports base64 strings
+  const landmarkImage1 = await getBase64(
+    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png'
+  );
+  const landmarkImage2 = await getBase64(
+    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png'
+  );
+  const landmarkImage3 = await getBase64(
+    'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png'
+  );
+
+  // Initialize Vertex with your Cloud project and location
+  const vertexAI = new VertexAI({project: projectId, location: location});
+
+  const generativeVisionModel = vertexAI.preview.getGenerativeModel({
+    model: model,
+  });
+
+  // Pass multimodal prompt
+  const request = {
+    contents: [
+      {
+        role: 'user',
+        parts: [
+          {
+            inlineData: {
+              data: landmarkImage1,
+              mimeType: 'image/png',
+            },
+          },
+          {
+            text: 'city: Rome, Landmark: the Colosseum',
+          },
+
+          {
+            inlineData: {
+              data: landmarkImage2,
+              mimeType: 'image/png',
+            },
+          },
+          {
+            text: 'city: Beijing, Landmark: Forbidden City',
+          },
+          {
+            inlineData: {
+              data: landmarkImage3,
+              mimeType: 'image/png',
+            },
+          },
+        ],
+      },
+    ],
+  };
+
+  // Create the response
+  const response = await generativeVisionModel.generateContent(request);
+  // Wait for the response to complete
+  const aggregatedResponse = await response.response;
+  // Select the text from the response
+  const fullTextResponse =
+    aggregatedResponse.candidates[0].content.parts[0].text;
+
+  console.log(fullTextResponse);
+
   // [END aiplatform_gemini_single_turn_multi_image]
 }

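The new test at the bottom of this commit exercises this sample end to end; invoked directly it takes the same three positional arguments the test passes:

    node sendMultiModalPromptWithImage.js your-project-id us-central1 gemini-pro-vision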

generative-ai/snippets/sendMultiModalPromptWithVideo.js

Lines changed: 46 additions & 2 deletions
@@ -14,16 +14,60 @@

 const {VertexAI} = require('@google-cloud/vertexai');

-async function sendMultiModalPromptWithImage(
+async function sendMultiModalPromptWithVideo(
   projectId = 'PROJECT_ID',
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
   // [START aiplatform_gemini_single_turn_video]
+  /**
+   * TODO(developer): Uncomment these variables before running the sample.
+   */
+  // const projectId = 'your-project-id';
+  // const location = 'us-central1';
+  // const model = 'chosen-genai-model';
+
+  // Initialize Vertex with your Cloud project and location
+  const vertexAI = new VertexAI({project: projectId, location: location});
+
+  const generativeVisionModel = vertexAI.preview.getGenerativeModel({
+    model: model,
+  });
+
+  // Pass multimodal prompt
+  const request = {
+    contents: [
+      {
+        role: 'user',
+        parts: [
+          {
+            text: 'What is in the video?',
+          },
+          {
+            fileData: {
+              fileUri: 'gs://cloud-samples-data/video/animals.mp4',
+              mimeType: 'video/mp4',
+            },
+          },
+        ],
+      },
+    ],
+  };
+
+  // Create the response
+  const response = await generativeVisionModel.generateContent(request);
+  // Wait for the response to complete
+  const aggregatedResponse = await response.response;
+  // Select the text from the response
+  const fullTextResponse =
+    aggregatedResponse.candidates[0].content.parts[0].text;
+
+  console.log(fullTextResponse);
+
   // [END aiplatform_gemini_single_turn_video]
 }

-sendMultiModalPromptWithImage(...process.argv.slice(2)).catch(err => {
+sendMultiModalPromptWithVideo(...process.argv.slice(2)).catch(err => {
   console.error(err.message);
   process.exitCode = 1;
 });
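
As with the image sample, the video sample reads its three arguments from process.argv; a direct invocation might look like this (the model name is illustrative):

    node sendMultiModalPromptWithVideo.js your-project-id us-central1 gemini-pro-vision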

generative-ai/snippets/streamChat.js

Lines changed: 3 additions & 2 deletions
@@ -20,12 +20,12 @@ async function createStreamChat(
   model = 'MODEL'
 ) {
   // [START aiplatform_gemini_multiturn_chat]
-
   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';

   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -36,9 +36,10 @@
   });

   const chat = generativeModel.startChat({});
-
   const chatInput1 = 'How can I learn more about that?';
+
   console.log(`User: ${chatInput1}`);
+
   const result1 = await chat.sendMessageStream(chatInput1);
   for await (const item of result1.stream) {
     console.log(item.candidates[0].content.parts[0].text);

generative-ai/snippets/streamContent.js

Lines changed: 3 additions & 2 deletions
@@ -19,13 +19,14 @@ async function createStreamContent(
   location = 'LOCATION_ID',
   model = 'MODEL'
 ) {
-  // [START aiplatform_gemini_function_calling]
+  // [START aiplatform_gemini_content]

   /**
    * TODO(developer): Uncomment these variables before running the sample.
    */
   // const projectId = 'your-project-id';
   // const location = 'us-central1';
+  // const model = 'chosen-genai-model';

   // Initialize Vertex with your Cloud project and location
   const vertexAI = new VertexAI({project: projectId, location: location});
@@ -51,7 +52,7 @@ async function createStreamContent(
     process.stdout.write(item.candidates[0].content.parts[0].text);
   }

-  // [END aiplatform_gemini_function_calling]
+  // [END aiplatform_gemini_content]
 }

 createStreamContent(...process.argv.slice(2)).catch(err => {

generative-ai/snippets/streamMultipartContent.js

Lines changed: 4 additions & 3 deletions
@@ -28,6 +28,7 @@ async function createStreamMultipartContent(
   */
  // const projectId = 'your-project-id';
  // const location = 'us-central1';
+ // const model = 'chosen-genai-model';
  // const image = 'gs://generativeai-downloads/images/scones.jpg'; // Google Cloud Storage image
  // const mimeType = 'image/jpeg';

@@ -41,9 +42,9 @@ async function createStreamMultipartContent(

   // For images, the SDK supports both Google Cloud Storage URI and base64 strings
   const filePart = {
-    file_data: {
-      file_uri: image,
-      mime_type: mimeType,
+    fileData: {
+      fileUri: image,
+      mimeType: mimeType,
     },
   };

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+'use strict';
+
+const {assert} = require('chai');
+const {describe, it} = require('mocha');
+const cp = require('child_process');
+
+const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});
+
+describe('Generative AI Stream MultiModal with Image', () => {
+  const project = 'cloud-llm-preview1';
+  const location = 'us-central1';
+  const model = 'gemini-pro-vision';
+
+  it('should create stream multimodal content', async () => {
+    const output = execSync(
+      `node ./sendMultiModalPromptWithImage.js ${project} ${location} ${model}`
+    );
+    // Ensure that the conversation is what we expect for these images
+    assert(output.match(/Paris/));
+  });
+});
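
The test shells out to the sample and pattern-matches its stdout, so running it requires real Google Cloud credentials and network access. Assuming the snippets package's usual mocha setup, it runs via the package's test script (the script itself is not shown in this diff):

    cd generative-ai/snippets && npm test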
