From e6153a970da264dea6ec06a2b33ae62293055dda Mon Sep 17 00:00:00 2001 From: Patti Shin Date: Wed, 13 Dec 2023 00:54:32 -0800 Subject: [PATCH] refactor: update genai samples (#3588) * refactor: filling out multimodal with image sample and clean up * refactor: filling out multimodal video sample * refactor: adding tests for multimodal samples * fix: resolving linting errors --- generative-ai/snippets/countTokens.js | 2 +- generative-ai/snippets/nonStreamingChat.js | 14 +--- generative-ai/snippets/nonStreamingContent.js | 5 +- .../snippets/nonStreamingMultipartContent.js | 8 +- generative-ai/snippets/package.json | 1 + generative-ai/snippets/safetySettings.js | 14 ++-- .../snippets/sendMultiModalPromptWithImage.js | 77 +++++++++++++++++++ .../snippets/sendMultiModalPromptWithVideo.js | 48 +++++++++++- generative-ai/snippets/streamChat.js | 5 +- generative-ai/snippets/streamContent.js | 5 +- .../snippets/streamMultipartContent.js | 7 +- .../sendMultiModalPromptWithImage.test.js | 35 +++++++++ .../sendMultiModalPromptWithVideo.test.js | 35 +++++++++ 13 files changed, 224 insertions(+), 32 deletions(-) create mode 100644 generative-ai/snippets/test/sendMultiModalPromptWithImage.test.js create mode 100644 generative-ai/snippets/test/sendMultiModalPromptWithVideo.test.js diff --git a/generative-ai/snippets/countTokens.js b/generative-ai/snippets/countTokens.js index 68847dc62d..83b0f4db57 100644 --- a/generative-ai/snippets/countTokens.js +++ b/generative-ai/snippets/countTokens.js @@ -26,7 +26,7 @@ async function countTokens( */ // const projectId = 'your-project-id'; // const location = 'us-central1'; - // const model = 'gemini-pro'; + // const model = 'chosen-genai-model'; // Initialize Vertex with your Cloud project and location const vertex_ai = new VertexAI({project: projectId, location: location}); diff --git a/generative-ai/snippets/nonStreamingChat.js b/generative-ai/snippets/nonStreamingChat.js index 199c387498..0a3ec0e629 100644 --- 
a/generative-ai/snippets/nonStreamingChat.js +++ b/generative-ai/snippets/nonStreamingChat.js @@ -14,26 +14,18 @@ const {VertexAI} = require('@google-cloud/vertexai'); -function wait(time) { - return new Promise(resolve => { - setTimeout(resolve, time); - }); -} - async function createNonStreamingChat( projectId = 'PROJECT_ID', location = 'LOCATION_ID', model = 'MODEL' ) { - // TODO: Find better method. Setting delay to give api time to respond, otherwise it will 404 - // await wait(10); - - // [START aiplatform_gemini_multiturn_chat] + // [START aiplatform_gemini_multiturn_chat_nonstreaming] /** * TODO(developer): Uncomment these variables before running the sample. */ // const projectId = 'your-project-id'; // const location = 'us-central1'; + // const model = 'chosen-genai-model'; // Initialize Vertex with your Cloud project and location const vertexAI = new VertexAI({project: projectId, location: location}); @@ -64,7 +56,7 @@ async function createNonStreamingChat( const response3 = result3.response.candidates[0].content.parts[0].text; console.log('Chat bot: ', response3); - // [END aiplatform_gemini_multiturn_chat] + // [END aiplatform_gemini_multiturn_chat_nonstreaming] } createNonStreamingChat(...process.argv.slice(2)).catch(err => { diff --git a/generative-ai/snippets/nonStreamingContent.js b/generative-ai/snippets/nonStreamingContent.js index 21936c9d01..1b72588ae4 100644 --- a/generative-ai/snippets/nonStreamingContent.js +++ b/generative-ai/snippets/nonStreamingContent.js @@ -19,13 +19,14 @@ async function createNonStreamingContent( location = 'LOCATION_ID', model = 'MODEL' ) { - // [START aiplatform_gemini_function_calling] + // [START aiplatform_gemini_content_nonstreaming] /** * TODO(developer): Uncomment these variables before running the sample. 
*/ // const projectId = 'your-project-id'; // const location = 'us-central1'; + // const model = 'chosen-genai-model'; // Initialize Vertex with your Cloud project and location const vertexAI = new VertexAI({project: projectId, location: location}); @@ -55,7 +56,7 @@ async function createNonStreamingContent( console.log(fullTextResponse); - // [END aiplatform_gemini_function_calling] + // [END aiplatform_gemini_content_nonstreaming] } createNonStreamingContent(...process.argv.slice(2)).catch(err => { diff --git a/generative-ai/snippets/nonStreamingMultipartContent.js b/generative-ai/snippets/nonStreamingMultipartContent.js index e899886d3e..086c4586d1 100644 --- a/generative-ai/snippets/nonStreamingMultipartContent.js +++ b/generative-ai/snippets/nonStreamingMultipartContent.js @@ -41,9 +41,9 @@ async function createNonStreamingMultipartContent( // For images, the SDK supports both Google Cloud Storage URI and base64 strings const filePart = { - file_data: { - file_uri: image, - mime_type: mimeType, + fileData: { + fileUri: image, + mimeType: mimeType, }, }; @@ -57,8 +57,8 @@ async function createNonStreamingMultipartContent( console.log('Prompt Text:'); console.log(request.contents[0].parts[0].text); - console.log('Non-Streaming Response Text:'); + console.log('Non-Streaming Response Text:'); // Create the response stream const responseStream = await generativeVisionModel.generateContentStream(request); diff --git a/generative-ai/snippets/package.json b/generative-ai/snippets/package.json index 765acfa217..29a62f5f37 100644 --- a/generative-ai/snippets/package.json +++ b/generative-ai/snippets/package.json @@ -15,6 +15,7 @@ "dependencies": { "@google-cloud/aiplatform": "^3.0.0", "@google-cloud/vertexai": "github:googleapis/nodejs-vertexai", + "axios": "^1.6.2", "supertest": "^6.3.3" }, "devDependencies": { diff --git a/generative-ai/snippets/safetySettings.js b/generative-ai/snippets/safetySettings.js index deb3c7f046..2ccbc108a4 100644 --- 
a/generative-ai/snippets/safetySettings.js +++ b/generative-ai/snippets/safetySettings.js @@ -18,14 +18,18 @@ const { HarmBlockThreshold, } = require('@google-cloud/vertexai'); -async function createStreamContent() { +async function setSafetySettings( + projectId = 'PROJECT_ID', + location = 'LOCATION_ID', + model = 'MODEL' +) { // [START aiplatform_gemini_safety_settings] /** * TODO(developer): Uncomment these variables before running the sample. */ - const projectId = 'cloud-llm-preview1'; - const location = 'us-central1'; - const model = 'gemini-pro'; + // const projectId = 'your-project-id'; + // const location = 'us-central1'; + // const model = 'chosen-genai-model'; // Initialize Vertex with your Cloud project and location const vertexAI = new VertexAI({project: projectId, location: location}); @@ -66,7 +70,7 @@ async function createStreamContent( // [END aiplatform_gemini_safety_settings] } -createStreamContent(...process.argv.slice(3)).catch(err => { +setSafetySettings(...process.argv.slice(2)).catch(err => { console.error(err.message); process.exitCode = 1; }); diff --git a/generative-ai/snippets/sendMultiModalPromptWithImage.js b/generative-ai/snippets/sendMultiModalPromptWithImage.js index f9d1d486a4..8b2392e9ed 100644 --- a/generative-ai/snippets/sendMultiModalPromptWithImage.js +++ b/generative-ai/snippets/sendMultiModalPromptWithImage.js @@ -13,6 +13,12 @@ // limitations under the License. const {VertexAI} = require('@google-cloud/vertexai'); +const axios = require('axios'); + +async function getBase64(url) { + const image = await axios.get(url, {responseType: 'arraybuffer'}); + return Buffer.from(image.data).toString('base64'); +} async function sendMultiModalPromptWithImage( projectId = 'PROJECT_ID', location = 'LOCATION_ID', model = 'MODEL' ) { // [START aiplatform_gemini_single_turn_multi_image] + /** + * TODO(developer): Uncomment these variables before running the sample. 
+ */ + // const projectId = 'your-project-id'; + // const location = 'us-central1'; + // const model = 'chosen-genai-model'; + + // For images, the SDK supports base64 strings + const landmarkImage1 = await getBase64( + 'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png' + ); + const landmarkImage2 = await getBase64( + 'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark2.png' + ); + const landmarkImage3 = await getBase64( + 'https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark3.png' + ); + + // Initialize Vertex with your Cloud project and location + const vertexAI = new VertexAI({project: projectId, location: location}); + + const generativeVisionModel = vertexAI.preview.getGenerativeModel({ + model: model, + }); + + // Pass multimodal prompt + const request = { + contents: [ + { + role: 'user', + parts: [ + { + inlineData: { + data: landmarkImage1, + mimeType: 'image/png', + }, + }, + { + text: 'city: Rome, Landmark: the Colosseum', + }, + + { + inlineData: { + data: landmarkImage2, + mimeType: 'image/png', + }, + }, + { + text: 'city: Beijing, Landmark: Forbidden City', + }, + { + inlineData: { + data: landmarkImage3, + mimeType: 'image/png', + }, + }, + ], + }, + ], + }; + + // Create the response + const response = await generativeVisionModel.generateContent(request); + // Wait for the response to complete + const aggregatedResponse = await response.response; + // Select the text from the response + const fullTextResponse = + aggregatedResponse.candidates[0].content.parts[0].text; + + console.log(fullTextResponse); + // [END aiplatform_gemini_single_turn_multi_image] } diff --git a/generative-ai/snippets/sendMultiModalPromptWithVideo.js b/generative-ai/snippets/sendMultiModalPromptWithVideo.js index a7564c7855..cf723a7328 100644 --- a/generative-ai/snippets/sendMultiModalPromptWithVideo.js +++ b/generative-ai/snippets/sendMultiModalPromptWithVideo.js @@ -14,16 +14,60 
@@ const {VertexAI} = require('@google-cloud/vertexai'); -async function sendMultiModalPromptWithImage( +async function sendMultiModalPromptWithVideo( projectId = 'PROJECT_ID', location = 'LOCATION_ID', model = 'MODEL' ) { // [START aiplatform_gemini_single_turn_video] + /** + * TODO(developer): Uncomment these variables before running the sample. + */ + // const projectId = 'your-project-id'; + // const location = 'us-central1'; + // const model = 'chosen-genai-model'; + + // Initialize Vertex with your Cloud project and location + const vertexAI = new VertexAI({project: projectId, location: location}); + + const generativeVisionModel = vertexAI.preview.getGenerativeModel({ + model: model, + }); + + // Pass multimodal prompt + const request = { + contents: [ + { + role: 'user', + parts: [ + { + text: 'What is in the video?', + }, + { + fileData: { + fileUri: 'gs://cloud-samples-data/video/animals.mp4', + mimeType: 'video/mp4', + }, + }, + ], + }, + ], + }; + + // Create the response + const response = await generativeVisionModel.generateContent(request); + // Wait for the response to complete + const aggregatedResponse = await response.response; + // Select the text from the response + const fullTextResponse = + aggregatedResponse.candidates[0].content.parts[0].text; + + console.log(fullTextResponse); + // [END aiplatform_gemini_single_turn_video] } -sendMultiModalPromptWithImage(...process.argv.slice(2)).catch(err => { +sendMultiModalPromptWithVideo(...process.argv.slice(2)).catch(err => { console.error(err.message); process.exitCode = 1; }); diff --git a/generative-ai/snippets/streamChat.js b/generative-ai/snippets/streamChat.js index b28536129c..c33f2be7a6 100644 --- a/generative-ai/snippets/streamChat.js +++ b/generative-ai/snippets/streamChat.js @@ -20,12 +20,12 @@ async function createStreamChat( model = 'MODEL' ) { // [START aiplatform_gemini_multiturn_chat] - /** * TODO(developer): Uncomment these variables before running the sample. 
*/ // const projectId = 'your-project-id'; // const location = 'us-central1'; + // const model = 'chosen-genai-model'; // Initialize Vertex with your Cloud project and location const vertexAI = new VertexAI({project: projectId, location: location}); @@ -36,9 +36,10 @@ async function createStreamChat( }); const chat = generativeModel.startChat({}); - const chatInput1 = 'How can I learn more about that?'; + console.log(`User: ${chatInput1}`); + const result1 = await chat.sendMessageStream(chatInput1); for await (const item of result1.stream) { console.log(item.candidates[0].content.parts[0].text); diff --git a/generative-ai/snippets/streamContent.js b/generative-ai/snippets/streamContent.js index 6f2ed3e2bc..95cae269ba 100644 --- a/generative-ai/snippets/streamContent.js +++ b/generative-ai/snippets/streamContent.js @@ -19,13 +19,14 @@ async function createStreamContent( location = 'LOCATION_ID', model = 'MODEL' ) { - // [START aiplatform_gemini_function_calling] + // [START aiplatform_gemini_content] /** * TODO(developer): Uncomment these variables before running the sample. 
*/ // const projectId = 'your-project-id'; // const location = 'us-central1'; + // const model = 'chosen-genai-model'; // Initialize Vertex with your Cloud project and location const vertexAI = new VertexAI({project: projectId, location: location}); @@ -51,7 +52,7 @@ async function createStreamContent( process.stdout.write(item.candidates[0].content.parts[0].text); } - // [END aiplatform_gemini_function_calling] + // [END aiplatform_gemini_content] } createStreamContent(...process.argv.slice(2)).catch(err => { diff --git a/generative-ai/snippets/streamMultipartContent.js b/generative-ai/snippets/streamMultipartContent.js index aa816baf92..186b5a24ac 100644 --- a/generative-ai/snippets/streamMultipartContent.js +++ b/generative-ai/snippets/streamMultipartContent.js @@ -28,6 +28,7 @@ async function createStreamMultipartContent( */ // const projectId = 'your-project-id'; // const location = 'us-central1'; + // const model = 'chosen-genai-model'; // const image = 'gs://generativeai-downloads/images/scones.jpg'; // Google Cloud Storage image // const mimeType = 'image/jpeg'; @@ -41,9 +42,9 @@ async function createStreamMultipartContent( // For images, the SDK supports both Google Cloud Storage URI and base64 strings const filePart = { - file_data: { - file_uri: image, - mime_type: mimeType, + fileData: { + fileUri: image, + mimeType: mimeType, }, }; diff --git a/generative-ai/snippets/test/sendMultiModalPromptWithImage.test.js b/generative-ai/snippets/test/sendMultiModalPromptWithImage.test.js new file mode 100644 index 0000000000..a421893228 --- /dev/null +++ b/generative-ai/snippets/test/sendMultiModalPromptWithImage.test.js @@ -0,0 +1,35 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +'use strict'; + +const {assert} = require('chai'); +const {describe, it} = require('mocha'); +const cp = require('child_process'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +describe('Generative AI Stream MultiModal with Image', () => { + const project = 'cloud-llm-preview1'; + const location = 'us-central1'; + const model = 'gemini-pro-vision'; + + it('should create stream multimodal content', async () => { + const output = execSync( + `node ./sendMultiModalPromptWithImage.js ${project} ${location} ${model}` + ); + // Ensure that the conversation is what we expect for these images + assert(output.match(/Paris/)); + }); +}); diff --git a/generative-ai/snippets/test/sendMultiModalPromptWithVideo.test.js b/generative-ai/snippets/test/sendMultiModalPromptWithVideo.test.js new file mode 100644 index 0000000000..91d6fedb56 --- /dev/null +++ b/generative-ai/snippets/test/sendMultiModalPromptWithVideo.test.js @@ -0,0 +1,35 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +'use strict'; + +const {assert} = require('chai'); +const {describe, it} = require('mocha'); +const cp = require('child_process'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +describe('Generative AI Stream MultiModal with Video', () => { + const project = 'cloud-llm-preview1'; + const location = 'us-central1'; + const model = 'gemini-pro-vision'; + + it('should create stream multimodal content', async () => { + const output = execSync( + `node ./sendMultiModalPromptWithVideo.js ${project} ${location} ${model}` + ); + // Ensure that the conversation is what we expect for this video + assert(output.match(/advertisement/)); + }); +});