package com.google.firebase.example.ai.send_requests

import android.content.Context
import android.content.res.Resources
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import android.net.Uri
import android.util.Log
import com.google.firebase.ai.GenerativeModel
import com.google.firebase.ai.type.content
import com.google.firebase.example.ai.R
/**
 * Documentation snippets showing how to send multimodal prompts (audio, images,
 * PDFs, video) to a Firebase AI [GenerativeModel], in both non-streaming
 * (`generateContent`) and streaming (`generateContentStream`) form.
 *
 * The `[START ...]` / `[END ...]` markers delimit regions extracted into the
 * published documentation — keep them intact.
 *
 * @param applicationContext used to resolve content URIs into input streams.
 * @param resources used to load sample drawables for the image snippets.
 * @param model the configured generative model that receives the prompts.
 */
class GenerateMultimodal(
    private val applicationContext: Context,
    private val resources: Resources,
    private val model: GenerativeModel,
) {

    private val TAG = "GenerateMultimodal"

    /** Sends an audio recording plus a text instruction and logs the full response. */
    private suspend fun audioNonStreaming(audioUri: Uri) {
        // [START multimodal_audio_non_streaming]
        val contentResolver = applicationContext.contentResolver

        val inputStream = contentResolver.openInputStream(audioUri)

        if (inputStream != null) { // Check if the audio loaded successfully
            inputStream.use { stream ->
                val bytes = stream.readBytes()

                // Provide a prompt that includes the audio specified above and text
                val prompt = content {
                    inlineData(bytes, "audio/mpeg") // Specify the appropriate audio MIME type
                    text("Transcribe what's said in this audio recording.")
                }

                // To generate text output, call `generateContent` with the prompt
                val response = model.generateContent(prompt)

                // Log the generated text, handling the case where it might be null
                Log.d(TAG, response.text ?: "")
            }
        } else {
            Log.e(TAG, "Error getting input stream for audio.")
            // Handle the error appropriately
        }
        // [END multimodal_audio_non_streaming]
    }

    /** Sends an audio recording plus a text instruction and logs the response as it streams in. */
    private suspend fun audioStreaming(audioUri: Uri) {
        // [START multimodal_audio_streaming]
        val contentResolver = applicationContext.contentResolver

        val inputStream = contentResolver.openInputStream(audioUri)

        if (inputStream != null) { // Check if the audio loaded successfully
            inputStream.use { stream ->
                val bytes = stream.readBytes()

                // Provide a prompt that includes the audio specified above and text
                val prompt = content {
                    inlineData(bytes, "audio/mpeg") // Specify the appropriate audio MIME type
                    text("Transcribe what's said in this audio recording.")
                }

                // To stream generated text output, call `generateContentStream` with the prompt
                var fullResponse = ""
                model.generateContentStream(prompt).collect { chunk ->
                    // Log the generated text, handling the case where it might be null
                    Log.d(TAG, chunk.text ?: "")
                    fullResponse += chunk.text ?: ""
                }
            }
        } else {
            Log.e(TAG, "Error getting input stream for audio.")
            // Handle the error appropriately
        }
        // [END multimodal_audio_streaming]
    }

    /** Sends two bundled drawables plus a text question and prints the full response. */
    private suspend fun multiImagesNonStreaming() {
        // [START multimodal_images_non_streaming]
        // Loads an image from the app/res/drawable/ directory
        val bitmap1: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)
        val bitmap2: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky_eats_pizza)

        // Provide a prompt that includes the images specified above and text
        val prompt = content {
            image(bitmap1)
            image(bitmap2)
            text("What is different between these pictures?")
        }

        // To generate text output, call generateContent with the prompt
        val response = model.generateContent(prompt)
        print(response.text)
        // [END multimodal_images_non_streaming]
    }

    /** Sends two bundled drawables plus a text question and prints the response as it streams in. */
    private suspend fun multiImagesStreaming() {
        // [START multimodal_images_streaming]
        // Loads an image from the app/res/drawable/ directory
        val bitmap1: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)
        val bitmap2: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky_eats_pizza)

        // Provide a prompt that includes the images specified above and text
        val prompt = content {
            image(bitmap1)
            image(bitmap2)
            text("What's different between these pictures?")
        }

        // To stream generated text output, call generateContentStream with the prompt
        var fullResponse = ""
        model.generateContentStream(prompt).collect { chunk ->
            print(chunk.text)
            // `chunk.text` is nullable; default to "" so we never append the literal "null"
            fullResponse += chunk.text ?: ""
        }
        // [END multimodal_images_streaming]
    }

    /** Sends one bundled drawable plus a text question and prints the full response. */
    private suspend fun oneImageNonStreaming() {
        // [START multimodal_one_image_non_streaming]
        // Loads an image from the app/res/drawable/ directory
        val bitmap: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)

        // Provide a prompt that includes the image specified above and text
        val prompt = content {
            image(bitmap)
            text("What developer tool is this mascot from?")
        }

        // To generate text output, call generateContent with the prompt
        val response = model.generateContent(prompt)
        print(response.text)
        // [END multimodal_one_image_non_streaming]
    }

    /** Sends one bundled drawable plus a text question and prints the response as it streams in. */
    private suspend fun oneImageStreaming() {
        // [START multimodal_one_image_streaming]
        // Loads an image from the app/res/drawable/ directory
        val bitmap: Bitmap = BitmapFactory.decodeResource(resources, R.drawable.sparky)

        // Provide a prompt that includes the image specified above and text
        val prompt = content {
            image(bitmap)
            text("What developer tool is this mascot from?")
        }

        // To stream generated text output, call generateContentStream with the prompt
        var fullResponse = ""
        model.generateContentStream(prompt).collect { chunk ->
            print(chunk.text)
            // `chunk.text` is nullable; default to "" so we never append the literal "null"
            fullResponse += chunk.text ?: ""
        }
        // [END multimodal_one_image_streaming]
    }

    /** Sends a PDF document plus a text instruction and logs the full response. */
    private suspend fun onePdfNonStreaming(pdfUri: Uri) {
        // [START multimodal_one_pdf_non_streaming]
        val contentResolver = applicationContext.contentResolver

        // Provide the URI for the PDF file you want to send to the model
        val inputStream = contentResolver.openInputStream(pdfUri)

        if (inputStream != null) { // Check if the PDF file loaded successfully
            inputStream.use { stream ->
                // Provide a prompt that includes the PDF file specified above and text
                val prompt = content {
                    inlineData(
                        bytes = stream.readBytes(),
                        mimeType = "application/pdf" // Specify the appropriate PDF file MIME type
                    )
                    text("Summarize the important results in this report.")
                }

                // To generate text output, call `generateContent` with the prompt
                val response = model.generateContent(prompt)

                // Log the generated text, handling the case where it might be null
                Log.d(TAG, response.text ?: "")
            }
        } else {
            Log.e(TAG, "Error getting input stream for file.")
            // Handle the error appropriately
        }
        // [END multimodal_one_pdf_non_streaming]
    }

    /** Sends a PDF document plus a text instruction and logs the response as it streams in. */
    private suspend fun onePdfStreaming(pdfUri: Uri) {
        // [START multimodal_one_pdf_streaming]
        val contentResolver = applicationContext.contentResolver

        // Provide the URI for the PDF you want to send to the model
        val inputStream = contentResolver.openInputStream(pdfUri)

        if (inputStream != null) { // Check if the PDF file loaded successfully
            inputStream.use { stream ->
                // Provide a prompt that includes the PDF file specified above and text
                val prompt = content {
                    inlineData(
                        bytes = stream.readBytes(),
                        mimeType = "application/pdf" // Specify the appropriate PDF file MIME type
                    )
                    text("Summarize the important results in this report.")
                }

                // To stream generated text output, call `generateContentStream` with the prompt
                var fullResponse = ""
                model.generateContentStream(prompt).collect { chunk ->
                    // Log the generated text, handling the case where it might be null
                    val chunkText = chunk.text ?: ""
                    Log.d(TAG, chunkText)
                    fullResponse += chunkText
                }
            }
        } else {
            Log.e(TAG, "Error getting input stream for file.")
            // Handle the error appropriately
        }
        // [END multimodal_one_pdf_streaming]
    }

    /** Sends a video clip plus a text question and logs the full response. */
    private suspend fun videoNonStreaming(videoUri: Uri) {
        // [START multimodal_video_non_streaming]
        val contentResolver = applicationContext.contentResolver
        contentResolver.openInputStream(videoUri).use { stream ->
            stream?.let {
                val bytes = stream.readBytes()

                // Provide a prompt that includes the video specified above and text
                val prompt = content {
                    inlineData(bytes, "video/mp4")
                    text("What is in the video?")
                }

                // To generate text output, call generateContent with the prompt
                val response = model.generateContent(prompt)
                Log.d(TAG, response.text ?: "")
            }
        }
        // [END multimodal_video_non_streaming]
    }

    /** Sends a video clip plus a text question and logs the response as it streams in. */
    private suspend fun videoStreaming(videoUri: Uri) {
        // [START multimodal_video_streaming]
        val contentResolver = applicationContext.contentResolver
        contentResolver.openInputStream(videoUri).use { stream ->
            stream?.let {
                val bytes = stream.readBytes()

                // Provide a prompt that includes the video specified above and text
                val prompt = content {
                    inlineData(bytes, "video/mp4")
                    text("What is in the video?")
                }

                // To stream generated text output, call generateContentStream with the prompt
                var fullResponse = ""
                model.generateContentStream(prompt).collect { chunk ->
                    Log.d(TAG, chunk.text ?: "")
                    // `chunk.text` is nullable; default to "" so we never append the literal "null"
                    fullResponse += chunk.text ?: ""
                }
            }
        }
        // [END multimodal_video_streaming]
    }
}