Merge branch 'main' into main

aallam · web-flow · commit d6610fa99b2e · 2024-04-01T16:13:09.000+02:00
diff --git a/.tool-versions b/.tool-versions
@@ -0,0 +1 @@
+java temurin-11.0.22+7
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,14 @@
+## Unreleased
+
+### Added
+
+- **Audio**: add `timestampGranularities` (thanks @mxwell)
+
+### Fixed
+
+- **Core**: nullable `OpenAIErrorDetails` fields (#315)
+- **Messages**: nullable field `MessageContent.Image#fileId` (#313)
+
 ## 3.7.0
 
 ### Added
diff --git a/openai-client/src/commonMain/kotlin/com.aallam.openai.client/internal/api/AudioApi.kt b/openai-client/src/commonMain/kotlin/com.aallam.openai.client/internal/api/AudioApi.kt
@@ -61,6 +61,11 @@ internal class AudioApi(val requester: HttpRequester) : Audio {
         request.responseFormat?.let { append(key = "response_format", value = it.value) }
         request.temperature?.let { append(key = "temperature", value = it) }
         request.language?.let { append(key = "language", value = it) }
+        if (request.responseFormat == AudioResponseFormat.VerboseJson) {
+            for (timestampGranularity in request.timestampGranularities.orEmpty()) {
+                append(key = "timestamp_granularities[]", value = timestampGranularity.value)
+            }
+        }
     }
 
     @BetaOpenAI
diff --git a/openai-client/src/commonTest/kotlin/com/aallam/openai/client/TestAudio.kt b/openai-client/src/commonTest/kotlin/com/aallam/openai/client/TestAudio.kt
@@ -55,6 +55,22 @@ class TestAudio : TestOpenAI() {
         assertTrue { transcription.segments?.isNotEmpty() ?: false }
     }
 
+    @Test
+    fun transcriptionWithWordTimestamps() = test {
+        val request = transcriptionRequest {
+            audio = FileSource(path = testFilePath("audio/micro-machines.wav"), fileSystem = TestFileSystem)
+            model = ModelId("whisper-1")
+            responseFormat = AudioResponseFormat.VerboseJson
+            timestampGranularities = listOf(TimestampGranularity.Word)
+        }
+        val transcription = openAI.transcription(request)
+        assertTrue { transcription.text.isNotEmpty() }
+        assertEquals(transcription.language, "english")
+        assertEquals(transcription.duration!!, 29.88, absoluteTolerance = 0.1)
+        assertEquals(transcription.segments, null)
+        assertTrue { transcription.words?.isNotEmpty() ?: false }
+    }
+
     @Test
     fun translation() = test {
         val request = translationRequest {
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/TimestampGranularity.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/TimestampGranularity.kt
@@ -0,0 +1,13 @@
+package com.aallam.openai.api.audio
+
+import kotlinx.serialization.Serializable
+import kotlin.jvm.JvmInline
+
+@Serializable
+@JvmInline
+public value class TimestampGranularity(public val value: String) {
+    public companion object {
+        public val Word: TimestampGranularity = TimestampGranularity("word")
+        public val Segment: TimestampGranularity = TimestampGranularity("segment")
+    }
+}
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/Transcription.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/Transcription.kt
@@ -20,4 +20,5 @@ public data class Transcription(
     @SerialName("language") val language: String? = null,
     @SerialName("duration") val duration: Double? = null,
     @SerialName("segments") val segments: List<Segment>? = null,
+    @SerialName("words") val words: List<Word>? = null,
 )
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/TranscriptionRequest.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/TranscriptionRequest.kt
@@ -1,6 +1,5 @@
 package com.aallam.openai.api.audio
 
-import com.aallam.openai.api.BetaOpenAI
 import com.aallam.openai.api.OpenAIDsl
 import com.aallam.openai.api.file.FileSource
 import com.aallam.openai.api.model.ModelId
@@ -43,6 +42,14 @@ public class TranscriptionRequest(
      * [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency.
      */
     public val language: String? = null,
+
+    /**
+     * The timestamp granularities to populate for this transcription.
+     * [responseFormat] must be set to [AudioResponseFormat.VerboseJson] to use timestamp granularities.
+     * Either or both of these options are supported: [TimestampGranularity.Word] and [TimestampGranularity.Segment].
+     * Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
+     */
+    public val timestampGranularities: List<TimestampGranularity>? = null,
 )
 
 /**
@@ -90,6 +97,14 @@ public class TranscriptionRequestBuilder {
      */
     public var language: String? = null
 
+    /**
+     * The timestamp granularities to populate for this transcription.
+     * responseFormat must be set to verbose_json to use timestamp granularities.
+     * Either or both of these options are supported: word and segment.
+     * Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
+     */
+    public var timestampGranularities: List<TimestampGranularity>? = null
+
     /**
      * Builder of [TranscriptionRequest] instances.
      */
@@ -100,5 +115,6 @@ public class TranscriptionRequestBuilder {
         responseFormat = responseFormat,
         temperature = temperature,
         language = language,
+        timestampGranularities = timestampGranularities,
     )
 }
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/Word.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/audio/Word.kt
@@ -0,0 +1,11 @@
+package com.aallam.openai.api.audio
+
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+@Serializable
+public data class Word(
+    @SerialName("word") val word: String,
+    @SerialName("start") val start: Double,
+    @SerialName("end") val end: Double,
+)
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/exception/OpenAIErrorDetails.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/exception/OpenAIErrorDetails.kt
@@ -10,7 +10,7 @@ import kotlinx.serialization.Serializable
  */
 @Serializable
 public data class OpenAIError(
-    @SerialName("error") public val detail: OpenAIErrorDetails?,
+    @SerialName("error") public val detail: OpenAIErrorDetails? = null,
 )
 
 /**
@@ -23,8 +23,8 @@ public data class OpenAIError(
  */
 @Serializable
 public data class OpenAIErrorDetails(
-    @SerialName("code") val code: String?,
-    @SerialName("message") val message: String?,
-    @SerialName("param") val param: String?,
-    @SerialName("type") val type: String?,
+    @SerialName("code") val code: String? = null,
+    @SerialName("message") val message: String? = null,
+    @SerialName("param") val param: String? = null,
+    @SerialName("type") val type: String? = null,
 )
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/message/ImageFile.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/message/ImageFile.kt
@@ -0,0 +1,16 @@
+package com.aallam.openai.api.message
+
+import com.aallam.openai.api.file.FileId
+import kotlinx.serialization.SerialName
+import kotlinx.serialization.Serializable
+
+/**
+ * References an image File in the content of a message.
+ */
+@Serializable
+public data class ImageFile(
+    /**
+     * The File ID of the image in the message content.
+     */
+    @SerialName("file_id") val fileId: FileId
+)
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/message/MessageContent.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/message/MessageContent.kt
@@ -35,7 +35,7 @@ public sealed interface MessageContent {
         /**
          * The File ID of the image in the message content.
          */
-        @SerialName("file_id") val fileId: FileId
+        @SerialName("image_file") val imageFile: ImageFile
     ) : MessageContent
 
 }
diff --git a/openai-core/src/commonMain/kotlin/com.aallam.openai.api/message/MessageFile.kt b/openai-core/src/commonMain/kotlin/com.aallam.openai.api/message/MessageFile.kt
@@ -2,6 +2,7 @@ package com.aallam.openai.api.message
 
 import com.aallam.openai.api.BetaOpenAI
 import com.aallam.openai.api.file.FileId
+import kotlinx.serialization.SerialName
 import kotlinx.serialization.Serializable
 
 /**
@@ -13,13 +14,13 @@ public data class MessageFile(
     /**
      * The identifier, which can be referenced in API endpoints.
      */
-    val id: FileId,
+    @SerialName("id") val id: FileId,
     /**
      * The Unix timestamp (in seconds) for when the message file was created.
      */
-    val createdAt: Int? = null,
+    @SerialName("created_at") val createdAt: Int? = null,
     /**
      * The ID of the message that the File is attached to.
      */
-    val messageId: MessageId? = null,
+    @SerialName("message_id") val messageId: MessageId? = null,
 )

Original file line number	Diff line number	Diff line change
`@@ -20,4 +20,5 @@ public data class Transcription(`
`20`	`20`	`@SerialName("language") val language: String? = null,`
`21`	`21`	`@SerialName("duration") val duration: Double? = null,`
`22`	`22`	`@SerialName("segments") val segments: List<Segment>? = null,`
	`23`	`+ @SerialName("words") val words: List<Word>? = null,`
`23`	`24`	`)`
Original file line number	Diff line number	Diff line change
`@@ -35,7 +35,7 @@ public sealed interface MessageContent {`
`35`	`35`	`/**`
`36`	`36`	`* The File ID of the image in the message content.`
`37`	`37`	`*/`
`38`		`- @SerialName("file_id") val fileId: FileId`
	`38`	`+ @SerialName("image_file") val imageFile: ImageFile`
`39`	`39`	`) : MessageContent`
`40`	`40`
`41`	`41`	`}`