Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
aallam authored Apr 1, 2024
2 parents fe3b800 + 445160e commit d6610fa
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 10 deletions.
1 change: 1 addition & 0 deletions .tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
java temurin-11.0.22+7
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
## Unreleased

### Added

- **Audio**: add `timestampGranularities` (thanks @mxwell)

### Fixed

- **Core**: nullable `OpenAIErrorDetails` fields (#315)
- **Messages**: nullable field `MessageContent.Image#fileId` (#313)

## 3.7.0

### Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ internal class AudioApi(val requester: HttpRequester) : Audio {
request.responseFormat?.let { append(key = "response_format", value = it.value) }
request.temperature?.let { append(key = "temperature", value = it) }
request.language?.let { append(key = "language", value = it) }
if (request.responseFormat == AudioResponseFormat.VerboseJson) {
for (timestampGranularity in request.timestampGranularities.orEmpty()) {
append(key = "timestamp_granularities[]", value = timestampGranularity.value)
}
}
}

@BetaOpenAI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,22 @@ class TestAudio : TestOpenAI() {
assertTrue { transcription.segments?.isNotEmpty() ?: false }
}

/**
 * Requests a verbose JSON transcription with word-level timestamps and verifies that the
 * response carries text, language, duration and words, and that no segments are returned.
 */
@Test
fun transcriptionWithWordTimestamps() = test {
    val request = transcriptionRequest {
        audio = FileSource(path = testFilePath("audio/micro-machines.wav"), fileSystem = TestFileSystem)
        model = ModelId("whisper-1")
        // AudioApi only forwards timestamp granularities when the response format is VerboseJson.
        responseFormat = AudioResponseFormat.VerboseJson
        timestampGranularities = listOf(TimestampGranularity.Word)
    }
    val transcription = openAI.transcription(request)
    assertTrue { transcription.text.isNotEmpty() }
    // kotlin.test's assertEquals takes (expected, actual); keeping that order makes
    // failure messages report the right value as "expected".
    assertEquals("english", transcription.language)
    // Fail with a descriptive message instead of a bare NullPointerException from `!!`.
    val duration = checkNotNull(transcription.duration) { "transcription.duration is missing from the response" }
    assertEquals(29.88, duration, absoluteTolerance = 0.1)
    // Only word granularity was requested, so segments are expected to be absent.
    assertEquals(null, transcription.segments)
    assertTrue { transcription.words?.isNotEmpty() ?: false }
}

@Test
fun translation() = test {
val request = translationRequest {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.aallam.openai.api.audio

import kotlinx.serialization.Serializable
import kotlin.jvm.JvmInline

/**
 * Timestamp granularity that can be requested for an audio transcription.
 *
 * Inline wrapper around the raw string [value]; predefined instances are exposed
 * through the companion object.
 */
@Serializable
@JvmInline
public value class TimestampGranularity(public val value: String) {
public companion object {
/** Granularity whose [value] is `"word"`. */
public val Word: TimestampGranularity = TimestampGranularity("word")
/** Granularity whose [value] is `"segment"`. */
public val Segment: TimestampGranularity = TimestampGranularity("segment")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ public data class Transcription(
@SerialName("language") val language: String? = null,
@SerialName("duration") val duration: Double? = null,
@SerialName("segments") val segments: List<Segment>? = null,
@SerialName("words") val words: List<Word>? = null,
)
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.aallam.openai.api.audio

import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.OpenAIDsl
import com.aallam.openai.api.file.FileSource
import com.aallam.openai.api.model.ModelId
Expand Down Expand Up @@ -43,6 +42,14 @@ public class TranscriptionRequest(
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency.
*/
public val language: String? = null,

/**
* The timestamp granularities to populate for this transcription.
* [responseFormat] must be set to [AudioResponseFormat.VerboseJson] to use timestamp granularities.
* Either or both of these options are supported: [TimestampGranularity.Word], or [TimestampGranularity.Segment].
* Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
*/
public val timestampGranularities: List<TimestampGranularity>? = null,
)

/**
Expand Down Expand Up @@ -90,6 +97,14 @@ public class TranscriptionRequestBuilder {
*/
public var language: String? = null

/**
* The timestamp granularities to populate for this transcription.
* responseFormat must be set to verbose_json to use timestamp granularities.
* Either or both of these options are supported: word, or segment.
* Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
*/
public var timestampGranularities: List<TimestampGranularity>? = null

/**
* Builder of [TranscriptionRequest] instances.
*/
Expand All @@ -100,5 +115,6 @@ public class TranscriptionRequestBuilder {
responseFormat = responseFormat,
temperature = temperature,
language = language,
timestampGranularities = timestampGranularities,
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.aallam.openai.api.audio

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
 * A single entry of the `words` list of a [Transcription].
 *
 * @property word the value of the `word` field.
 * @property start the value of the `start` field — presumably the word's start offset in seconds; TODO confirm against the API reference.
 * @property end the value of the `end` field — presumably the word's end offset in seconds; TODO confirm against the API reference.
 */
@Serializable
public data class Word(
@SerialName("word") val word: String,
@SerialName("start") val start: Double,
@SerialName("end") val end: Double,
)
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import kotlinx.serialization.Serializable
*/
@Serializable
public data class OpenAIError(
@SerialName("error") public val detail: OpenAIErrorDetails?,
@SerialName("error") public val detail: OpenAIErrorDetails? = null,
)

/**
Expand All @@ -23,8 +23,8 @@ public data class OpenAIError(
*/
@Serializable
public data class OpenAIErrorDetails(
@SerialName("code") val code: String?,
@SerialName("message") val message: String?,
@SerialName("param") val param: String?,
@SerialName("type") val type: String?,
@SerialName("code") val code: String? = null,
@SerialName("message") val message: String? = null,
@SerialName("param") val param: String? = null,
@SerialName("type") val type: String? = null,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.aallam.openai.api.message

import com.aallam.openai.api.file.FileId
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
 * References an image File in the content of a message.
 *
 * @property fileId the File ID of the referenced image; serialized under the `file_id` key.
 */
@Serializable
public data class ImageFile(
/**
 * The File ID of the image in the message content.
 */
@SerialName("file_id") val fileId: FileId
)
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public sealed interface MessageContent {
/**
* The File ID of the image in the message content.
*/
@SerialName("file_id") val fileId: FileId
@SerialName("image_file") val imageFile: ImageFile
) : MessageContent

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.aallam.openai.api.message

import com.aallam.openai.api.BetaOpenAI
import com.aallam.openai.api.file.FileId
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
Expand All @@ -13,13 +14,13 @@ public data class MessageFile(
/**
* The identifier, which can be referenced in API endpoints.
*/
val id: FileId,
@SerialName("id") val id: FileId,
/**
* The Unix timestamp (in seconds) for when the message file was created.
*/
val createdAt: Int? = null,
@SerialName("created_at") val createdAt: Int? = null,
/**
* The ID of the message that the File is attached to.
*/
val messageId: MessageId? = null,
@SerialName("message_id") val messageId: MessageId? = null,
)

0 comments on commit d6610fa

Please sign in to comment.