Skip to content

Commit

Permalink
Add timestamp_granularities (#311)
Browse files Browse the repository at this point in the history
  • Loading branch information
mxwell authored Apr 1, 2024
1 parent fb89ec6 commit e78654d
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 0 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## Unreleased

### Added

- **Audio**: add `timestampGranularities`

## 3.7.0

### Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ internal class AudioApi(val requester: HttpRequester) : Audio {
request.responseFormat?.let { append(key = "response_format", value = it.value) }
request.temperature?.let { append(key = "temperature", value = it) }
request.language?.let { append(key = "language", value = it) }
if (request.responseFormat == AudioResponseFormat.VerboseJson) {
for (timestampGranularity in request.timestampGranularities) {
append(key = "timestamp_granularities[]", value = timestampGranularity.value)
}
}
}

@BetaOpenAI
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,22 @@ class TestAudio : TestOpenAI() {
assertTrue { transcription.segments?.isNotEmpty() ?: false }
}

/**
 * Requests a `VerboseJson` transcription with [TimestampGranularity.Word] only and verifies that
 * the response carries word entries, while the segment list is absent.
 */
@Test
fun transcriptionWithWordTimestamps() = test {
    val request = transcriptionRequest {
        audio = FileSource(path = testFilePath("audio/micro-machines.wav"), fileSystem = TestFileSystem)
        model = ModelId("whisper-1")
        responseFormat = AudioResponseFormat.VerboseJson
        timestampGranularities = listOf(TimestampGranularity.Word)
    }
    val transcription = openAI.transcription(request)
    assertTrue { transcription.text.isNotEmpty() }
    // kotlin.test's assertEquals takes (expected, actual); keeping that order makes
    // failure messages report the expected and actual values correctly.
    assertEquals("english", transcription.language)
    // Fail with an explicit message instead of a bare NullPointerException when duration is missing.
    val duration = checkNotNull(transcription.duration) { "duration is missing from the transcription" }
    assertEquals(29.88, duration, absoluteTolerance = 0.1)
    // Only word granularity was requested, so no segments are expected in the response.
    assertEquals(null, transcription.segments)
    assertTrue { transcription.words?.isNotEmpty() ?: false }
}

@Test
fun translation() = test {
val request = translationRequest {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.aallam.openai.api.audio

import kotlinx.serialization.Serializable
import kotlin.jvm.JvmInline

/**
 * Timestamp granularity option of an audio transcription request.
 *
 * Wraps the raw string [value] that identifies the granularity; predefined instances for the
 * known options are available as [Word] and [Segment].
 */
@Serializable
@JvmInline
public value class TimestampGranularity(public val value: String) {
public companion object {
/** Granularity whose [value] is `"word"`. */
public val Word: TimestampGranularity = TimestampGranularity("word")
/** Granularity whose [value] is `"segment"`. */
public val Segment: TimestampGranularity = TimestampGranularity("segment")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ public data class Transcription(
@SerialName("language") val language: String? = null,
@SerialName("duration") val duration: Double? = null,
@SerialName("segments") val segments: List<Segment>? = null,
@SerialName("words") val words: List<Word>? = null,
)
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ public class TranscriptionRequest(
* [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency.
*/
public val language: String? = null,

/**
* The timestamp granularities to populate for this transcription.
* responseFormat must be set to verbose_json to use timestamp granularities.
* Either or both of these options are supported: word, or segment.
* Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
*/
public val timestampGranularities: List<TimestampGranularity> = emptyList(),
)

/**
Expand Down Expand Up @@ -90,6 +98,14 @@ public class TranscriptionRequestBuilder {
*/
public var language: String? = null

/**
* The timestamp granularities to populate for this transcription.
* responseFormat must be set to verbose_json to use timestamp granularities.
* Either or both of these options are supported: word, or segment.
* Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
*/
public var timestampGranularities: List<TimestampGranularity> = emptyList()

/**
* Builder of [TranscriptionRequest] instances.
*/
Expand All @@ -100,5 +116,6 @@ public class TranscriptionRequestBuilder {
responseFormat = responseFormat,
temperature = temperature,
language = language,
timestampGranularities = timestampGranularities,
)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.aallam.openai.api.audio

import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable

/**
 * A single word entry, deserialized from an object with `word`, `start` and `end` fields.
 *
 * @property word value of the `word` field: the text of the word.
 * @property start value of the `start` field; presumably the word's start time in seconds (TODO confirm against the API reference).
 * @property end value of the `end` field; presumably the word's end time in seconds (TODO confirm against the API reference).
 */
@Serializable
public data class Word(
@SerialName("word") val word: String,
@SerialName("start") val start: Double,
@SerialName("end") val end: Double,
)

0 comments on commit e78654d

Please sign in to comment.