Skip to content

Commit 3a31a46

Browse files
feat(api): Fix audio transcription response formats
1 parent 2f39a2b commit 3a31a46

File tree

5 files changed

+6
-228
lines changed

5 files changed

+6
-228
lines changed

api.md

+2-4
Original file line numberDiff line numberDiff line change
@@ -60,22 +60,20 @@ Methods:
6060
Types:
6161

6262
- <code><a href="./src/resources/audio/transcriptions.ts">Transcription</a></code>
63-
- <code><a href="./src/resources/audio/transcriptions.ts">TranscriptionCreateResponse</a></code>
6463

6564
Methods:
6665

67-
- <code title="post /openai/v1/audio/transcriptions">client.audio.transcriptions.<a href="./src/resources/audio/transcriptions.ts">create</a>({ ...params }) -> TranscriptionCreateResponse</code>
66+
- <code title="post /openai/v1/audio/transcriptions">client.audio.transcriptions.<a href="./src/resources/audio/transcriptions.ts">create</a>({ ...params }) -> Transcription</code>
6867

6968
## Translations
7069

7170
Types:
7271

7372
- <code><a href="./src/resources/audio/translations.ts">Translation</a></code>
74-
- <code><a href="./src/resources/audio/translations.ts">TranslationCreateResponse</a></code>
7573

7674
Methods:
7775

78-
- <code title="post /openai/v1/audio/translations">client.audio.translations.<a href="./src/resources/audio/translations.ts">create</a>({ ...params }) -> TranslationCreateResponse</code>
76+
- <code title="post /openai/v1/audio/translations">client.audio.translations.<a href="./src/resources/audio/translations.ts">create</a>({ ...params }) -> Translation</code>
7977

8078
# Models
8179

src/resources/audio/audio.ts

-2
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@ export class Audio extends APIResource {
1212
export namespace Audio {
1313
export import Transcriptions = TranscriptionsAPI.Transcriptions;
1414
export import Transcription = TranscriptionsAPI.Transcription;
15-
export import TranscriptionCreateResponse = TranscriptionsAPI.TranscriptionCreateResponse;
1615
export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
1716
export import Translations = TranslationsAPI.Translations;
1817
export import Translation = TranslationsAPI.Translation;
19-
export import TranslationCreateResponse = TranslationsAPI.TranslationCreateResponse;
2018
export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
2119
}

src/resources/audio/index.ts

+2-12
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,5 @@
11
// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

33
export { Audio } from './audio';
4-
export {
5-
Transcription,
6-
TranscriptionCreateResponse,
7-
TranscriptionCreateParams,
8-
Transcriptions,
9-
} from './transcriptions';
10-
export {
11-
Translation,
12-
TranslationCreateResponse,
13-
TranslationCreateParams,
14-
Translations,
15-
} from './translations';
4+
export { Transcription, TranscriptionCreateParams, Transcriptions } from './transcriptions';
5+
export { Translation, TranslationCreateParams, Translations } from './translations';

src/resources/audio/transcriptions.ts

+1-120
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@ export class Transcriptions extends APIResource {
99
/**
1010
* Transcribes audio into the input language.
1111
*/
12-
create(
13-
body: TranscriptionCreateParams,
14-
options?: Core.RequestOptions,
15-
): Core.APIPromise<TranscriptionCreateResponse> {
12+
create(body: TranscriptionCreateParams, options?: Core.RequestOptions): Core.APIPromise<Transcription> {
1613
return this._client.post(
1714
'/openai/v1/audio/transcriptions',
1815
multipartFormRequestOptions({ body, ...options }),
@@ -31,121 +28,6 @@ export interface Transcription {
3128
text: string;
3229
}
3330

34-
/**
35-
* Represents a transcription response returned by model, based on the provided
36-
* input.
37-
*/
38-
export type TranscriptionCreateResponse =
39-
| Transcription
40-
| TranscriptionCreateResponse.CreateTranscriptionResponseVerboseJson;
41-
42-
export namespace TranscriptionCreateResponse {
43-
/**
44-
* Represents a verbose json transcription response returned by model, based on the
45-
* provided input.
46-
*/
47-
export interface CreateTranscriptionResponseVerboseJson {
48-
/**
49-
* The duration of the input audio.
50-
*/
51-
duration: string;
52-
53-
/**
54-
* The language of the input audio.
55-
*/
56-
language: string;
57-
58-
/**
59-
* The transcribed text.
60-
*/
61-
text: string;
62-
63-
/**
64-
* Segments of the transcribed text and their corresponding details.
65-
*/
66-
segments?: Array<CreateTranscriptionResponseVerboseJson.Segment>;
67-
68-
/**
69-
* Extracted words and their corresponding timestamps.
70-
*/
71-
words?: Array<CreateTranscriptionResponseVerboseJson.Word>;
72-
}
73-
74-
export namespace CreateTranscriptionResponseVerboseJson {
75-
export interface Segment {
76-
/**
77-
* Unique identifier of the segment.
78-
*/
79-
id: number;
80-
81-
/**
82-
* Average logprob of the segment. If the value is lower than -1, consider the
83-
* logprobs failed.
84-
*/
85-
avg_logprob: number;
86-
87-
/**
88-
* Compression ratio of the segment. If the value is greater than 2.4, consider the
89-
* compression failed.
90-
*/
91-
compression_ratio: number;
92-
93-
/**
94-
* End time of the segment in seconds.
95-
*/
96-
end: number;
97-
98-
/**
99-
* Probability of no speech in the segment. If the value is higher than 1.0 and the
100-
* `avg_logprob` is below -1, consider this segment silent.
101-
*/
102-
no_speech_prob: number;
103-
104-
/**
105-
* Seek offset of the segment.
106-
*/
107-
seek: number;
108-
109-
/**
110-
* Start time of the segment in seconds.
111-
*/
112-
start: number;
113-
114-
/**
115-
* Temperature parameter used for generating the segment.
116-
*/
117-
temperature: number;
118-
119-
/**
120-
* Text content of the segment.
121-
*/
122-
text: string;
123-
124-
/**
125-
* Array of token IDs for the text content.
126-
*/
127-
tokens: Array<number>;
128-
}
129-
130-
export interface Word {
131-
/**
132-
* End time of the word in seconds.
133-
*/
134-
end: number;
135-
136-
/**
137-
* Start time of the word in seconds.
138-
*/
139-
start: number;
140-
141-
/**
142-
* The text content of the word.
143-
*/
144-
word: string;
145-
}
146-
}
147-
}
148-
14931
export interface TranscriptionCreateParams {
15032
/**
15133
* The audio file object (not file name) to transcribe, in one of these formats:
@@ -199,6 +81,5 @@ export interface TranscriptionCreateParams {
19981

20082
export namespace Transcriptions {
20183
export import Transcription = TranscriptionsAPI.Transcription;
202-
export import TranscriptionCreateResponse = TranscriptionsAPI.TranscriptionCreateResponse;
20384
export import TranscriptionCreateParams = TranscriptionsAPI.TranscriptionCreateParams;
20485
}

src/resources/audio/translations.ts

+1-90
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@ export class Translations extends APIResource {
99
/**
1010
* Translates audio into English.
1111
*/
12-
create(
13-
body: TranslationCreateParams,
14-
options?: Core.RequestOptions,
15-
): Core.APIPromise<TranslationCreateResponse> {
12+
create(body: TranslationCreateParams, options?: Core.RequestOptions): Core.APIPromise<Translation> {
1613
return this._client.post(
1714
'/openai/v1/audio/translations',
1815
multipartFormRequestOptions({ body, ...options }),
@@ -24,91 +21,6 @@ export interface Translation {
2421
text: string;
2522
}
2623

27-
export type TranslationCreateResponse =
28-
| Translation
29-
| TranslationCreateResponse.CreateTranslationResponseVerboseJson;
30-
31-
export namespace TranslationCreateResponse {
32-
export interface CreateTranslationResponseVerboseJson {
33-
/**
34-
* The duration of the input audio.
35-
*/
36-
duration: string;
37-
38-
/**
39-
* The language of the output translation (always `english`).
40-
*/
41-
language: string;
42-
43-
/**
44-
* The translated text.
45-
*/
46-
text: string;
47-
48-
/**
49-
* Segments of the translated text and their corresponding details.
50-
*/
51-
segments?: Array<CreateTranslationResponseVerboseJson.Segment>;
52-
}
53-
54-
export namespace CreateTranslationResponseVerboseJson {
55-
export interface Segment {
56-
/**
57-
* Unique identifier of the segment.
58-
*/
59-
id: number;
60-
61-
/**
62-
* Average logprob of the segment. If the value is lower than -1, consider the
63-
* logprobs failed.
64-
*/
65-
avg_logprob: number;
66-
67-
/**
68-
* Compression ratio of the segment. If the value is greater than 2.4, consider the
69-
* compression failed.
70-
*/
71-
compression_ratio: number;
72-
73-
/**
74-
* End time of the segment in seconds.
75-
*/
76-
end: number;
77-
78-
/**
79-
* Probability of no speech in the segment. If the value is higher than 1.0 and the
80-
* `avg_logprob` is below -1, consider this segment silent.
81-
*/
82-
no_speech_prob: number;
83-
84-
/**
85-
* Seek offset of the segment.
86-
*/
87-
seek: number;
88-
89-
/**
90-
* Start time of the segment in seconds.
91-
*/
92-
start: number;
93-
94-
/**
95-
* Temperature parameter used for generating the segment.
96-
*/
97-
temperature: number;
98-
99-
/**
100-
* Text content of the segment.
101-
*/
102-
text: string;
103-
104-
/**
105-
* Array of token IDs for the text content.
106-
*/
107-
tokens: Array<number>;
108-
}
109-
}
110-
}
111-
11224
export interface TranslationCreateParams {
11325
/**
11426
* The audio file object (not file name) to translate, in one of these formats: flac,
@@ -146,6 +58,5 @@ export interface TranslationCreateParams {
14658

14759
export namespace Translations {
14860
export import Translation = TranslationsAPI.Translation;
149-
export import TranslationCreateResponse = TranslationsAPI.TranslationCreateResponse;
15061
export import TranslationCreateParams = TranslationsAPI.TranslationCreateParams;
15162
}

0 commit comments

Comments
 (0)