context.params.SetMaxSegmentLength(int(n))
}
+// SetTokenTimestamps sets the token timestamps flag by passing b to the underlying params.
+func (context *context) SetTokenTimestamps(b bool) {
+ context.params.SetTokenTimestamps(b)
+}
+
// Set max tokens per segment (0 = no limit)
func (context *context) SetMaxTokensPerSegment(n uint) {
context.params.SetMaxTokensPerSegment(int(n))
func toTokens(ctx *whisper.Context, n int) []Token {
result := make([]Token, ctx.Whisper_full_n_tokens(n))
for i := 0; i < len(result); i++ {
+ data := ctx.Whisper_full_get_token_data(n, i)
+ // T0/T1 are scaled by 10ms below to form Start/End. NOTE(review): presumably only populated when token timestamps are enabled - confirm.
result[i] = Token{
- Id: int(ctx.Whisper_full_get_token_id(n, i)),
- Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
- P: ctx.Whisper_full_get_token_p(n, i),
+ Id: int(ctx.Whisper_full_get_token_id(n, i)),
+ Text: ctx.Whisper_full_get_token_text(n, i),
+ P: ctx.Whisper_full_get_token_p(n, i),
+ Start: time.Duration(data.T0()) * time.Millisecond * 10,
+ End: time.Duration(data.T1()) * time.Millisecond * 10,
}
}
return result
SetTokenThreshold(float32) // Set timestamp token probability threshold
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
SetMaxSegmentLength(uint) // Set max segment length in characters
+ SetTokenTimestamps(bool) // Set token timestamps flag
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
// Process mono audio data and return any errors.
// Token is a text or special token
type Token struct {
- Id int
- Text string
- P float32
+ Id int // token id, as returned by Whisper_full_get_token_id
+ Text string // token text, as returned by Whisper_full_get_token_text
+ P float32 // value returned by Whisper_full_get_token_p for this token
+ Start, End time.Duration // token data T0 and T1, each multiplied by 10ms
}
// Get token data for the specified token in the specified segment.
// This contains probabilities, timestamps, etc.
-func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
+func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
}
}
return true
}
+// T0 returns the t0 field of the token data as an int64. NOTE(review): toTokens multiplies it by 10ms, so it looks like a count of 10ms ticks - confirm.
+func (t TokenData) T0() int64 {
+ return int64(t.t0)
+}
+// T1 returns the t1 field of the token data as an int64. NOTE(review): toTokens multiplies it by 10ms, so it looks like a count of 10ms ticks - confirm.
+func (t TokenData) T1() int64 {
+ return int64(t.t1)
+}