go : exposed various parts to the Go Interface (#697)

author Brian Murray <redacted>

Fri, 14 Apr 2023 15:52:10 +0000 (09:52 -0600)

committer GitHub <redacted>

Fri, 14 Apr 2023 15:52:10 +0000 (18:52 +0300)
author Brian Murray <redacted>
Fri, 14 Apr 2023 15:52:10 +0000 (09:52 -0600)
committer GitHub <redacted>
Fri, 14 Apr 2023 15:52:10 +0000 (18:52 +0300)
diff --git a/bindings/go/params.go b/bindings/go/params.go

index c413895c01e2fb271dbd2989940680347654b6e9..1ddcbeaa3dd82294e2653eb311fe545c9e8419d6 100644 (file)
--- a/bindings/go/params.go
+++ b/bindings/go/params.go
@@ -105,6 +105,10 @@ func (p *Params) SetMaxSegmentLength(n int) {
         p.max_len = C.int(n)
  }
  
+func (p *Params) SetTokenTimestamps(b bool) {
+       p.token_timestamps = toBool(b)
+}
+
  // Set max tokens per segment (0 = no limit)
  func (p *Params) SetMaxTokensPerSegment(n int) {
         p.max_tokens = C.int(n)
diff --git a/bindings/go/pkg/whisper/context.go b/bindings/go/pkg/whisper/context.go

index 0a6e9cbc2277c0298d4cc4a2e26008385c8791a7..593b32b37b09d0b9ece3ac4e0dcdc7a08f0d9b17 100644 (file)
--- a/bindings/go/pkg/whisper/context.go
+++ b/bindings/go/pkg/whisper/context.go
@@ -111,6 +111,11 @@ func (context *context) SetMaxSegmentLength(n uint) {
         context.params.SetMaxSegmentLength(int(n))
  }
  
+// Set token timestamps flag
+func (context *context) SetTokenTimestamps(b bool) {
+       context.params.SetTokenTimestamps(b)
+}
+
  // Set max tokens per segment (0 = no limit)
  func (context *context) SetMaxTokensPerSegment(n uint) {
         context.params.SetMaxTokensPerSegment(int(n))
@@ -280,10 +285,14 @@ func toSegment(ctx *whisper.Context, n int) Segment {
  func toTokens(ctx *whisper.Context, n int) []Token {
         result := make([]Token, ctx.Whisper_full_n_tokens(n))
         for i := 0; i < len(result); i++ {
+               data := ctx.Whisper_full_get_token_data(n, i)
+
                 result[i] = Token{
-                       Id:   int(ctx.Whisper_full_get_token_id(n, i)),
-                       Text: strings.TrimSpace(ctx.Whisper_full_get_token_text(n, i)),
-                       P:    ctx.Whisper_full_get_token_p(n, i),
+                       Id:    int(ctx.Whisper_full_get_token_id(n, i)),
+                       Text:  ctx.Whisper_full_get_token_text(n, i),
+                       P:     ctx.Whisper_full_get_token_p(n, i),
+                       Start: time.Duration(data.T0()) * time.Millisecond * 10,
+                       End:   time.Duration(data.T1()) * time.Millisecond * 10,
                 }
         }
         return result
diff --git a/bindings/go/pkg/whisper/interface.go b/bindings/go/pkg/whisper/interface.go

index a1d3f68227813b512052a67cb7c103f5f1786015..e65fed178d5b295349145b00d4cc8b135e6fd01a 100644 (file)
--- a/bindings/go/pkg/whisper/interface.go
+++ b/bindings/go/pkg/whisper/interface.go
@@ -41,6 +41,7 @@ type Context interface {
         SetTokenThreshold(float32)    // Set timestamp token probability threshold
         SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
         SetMaxSegmentLength(uint)     // Set max segment length in characters
+       SetTokenTimestamps(bool)      // Set token timestamps flag
         SetMaxTokensPerSegment(uint)  // Set max tokens per segment (0 = no limit)
  
         // Process mono audio data and return any errors.
@@ -85,7 +86,8 @@ type Segment struct {
  
  // Token is a text or special token
  type Token struct {
-       Id   int
-       Text string
-       P    float32
+       Id         int
+       Text       string
+       P          float32
+       Start, End time.Duration
  }
diff --git a/bindings/go/whisper.go b/bindings/go/whisper.go

index d47f7f76b96b0524f8d1ab7aea0e45b7c124e462..babadf006c2b2ebe71b44ead38a7ad456b27cfec 100644 (file)
--- a/bindings/go/whisper.go
+++ b/bindings/go/whisper.go
@@ -356,7 +356,7 @@ func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
  
  // Get token data for the specified token in the specified segment.
  // This contains probabilities, timestamps, etc.
-func (ctx *Context) whisper_full_get_token_data(segment int, token int) TokenData {
+func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
         return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
  }
  
@@ -407,3 +407,11 @@ func callEncoderBegin(user_data unsafe.Pointer) C.bool {
         }
         return true
  }
+
+func (t TokenData) T0() int64 {
+       return int64(t.t0)
+}
+
+func (t TokenData) T1() int64 {
+       return int64(t.t1)
+}
author	Brian Murray <redacted>
	Fri, 14 Apr 2023 15:52:10 +0000 (09:52 -0600)
committer	GitHub <redacted>
	Fri, 14 Apr 2023 15:52:10 +0000 (18:52 +0300)
bindings/go/params.go		patch | blob | history
bindings/go/pkg/whisper/context.go		patch | blob | history
bindings/go/pkg/whisper/interface.go		patch | blob | history
bindings/go/whisper.go		patch | blob | history