mirror of
https://github.com/ggerganov/whisper.cpp.git
synced 2025-01-24 06:49:06 +01:00
86a277f78d
* run `go mod tidy` before building examples Running `make examples` after cloning the repository gives the following error: ``` ... [100%] Built target whisper gmake[3]: Leaving directory '/tmp/exp/whisper.cpp/bindings/go/build' gmake[2]: Leaving directory '/tmp/exp/whisper.cpp/bindings/go/build' gmake[1]: Leaving directory '/tmp/exp/whisper.cpp/bindings/go/build' Build example go-model-download Build example go-whisper examples/go-whisper/process.go:11:2: missing go.sum entry for module providing package github.com/go-audio/wav (imported by github.com/ggerganov/whisper.cpp/bindings/go/examples/go-whisper); to add: go get github.com/ggerganov/whisper.cpp/bindings/go/examples/go-whisper make: *** [Makefile:26: examples/go-whisper] Error 1 ``` * remove executable bit from various files
64 lines
1.5 KiB
Go
64 lines
1.5 KiB
Go
package whisper
|
|
|
|
import (
|
|
"io"
|
|
"time"
|
|
)
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// TYPES
|
|
|
|
// SegmentCallback is the callback function for processing segments in real
|
|
// time. It is called during the Process function
|
|
type SegmentCallback func(Segment)
|
|
|
|
// Model is the interface to a whisper model. Create a new model with the
|
|
// function whisper.New(string)
|
|
type Model interface {
|
|
io.Closer
|
|
|
|
// Return a new speech-to-text context.
|
|
NewContext() (Context, error)
|
|
|
|
// Return all languages supported.
|
|
Languages() []string
|
|
}
|
|
|
|
// Context is the speach recognition context.
|
|
type Context interface {
|
|
SetLanguage(string) error // Set the language to use for speech recognition.
|
|
Language() string // Get language
|
|
SetSpeedup(bool) // Set speedup flag
|
|
|
|
// Process mono audio data and return any errors.
|
|
// If defined, newly generated segments are passed to the
|
|
// callback function during processing.
|
|
Process([]float32, SegmentCallback) error
|
|
|
|
// After process is called, return segments until the end of the stream
|
|
// is reached, when io.EOF is returned.
|
|
NextSegment() (Segment, error)
|
|
}
|
|
|
|
// Segment is the text result of a speech recognition.
|
|
type Segment struct {
|
|
// Segment Number
|
|
Num int
|
|
|
|
// Time beginning and end timestamps for the segment.
|
|
Start, End time.Duration
|
|
|
|
// The text of the segment.
|
|
Text string
|
|
|
|
// The tokens of the segment.
|
|
Tokens []Token
|
|
}
|
|
|
|
// Token is a text or special token.
type Token struct {
	// Id is the integer identifier of the token. The spelling "Id" (rather
	// than the idiomatic "ID") is kept so existing callers keep compiling.
	Id int

	// Text is the string associated with the token.
	Text string

	// P is presumably the probability assigned to the token — TODO confirm
	// against the code that populates it.
	P float32
}
|