go : add beamsize/entropythold/maxcontext to context interface (#2350)

* feat(go binding): add beamsize/entropythold/maxcontext to context interface

fixes: #2349

* fix go building build

* fix dynamic link .so and header.h

* remove LD_LIBRARY_PATH

* remove ggml obj from whisper dynamic lib

* drop LIB_GGML
This commit is contained in:
hsinhoyeh 2024-08-28 22:09:01 +08:00 committed by GitHub
parent da9809f243
commit c4e1861d2c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 36 additions and 3 deletions

View File

@ -971,7 +971,8 @@ $(LIB_WHISPER): \
$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
$(LIB_WHISPER_S): \
$(OBJ_WHISPER)
$(OBJ_WHISPER) \
$(OBJ_GGML)
ar rcs $(LIB_WHISPER_S) $^
# common

View File

@ -14,7 +14,7 @@ GGML_METAL_PATH_RESOURCES := $(abspath ../..)
BUILD_DIR := build
MODELS_DIR := models
EXAMPLES_DIR := $(wildcard examples/*)
INCLUDE_PATH := $(abspath ../..)
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
LIBRARY_PATH := $(abspath ../..)
ifeq ($(UNAME_S),Darwin)

View File

@ -119,6 +119,18 @@ func (p *Params) SetAudioCtx(n int) {
p.audio_ctx = C.int(n)
}
func (p *Params) SetMaxContext(n int) {
p.n_max_text_ctx = C.int(n)
}
func (p *Params) SetBeamSize(n int) {
p.beam_search.beam_size = C.int(n)
}
func (p *Params) SetEntropyThold(t float32) {
p.entropy_thold = C.float(t)
}
// Set initial prompt
func (p *Params) SetInitialPrompt(prompt string) {
p.initial_prompt = C.CString(prompt)
@ -149,6 +161,8 @@ func (p *Params) String() string {
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
if p.translate {
str += " translate"
}

View File

@ -125,6 +125,21 @@ func (context *context) SetAudioCtx(n uint) {
context.params.SetAudioCtx(int(n))
}
// Set maximum number of text context tokens to store
func (context *context) SetMaxContext(n int) {
context.params.SetMaxContext(n)
}
// Set Beam Size
func (context *context) SetBeamSize(n int) {
context.params.SetBeamSize(n)
}
// Set Entropy threshold
func (context *context) SetEntropyThold(t float32) {
context.params.SetEntropyThold(t)
}
// Set initial prompt
func (context *context) SetInitialPrompt(prompt string) {
context.params.SetInitialPrompt(prompt)

View File

@ -48,6 +48,9 @@ type Context interface {
SetTokenTimestamps(bool) // Set token timestamps flag
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
SetAudioCtx(uint) // Set audio encoder context
SetMaxContext(n int) // Set maximum number of text context tokens to store
SetBeamSize(n int) // Set Beam Size
SetEntropyThold(t float32) // Set Entropy threshold
SetInitialPrompt(prompt string) // Set initial prompt
// Process mono audio data and return any errors.

View File

@ -9,7 +9,7 @@ import (
// CGO
/*
#cgo LDFLAGS: -lwhisper -lm -lstdc++
#cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
#include <whisper.h>
#include <stdlib.h>