// Mirror of https://github.com/ggerganov/whisper.cpp.git
// Synced 2025-01-07 14:39:38 +01:00
// 41 lines, 1.1 KiB, C
|
#pragma once
|
||
|
|
||
|
// Sample rate assumed by the audio helpers declared in this header.
// needs to match WHISPER_SAMPLE_RATE
// Kept as a preprocessor macro so it stays usable in #if conditions.
#define COMMON_SAMPLE_RATE 16000
|
||
|
|
||
|
#include <vector>
|
||
|
#include <string>
|
||
|
|
||
|
// Returns a new string derived from s; s is taken by const reference and is not modified.
// NOTE(review): presumably strips leading/trailing whitespace - confirm against the definition.
std::string trim(const std::string & s);
|
||
|
|
||
|
// Builds a new string from s using the from/to arguments.
// All three arguments are read-only (const references); the result is returned by value.
// NOTE(review): presumably substitutes occurrences of `from` with `to`; whether every
// occurrence or only the first is replaced is not visible here - confirm in the definition.
std::string replace(
        const std::string & s,
        const std::string & from,
        const std::string & to);
|
||
|
|
||
|
// Read WAV audio file and store the PCM data into pcmf32
// The sample rate of the audio must be equal to COMMON_SAMPLE_RATE
// If stereo flag is set and the audio has 2 channels, the pcmf32s will contain 2 channel PCM
//
//   fname   - WAV file to read
//   pcmf32  - output: PCM samples as float
//   pcmf32s - output: 2 channel PCM, filled when stereo is set and the audio has 2 channels
//   stereo  - request the 2 channel data in pcmf32s
//
// NOTE(review): the bool result presumably reports success - confirm against the definition.
bool read_wav(
        const std::string & fname,
        std::vector<float> & pcmf32,
        std::vector<std::vector<float>> & pcmf32s,
        bool stereo);
|
||
|
|
||
|
// Apply a high-pass frequency filter to PCM audio
// Suppresses frequencies below cutoff Hz
//
//   data        - PCM samples; passed by non-const reference with no return value,
//                 so the filtered result is delivered through this vector
//   cutoff      - cutoff frequency in Hz
//   sample_rate - sample rate of data (NOTE(review): presumably in Hz - confirm)
void high_pass_filter(
        std::vector<float> & data,
        float cutoff,
        float sample_rate);
|
||
|
|
||
|
// Basic voice activity detection (VAD) using audio energy adaptive threshold
//
//   pcmf32      - PCM samples to analyse; taken by non-const reference
//                 (NOTE(review): the definition may modify it - confirm)
//   sample_rate - sample rate of pcmf32
//   last_ms     - NOTE(review): presumably the length in milliseconds of the most
//                 recent part of the audio that is compared against the rest - confirm
//   vad_thold   - NOTE(review): presumably the energy threshold - confirm
//   freq_thold  - NOTE(review): presumably a frequency cutoff applied before the
//                 energy comparison - confirm
//   verbose     - NOTE(review): presumably enables diagnostic output - confirm
//
// NOTE(review): the meaning of the bool result (speech detected vs. speech ended)
// is not visible from this declaration - confirm against the definition.
bool vad_simple(
        std::vector<float> & pcmf32,
        int sample_rate,
        int last_ms,
        float vad_thold,
        float freq_thold,
        bool verbose);
|
||
|
|