Merge pull request from aidanperkins/main

Added the ability to pass the audio buffer to whisper directly instead of a filename. Also merged in the changes from #13 and from one other pull request (its reference number is missing from this message).
This commit is contained in:
Luke Southam 2023-05-28 18:07:58 +01:00 committed by GitHub
commit 75240c1e4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 25 additions and 8 deletions

View File

@ -37,4 +37,4 @@ setup(
'ffmpeg-python',
'requests'
],
)
)

@ -1 +1 @@
Subproject commit 1d716d6e34f3f4ba57bd9706a9258a0bdb008153
Subproject commit 0a2d1210bcb98978214bbf4e100922a413afd39d

View File

@ -71,7 +71,8 @@ cdef extern from "whisper.h" nogil:
whisper_encoder_begin_callback encoder_begin_callback
void* encoder_begin_callback_user_data
whisper_full_params whisper_full_default_params(whisper_sampling_strategy)
cdef whisper_context* whisper_init(char*)
cdef whisper_context* whisper_init_from_file(char*)
cdef whisper_context* whisper_init_from_buffer(voidptr, int)
cdef void whisper_free(whisper_context*)
cdef int whisper_pcm_to_mel(whisper_context*, float*, int, int)
cdef int whisper_set_mel(whisper_context*, float*, int, int)

View File

@ -16,7 +16,7 @@ cimport numpy as cnp
cdef int SAMPLE_RATE = 16000
cdef char* TEST_FILE = 'test.wav'
cdef char* DEFAULT_MODEL = 'tiny'
cdef char* LANGUAGE = b'fr'
cdef char* LANGUAGE = b'en'
cdef int N_THREADS = os.cpu_count()
MODELS = {
@ -84,21 +84,37 @@ cdef class Whisper:
cdef whisper_context * ctx
cdef whisper_full_params params
def __init__(self, model=DEFAULT_MODEL, pb=None):
model_fullname = f'ggml-{model}.bin'.encode('utf8')
def __init__(self, model=DEFAULT_MODEL, pb=None, buf=None):
model_fullname = f'ggml-{model}.bin'
download_model(model_fullname)
model_path = Path(MODELS_DIR).joinpath(model_fullname)
cdef bytes model_b = str(model_path).encode('utf8')
self.ctx = whisper_init(model_b)
if buf is not None:
self.ctx = whisper_init_from_buffer(buf, buf.size)
else:
self.ctx = whisper_init_from_file(model_b)
self.params = default_params()
whisper_print_system_info()
def __dealloc__(self):
whisper_free(self.ctx)
def transcribe(self, filename=TEST_FILE):
print("Loading data..")
cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = load_audio(<bytes>filename)
if (type(filename) == np.ndarray) :
temp = filename
elif (type(filename) == str) :
temp = load_audio(<bytes>filename)
else :
temp = load_audio(<bytes>TEST_FILE)
cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = temp
print("Transcribing..")
return whisper_full(self.ctx, self.params, &frames[0], len(frames))