Load whisper models from a file or an in-memory buffer; accept sample arrays.

Tracks the whisper.cpp API split of whisper_init() into
whisper_init_from_file() and whisper_init_from_buffer().

Review notes on this revision of the patch:
  * The setup.py hunk is dropped: its only effect was to strip the final
    newline of the file.
  * whisper_init_from_buffer is declared as (void*, size_t) to match
    whisper.h.
  * Whisper.__init__ passes a real byte pointer and byte length to C, skips
    the download when a buffer is supplied, and raises if no context is
    returned.
  * Whisper.transcribe uses isinstance(), converts arrays to C-contiguous
    float32, and rejects unsupported or empty input instead of silently
    transcribing TEST_FILE.
  * Blob ids ("index" lines) for the two edited files are omitted because
    the post-image content differs from the originally recorded one.
  * Indentation and blank context lines were reconstructed from a
    whitespace-collapsed copy -- confirm against the working tree before
    applying.

diff --git a/whisper.cpp b/whisper.cpp
index 1d716d6..0a2d121 160000
--- a/whisper.cpp
+++ b/whisper.cpp
@@ -1 +1 @@
-Subproject commit 1d716d6e34f3f4ba57bd9706a9258a0bdb008153
+Subproject commit 0a2d1210bcb98978214bbf4e100922a413afd39d
diff --git a/whispercpp.pxd b/whispercpp.pxd
--- a/whispercpp.pxd
+++ b/whispercpp.pxd
@@ -71,7 +71,8 @@ cdef extern from "whisper.h" nogil:
         whisper_encoder_begin_callback encoder_begin_callback
         void* encoder_begin_callback_user_data
     whisper_full_params whisper_full_default_params(whisper_sampling_strategy)
-    cdef whisper_context* whisper_init(char*)
+    cdef whisper_context* whisper_init_from_file(char*)
+    cdef whisper_context* whisper_init_from_buffer(void*, size_t)
     cdef void whisper_free(whisper_context*)
     cdef int whisper_pcm_to_mel(whisper_context*, float*, int, int)
     cdef int whisper_set_mel(whisper_context*, float*, int, int)
diff --git a/whispercpp.pyx b/whispercpp.pyx
--- a/whispercpp.pyx
+++ b/whispercpp.pyx
@@ -16,7 +16,7 @@ cimport numpy as cnp
 cdef int SAMPLE_RATE = 16000
 cdef char* TEST_FILE = 'test.wav'
 cdef char* DEFAULT_MODEL = 'tiny'
-cdef char* LANGUAGE = b'fr'
+cdef char* LANGUAGE = b'en'
 cdef int N_THREADS = os.cpu_count()
 
 MODELS = {
@@ -84,21 +84,79 @@ cdef class Whisper:
     cdef whisper_context * ctx
     cdef whisper_full_params params
 
-    def __init__(self, model=DEFAULT_MODEL, pb=None):
-        model_fullname = f'ggml-{model}.bin'.encode('utf8')
-        download_model(model_fullname)
-        model_path = Path(MODELS_DIR).joinpath(model_fullname)
-        cdef bytes model_b = str(model_path).encode('utf8')
-        self.ctx = whisper_init(model_b)
+    def __init__(self, model=DEFAULT_MODEL, pb=None, buf=None):
+        """Create a whisper context from a model file or an in-memory model.
+
+        model: model name; ``ggml-<model>.bin`` is passed to ``download_model``
+            and then loaded from ``MODELS_DIR``. Ignored when ``buf`` is given.
+        pb: not used by this method.
+        buf: optional C-contiguous buffer (bytes, bytearray, NumPy array, ...)
+            holding a ggml model image; when given, nothing is downloaded.
+
+        Raises ValueError if ``buf`` is empty and RuntimeError if whisper.cpp
+        returns no context.
+        """
+        cdef const unsigned char[::1] model_view
+        cdef bytes model_b
+
+        if buf is not None:
+            # View the data as raw bytes so the length handed to C is a byte
+            # count whatever the element type of ``buf`` is.
+            model_view = memoryview(buf).cast('B')
+            if model_view.shape[0] == 0:
+                raise ValueError('buf is empty')
+            self.ctx = whisper_init_from_buffer(
+                <void*> &model_view[0], <size_t> model_view.shape[0])
+        else:
+            # DEFAULT_MODEL is a C string, so the default may arrive as bytes.
+            if isinstance(model, bytes):
+                model = model.decode('utf8')
+            model_fullname = f'ggml-{model}.bin'
+            download_model(model_fullname)
+            model_path = Path(MODELS_DIR).joinpath(model_fullname)
+            model_b = str(model_path).encode('utf8')
+            self.ctx = whisper_init_from_file(model_b)
+
+        if self.ctx == NULL:
+            raise RuntimeError('failed to initialise the whisper context')
+
         self.params = default_params()
         whisper_print_system_info()
 
     def __dealloc__(self):
-        whisper_free(self.ctx)
+        # ctx stays NULL when __init__ raised before creating a context.
+        if self.ctx != NULL:
+            whisper_free(self.ctx)
 
     def transcribe(self, filename=TEST_FILE):
+        """Transcribe an audio file or an array of samples.
+
+        filename: path (str, bytes or os.PathLike) handed to ``load_audio``,
+            or a 1-D NumPy array of samples (converted to float32 if needed;
+            presumably mono at SAMPLE_RATE Hz -- confirm). ``None`` selects
+            ``TEST_FILE``.
+
+        Returns the result of ``whisper_full``. Raises TypeError for any other
+        argument type and ValueError when there are no samples.
+        """
         print("Loading data..")
-        cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = load_audio(filename)
+        if filename is None:
+            filename = TEST_FILE
+        if isinstance(filename, np.ndarray):
+            audio = filename
+        elif isinstance(filename, (str, bytes, os.PathLike)):
+            audio = load_audio(os.fspath(filename))
+        else:
+            raise TypeError(
+                f'expected a path or numpy.ndarray, got {type(filename).__name__}')
+
+        # The typed buffer needs C-contiguous float32 data; no copy is made
+        # when the array already has that layout.
+        cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = (
+            np.ascontiguousarray(audio, dtype=np.float32))
+        if frames.shape[0] == 0:
+            # &frames[0] below would point outside an empty array.
+            raise ValueError('no audio samples to transcribe')
 
         print("Transcribing..")
         return whisper_full(self.ctx, self.params, &frames[0], len(frames))