#include "ggml/ggml.h" #include "common.h" #include #include #include #include #include #include #include #include // default hparams struct mnist_hparams { int32_t n_input = 784; int32_t n_hidden = 500; int32_t n_classes = 10; }; struct mnist_model { mnist_hparams hparams; struct ggml_tensor * fc1_weight; struct ggml_tensor * fc1_bias; struct ggml_tensor * fc2_weight; struct ggml_tensor * fc2_bias; struct ggml_context * ctx; }; // load the model's weights from a file bool mnist_model_load(const std::string & fname, mnist_model & model) { printf("%s: loading model from '%s'\n", __func__, fname.c_str()); auto fin = std::ifstream(fname, std::ios::binary); if (!fin) { fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); return false; } // verify magic { uint32_t magic; fin.read((char *) &magic, sizeof(magic)); if (magic != 0x67676d6c) { fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); return false; } } auto & ctx = model.ctx; size_t ctx_size = 0; { const auto & hparams = model.hparams; const int n_input = hparams.n_input; const int n_hidden = hparams.n_hidden; const int n_classes = hparams.n_classes; ctx_size += n_input * n_hidden * ggml_type_sizef(GGML_TYPE_F32); // fc1 weight ctx_size += n_hidden * ggml_type_sizef(GGML_TYPE_F32); // fc1 bias ctx_size += n_hidden * n_classes * ggml_type_sizef(GGML_TYPE_F32); // fc2 weight ctx_size += n_classes * ggml_type_sizef(GGML_TYPE_F32); // fc2 bias printf("%s: ggml ctx size = %6.2f MB\n", __func__, ctx_size/(1024.0*1024.0)); } // create the ggml context { struct ggml_init_params params = { .mem_size = ctx_size + 1024*1024, .mem_buffer = NULL, .no_alloc = false, }; model.ctx = ggml_init(params); if (!model.ctx) { fprintf(stderr, "%s: ggml_init() failed\n", __func__); return false; } } // Read FC1 layer 1 { // Read dimensions int32_t n_dims; fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); { int32_t ne_weight[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne_weight[i]), sizeof(ne_weight[i])); } // FC1 dimensions taken from file, eg. 768x500 model.hparams.n_input = ne_weight[0]; model.hparams.n_hidden = ne_weight[1]; model.fc1_weight = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, model.hparams.n_input, model.hparams.n_hidden); fin.read(reinterpret_cast(model.fc1_weight->data), ggml_nbytes(model.fc1_weight)); ggml_set_name(model.fc1_weight, "fc1_weight"); } { int32_t ne_bias[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne_bias[i]), sizeof(ne_bias[i])); } model.fc1_bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_hidden); fin.read(reinterpret_cast(model.fc1_bias->data), ggml_nbytes(model.fc1_bias)); ggml_set_name(model.fc1_bias, "fc1_bias"); } } // Read FC2 layer 2 { // Read dimensions int32_t n_dims; fin.read(reinterpret_cast(&n_dims), sizeof(n_dims)); { int32_t ne_weight[2] = { 1, 1 }; for (int i = 0; i < n_dims; ++i) { fin.read(reinterpret_cast(&ne_weight[i]), sizeof(ne_weight[i])); } // FC1 dimensions taken from file, eg. 
    // Read FC2 (layer 2)
    {
        // Read dimensions
        int32_t n_dims;
        fin.read(reinterpret_cast<char *>(&n_dims), sizeof(n_dims));

        {
            int32_t ne_weight[2] = { 1, 1 };
            for (int i = 0; i < n_dims; ++i) {
                fin.read(reinterpret_cast<char *>(&ne_weight[i]), sizeof(ne_weight[i]));
            }

            // FC2 dimensions taken from the file, eg. 500x10
            model.hparams.n_classes = ne_weight[1];

            model.fc2_weight = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, model.hparams.n_hidden, model.hparams.n_classes);
            fin.read(reinterpret_cast<char *>(model.fc2_weight->data), ggml_nbytes(model.fc2_weight));
            ggml_set_name(model.fc2_weight, "fc2_weight");
        }

        {
            int32_t ne_bias[2] = { 1, 1 };
            for (int i = 0; i < n_dims; ++i) {
                fin.read(reinterpret_cast<char *>(&ne_bias[i]), sizeof(ne_bias[i]));
            }

            model.fc2_bias = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, model.hparams.n_classes);
            fin.read(reinterpret_cast<char *>(model.fc2_bias->data), ggml_nbytes(model.fc2_bias));
            ggml_set_name(model.fc2_bias, "fc2_bias");
        }
    }

    fin.close();

    return true;
}

// evaluate the model
//
//   - model:     the model
//   - n_threads: number of threads to use
//   - digit:     784 pixel values
//
// returns the predicted digit, 0 - 9
int mnist_eval(
        const mnist_model & model,
        const int n_threads,
        std::vector<float> digit
        ) {
    const auto & hparams = model.hparams;

    static size_t buf_size = hparams.n_input * sizeof(float) * 4;
    static void * buf = malloc(buf_size);

    struct ggml_init_params params = {
        .mem_size   = buf_size,
        .mem_buffer = buf,
    };

    struct ggml_context * ctx0 = ggml_init(params);
    struct ggml_cgraph gf = {};
    gf.n_threads = n_threads;

    struct ggml_tensor * input = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hparams.n_input);
    memcpy(input->data, digit.data(), ggml_nbytes(input));
    ggml_set_name(input, "input");

    // the MLP: fc1 = W1*x + b1, fc2 = W2*relu(fc1) + b2
    ggml_tensor * fc1 = ggml_add(ctx0, ggml_mul_mat(ctx0, model.fc1_weight, input),                 model.fc1_bias);
    ggml_tensor * fc2 = ggml_add(ctx0, ggml_mul_mat(ctx0, model.fc2_weight, ggml_relu(ctx0, fc1)), model.fc2_bias);

    // soft max
    ggml_tensor * probs = ggml_soft_max(ctx0, fc2);

    // run the computation
    ggml_build_forward_expand(&gf, probs);
    ggml_graph_compute       (ctx0, &gf);

    //ggml_graph_print   (&gf);
    ggml_graph_dump_dot(&gf, NULL, "mnist.dot");

    const float * probs_data = ggml_get_data_f32(probs);

    // the prediction is the class with the highest probability
    const int prediction = std::max_element(probs_data, probs_data + 10) - probs_data;

    ggml_free(ctx0);

    return prediction;
}
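// The two functions below are intended to be called from JavaScript when this
// file is compiled to WebAssembly (presumably via Emscripten, matching the
// "wasm_" naming); extern "C" keeps the exported symbol names unmangled so
// they stay callable as wasm_eval / wasm_random_digit.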
#ifdef __cplusplus
extern "C" {
#endif

int wasm_eval(uint8_t * digitPtr) {
    mnist_model model;
    if (!mnist_model_load("models/mnist/ggml-model-f32.bin", model)) {
        fprintf(stderr, "error loading model\n");
        return -1;
    }
    std::vector<float> digit(digitPtr, digitPtr + 784);
    int result = mnist_eval(model, 1, digit);
    ggml_free(model.ctx);

    return result;
}

int wasm_random_digit(char * digitPtr) {
    auto fin = std::ifstream("models/mnist/t10k-images.idx3-ubyte", std::ios::binary);
    if (!fin) {
        fprintf(stderr, "failed to open digits file\n");
        return 0;
    }
    srand(time(NULL));

    // seek to a random digit: 16-byte header + 28*28 * (random index in [0, 10000))
    fin.seekg(16 + 784 * (rand() % 10000));
    fin.read(digitPtr, 784);

    return 1;
}

#ifdef __cplusplus
}
#endif

int main(int argc, char ** argv) {
    srand(time(NULL));
    ggml_time_init();

    if (argc != 3) {
        fprintf(stderr, "Usage: %s models/mnist/ggml-model-f32.bin models/mnist/t10k-images.idx3-ubyte\n", argv[0]);
        exit(1);
    }

    uint8_t buf[784];
    mnist_model model;
    std::vector<float> digit;

    // load the model
    {
        const int64_t t_start_us = ggml_time_us();

        if (!mnist_model_load(argv[1], model)) {
            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, argv[1]);
            return 1;
        }

        const int64_t t_load_us = ggml_time_us() - t_start_us;

        fprintf(stdout, "%s: loaded model in %8.2f ms\n", __func__, t_load_us / 1000.0f);
    }

    // read a random digit from the test set
    {
        std::ifstream fin(argv[2], std::ios::binary);
        if (!fin) {
            fprintf(stderr, "%s: failed to open '%s'\n", __func__, argv[2]);
            return 1;
        }

        // seek to a random digit: 16-byte header + 28*28 * (random index in [0, 10000))
        fin.seekg(16 + 784 * (rand() % 10000));
        fin.read((char *) &buf, sizeof(buf));
    }

    // render the digit in ASCII
    {
        digit.resize(sizeof(buf));

        for (int row = 0; row < 28; row++) {
            for (int col = 0; col < 28; col++) {
                fprintf(stderr, "%c ", (float)buf[row*28 + col] > 230 ? '*' : '_');
                digit[row*28 + col] = ((float)buf[row*28 + col]);
            }

            fprintf(stderr, "\n");
        }

        fprintf(stderr, "\n");
    }

    fprintf(stdout, "%s: predicted digit is %d\n", __func__, mnist_eval(model, 1, digit));

    ggml_free(model.ctx);

    return 0;
}
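// Example usage, assuming the binary is built as "mnist" in the usual way for
// the ggml examples (the paths below match the usage string printed by main):
//
//   ./mnist models/mnist/ggml-model-f32.bin models/mnist/t10k-images.idx3-ubyte
//
// The program prints the loaded model size, renders a random test digit as
// ASCII art on stderr, and prints the predicted class on stdout. It also
// writes the computation graph to mnist.dot for inspection with Graphviz.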