Add quantization test back, run ggml tests on first context init

This commit is contained in:
saharNooby 2023-04-02 13:05:17 +04:00
parent 1ecbad3a65
commit ee46ad208e
1 changed files with 55 additions and 8 deletions

63
ggml.c
View File

@ -2793,6 +2793,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
static bool is_first_call = true; static bool is_first_call = true;
bool run_test_suite = false;
if (is_first_call) { if (is_first_call) {
// initialize time system (required on Windows) // initialize time system (required on Windows)
ggml_time_init(); ggml_time_init();
@ -2833,6 +2835,8 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f); GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f);
} }
run_test_suite = true;
is_first_call = false; is_first_call = false;
} }
@ -2877,6 +2881,10 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
ggml_critical_section_end(); ggml_critical_section_end();
if (run_test_suite) {
ggml_run_test_suite();
}
return ctx; return ctx;
} }
@ -10813,18 +10821,59 @@ int ggml_cpu_has_vsx(void) {
#define GGML_TEST_SET_ELEMENT_F32(tensor, i, value) *(float *) ((char *) tensor->data + 4 * i) = value #define GGML_TEST_SET_ELEMENT_F32(tensor, i, value) *(float *) ((char *) tensor->data + 4 * i) = value
#define GGML_TEST_ASSERT_ELEMENT_F32(tensor, i, expected_value) do {\ #define GGML_TEST_ASSERT(x, ...) do {\
float actual = *(float *) ((char *) tensor->data + 4 * i);\ if (!(x)) {\
if (fabs(actual - expected_value) >= 0.0001F) {\
fprintf(stderr, "*** Assertion failed ***\n");\ fprintf(stderr, "*** Assertion failed ***\n");\
fprintf(stderr, "At %s[%d]: expected %f, actual %f\n", #tensor, i, expected_value, actual);\ fprintf(stderr, __VA_ARGS__);\
fprintf(stderr, "%s:%d\n", __FILE__, __LINE__);\ fprintf(stderr, "\n%s:%d\n", __FILE__, __LINE__);\
abort();\ abort();\
}\ }\
} while (0) } while (0)
#define GGML_TEST_ASSERT_ELEMENT_F32(tensor, i, expected_value) do {\
float actual = *(float *) ((char *) tensor->data + 4 * i);\
GGML_TEST_ASSERT(fabs(actual - expected_value) <= 0.0001F, "At %s[%d]: expected %f, actual %f", #tensor, i, expected_value, actual);\
} while (0)
// Copied from https://github.com/ggerganov/llama.cpp/blob/6e7801d08d81c931a5427bae46f00763e993f54a/tests/test-quantize.c
void ggml_test_quantization() {
#define QK 32
float src[QK];
uint8_t dst[24];
int64_t hist[16];
for (int i = 0; i < QK; i++) {
src[i] = (float) (i + 1);
}
size_t size = ggml_quantize_q4_0(src, dst, QK, QK, hist);
GGML_TEST_ASSERT(size == 20, "%d", size);
float max_result = ((float *) dst)[0];
float max_expected = src[31] / ((1 << 3) - 1);
GGML_TEST_ASSERT(max_result == max_expected, "%f, %f", max_result, max_expected);
for (int i = 0; i < QK; i++) {
uint8_t q4_result = (i % 2) ? (dst[sizeof(float) + i / 2] >> 4) : (dst[sizeof(float) + i / 2] & 0xF);
uint8_t q4_expected = roundf(src[i] / max_expected) + 8;
GGML_TEST_ASSERT(q4_result == q4_expected, "%d, %d", q4_result, q4_expected);
}
size = ggml_quantize_q4_1(src, dst, QK, QK, hist);
GGML_TEST_ASSERT(size == 24, "%d", size);
float delta_result = ((float *) dst)[0];
float delta_expected = (src[31] - src[0]) / ((1 << 4) - 1);
GGML_TEST_ASSERT(delta_result == delta_expected, "%f, %f", delta_result, delta_expected);
float min_result = ((float *) dst)[1];
float min_expected = src[0];
GGML_TEST_ASSERT(min_result == min_expected, "%f, %f", min_result, min_expected);
for (int i = 0; i < QK; i++) {
uint8_t q4_result = (i % 2) ? (dst[sizeof(float) * 2 + i / 2] >> 4) : (dst[sizeof(float) * 2 + i / 2] & 0xF);
uint8_t q4_expected = roundf((src[i] - min_expected) / delta_expected);
GGML_TEST_ASSERT(q4_result == q4_expected, "%d, %d", q4_result, q4_expected);
}
}
void ggml_run_test_suite() { void ggml_run_test_suite() {
fprintf(stderr, "Running ggml test suite...\n"); ggml_test_quantization();
struct ggml_init_params params; struct ggml_init_params params;
params.mem_size = 16 * 1024; params.mem_size = 16 * 1024;
@ -10904,6 +10953,4 @@ void ggml_run_test_suite() {
GGML_TEST_ASSERT_ELEMENT_F32(max_a_b, 5, -0.0446F); GGML_TEST_ASSERT_ELEMENT_F32(max_a_b, 5, -0.0446F);
ggml_free(ctx); ggml_free(ctx);
fprintf(stderr, "All ggml tests pass\n");
} }