34 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			34 lines
		
	
	
		
			1.2 KiB
		
	
	
	
		
			Python
		
	
	
	
| # Quantizes rwkv.cpp model file from FP32 or FP16 to Q4_0, Q4_1, Q4_1_O, Q4_2, Q4_3.
 | |
# Usage: python quantize.py C:\rwkv.cpp-169M-float32.bin C:\rwkv.cpp-169M-q4_1_o.bin 4
 | |
| 
 | |
| import argparse
 | |
| import rwkv_cpp_shared_library
 | |
| 
 | |
def parse_args():
    """Parse the command-line arguments of the quantization script.

    Reads ``sys.argv`` through ``argparse``.

    Returns:
        argparse.Namespace with three attributes:
        ``src_path`` (str), ``dest_path`` (str) and ``data_type`` (int,
        restricted to 2..6; 4 when omitted on the command line).
    """
    parser = argparse.ArgumentParser(
        description='Quantize rwkv.cpp model file from FP32 or FP16 to Q4_0, Q4_1, Q4_1_O, Q4_2 or Q4_3'
    )
    parser.add_argument('src_path', help='Path to FP32/FP16 checkpoint file')
    parser.add_argument('dest_path', help='Path to resulting checkpoint file, will be overwritten')
    parser.add_argument(
        'data_type',
        help='Data type, '
             '2 (GGML_TYPE_Q4_0), '
             '3 (GGML_TYPE_Q4_1), '
             '4 (GGML_TYPE_Q4_1_O), '
             '5 (Q4_2), '
             '6 (Q4_3)',
        type=int,
        choices=[2, 3, 4, 5, 6],
        # A plain positional is always required, so a default on it is dead
        # code. nargs='?' makes the argument optional and lets default=4
        # actually apply, while the explicit three-argument form still works.
        nargs='?',
        default=4,
    )
    return parser.parse_args()
 | |
| 
 | |
def main() -> None:
    """Quantize the model file named on the command line.

    Parses the command-line arguments, loads the rwkv shared library via
    ``rwkv_cpp_shared_library.load_rwkv_shared_library()``, passes the source
    path, destination path and data type to its ``rwkv_quantize_model_file``
    method, and prints ``Done`` once that call returns.
    """
    cli = parse_args()

    rwkv_library = rwkv_cpp_shared_library.load_rwkv_shared_library()

    source_path, target_path, q_type = cli.src_path, cli.dest_path, cli.data_type
    rwkv_library.rwkv_quantize_model_file(source_path, target_path, q_type)

    print('Done')
 | |
| 
 | |
# Run the quantization only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
 |