fix perplexity after c-api refactor (#390)
* preallocate a buffer of fitting size for tokenization (utils.cpp)
* don't create a new std::string (especially here, where it's usually large)
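Both bullets act on the tokenizer entry point that the C-API refactor introduced. For context, a sketch of that entry point as the calls in the diff below use it (the authoritative prototype lives in llama.h; treat the exact spelling here as approximate):

```cpp
// Sketch of the C-API tokenizer (see llama.h for the real declaration).
// It writes at most n_max_tokens ids into `tokens` and returns the number
// actually written, or a negative value on failure.
int llama_tokenize(
        struct llama_context * ctx,
                  const char * text,
                 llama_token * tokens,
                         int   n_max_tokens,
                        bool   add_bos);
```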
commit 56e659a0b2 (parent 40ea807a97)

main.cpp (2 changed lines):
@@ -85,7 +85,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
     // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
     // Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
     // Output: `perplexity: 13.5106 [114/114]`
-    auto tokens = ::llama_tokenize(ctx, params.prompt.c_str(), true);
+    auto tokens = ::llama_tokenize(ctx, params.prompt, true);
 
     int count = 0;
     double nll = 0.0;
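This one-line main.cpp change is about avoiding a hidden copy: after the refactor the `::llama_tokenize` wrapper takes `const std::string &`, so passing `params.prompt.c_str()` still compiled, but only by materializing a temporary `std::string` from the raw pointer, i.e. a full copy of a prompt that, in the perplexity run above, is an entire test corpus. A minimal sketch of the pitfall, with a hypothetical `consume` helper standing in for the wrapper:

```cpp
#include <string>

// Hypothetical helper with the same parameter type as the ::llama_tokenize wrapper.
static void consume(const std::string & text) { (void)text; }

int main() {
    const std::string prompt(1 << 20, 'x');  // large prompt, ~1 MiB

    consume(prompt.c_str());  // constructs a temporary std::string: copies all 1 MiB
    consume(prompt);          // binds the reference directly: no copy
    return 0;
}
```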
utils.cpp:

@@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
 
 // TODO: not great allocating this every time
 std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
-    std::vector<llama_token> res(8096);
+    // initialize to prompt number of chars, since n_tokens <= n_prompt_chars
+    std::vector<llama_token> res(text.size() + (int)add_bos);
     int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
     assert(n >= 0);
     res.resize(n);
 
     return res;
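The new initial size is a true upper bound, not a guess: every emitted token consumes at least one character of input, and BOS is the only token added beyond those, so `n_tokens <= text.size() + add_bos` always holds, and the `assert(n >= 0)` can now only fire on a genuine tokenizer failure rather than on a long prompt overflowing a fixed 8096-slot buffer. A self-contained sketch of the same allocate-then-shrink pattern, using a toy whitespace tokenizer in place of the real one:

```cpp
#include <cassert>
#include <string>
#include <vector>

// Toy stand-in for the real tokenizer: emits one "token" per space-separated
// word, so it trivially satisfies n_tokens <= n_chars.
static int toy_tokenize(const char * text, int * out, int n_max) {
    int n = 0;
    for (const char * p = text; *p; ++p) {
        if (*p != ' ' && (p == text || p[-1] == ' ')) {  // start of a word
            if (n >= n_max) {
                return -1;  // buffer too small: report failure, like the C API
            }
            out[n] = n;     // fake token id
            ++n;
        }
    }
    return n;
}

int main() {
    const std::string text = "the quick brown fox";
    std::vector<int> res(text.size());  // preallocate the upper bound, as in the patch
    const int n = toy_tokenize(text.c_str(), res.data(), (int)res.size());
    assert(n >= 0);  // only genuine failures reach here
    res.resize(n);   // shrink to the exact token count (4 for this input)
    return 0;
}
```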