Fix bug in world tokenizer (#93)

This commit is contained in:
Mathmagician8191 2023-06-11 18:46:54 +12:00 committed by GitHub
parent 82c4ac78f4
commit b88ae59604
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 2 additions and 1 deletion

View File

@@ -106,7 +106,8 @@ class TRIE_TOKENIZER():
def get_tokenizer(tokenizer="20B"):
if tokenizer == "world":
print('Loading world tokenizer')
tokenizer = TRIE_TOKENIZER('rwkv_vocab_v20230424.txt')
tokenizer_path = pathlib.Path(os.path.abspath(__file__)).parent / 'rwkv_vocab_v20230424.txt'
tokenizer = TRIE_TOKENIZER(tokenizer_path)
tokenizer_encode = lambda prompt: tokenizer.encode(prompt)
elif tokenizer == "20B":
print('Loading 20B tokenizer')