Fix bug in world tokenizer (#93)

2023-06-11 18:46:54 +12:00 · 2023-06-11 18:46:54 +12:00 · b88ae59604
parent 82c4ac78f4
commit b88ae59604
1 changed file with 2 additions and 1 deletion
--- a/rwkv/rwkv_tokenizer.py
+++ b/rwkv/rwkv_tokenizer.py
@@ -106,7 +106,8 @@ class TRIE_TOKENIZER():
 def get_tokenizer(tokenizer="20B"):
    if tokenizer == "world":
        print('Loading world tokenizer')
-        tokenizer = TRIE_TOKENIZER('rwkv_vocab_v20230424.txt')
+        tokenizer_path = pathlib.Path(os.path.abspath(__file__)).parent / 'rwkv_vocab_v20230424.txt'
+        tokenizer = TRIE_TOKENIZER(tokenizer_path)
        tokenizer_encode = lambda prompt: tokenizer.encode(prompt)
    elif tokenizer == "20B":
        print('Loading 20B tokenizer')