18 lines
676 B
Python
18 lines
676 B
Python
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
# Registry of translation directions. Each key names a direction and each
# value is the checkpoint identifier handed to ``from_pretrained`` for it.
models = dict(
    en_ru="Helsinki-NLP/opus-mt-en-ru",
    ru_en="Helsinki-NLP/opus-mt-ru-en",
    # Register further directions here as additional key/checkpoint pairs.
)
|
|
|
|
# Both tables below are built when this module is imported and share the
# keys of ``models``.

# One tokenizer per registered direction.
tokenizers = {
    direction: AutoTokenizer.from_pretrained(checkpoint)
    for direction, checkpoint in models.items()
}

# One seq2seq model per registered direction, loaded after every tokenizer.
translation_models = {
    direction: AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    for direction, checkpoint in models.items()
}
|
|
|
|
def translate(text: str, lang: str) -> str:
    """Translate *text* using the tokenizer/model pair registered for *lang*.

    Args:
        text: The text to translate.
        lang: A key of the module-level ``tokenizers`` and
            ``translation_models`` tables (for example ``"en_ru"``).

    Returns:
        The first decoded output sequence with special tokens stripped, or
        ``""`` when *text* is an empty or whitespace-only string.

    Raises:
        KeyError: If *lang* has no entry in ``tokenizers`` or
            ``translation_models``. Not raised for blank *text*, which
            returns before the lookup.
    """
    # Nothing to translate: return early instead of asking the model to
    # generate from an input that carries no real text. Non-str inputs are
    # deliberately left alone and flow to the tokenizer exactly as before.
    if isinstance(text, str) and not text.strip():
        return ""

    tokenizer = tokenizers[lang]
    model = translation_models[lang]

    # truncation=True asks the tokenizer to cut over-long input down to its
    # configured maximum length rather than passing it on at full length.
    # NOTE(review): input beyond that limit is dropped silently — confirm
    # this is acceptable for callers, or chunk long text upstream.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)

    # Output length is capped at 200 newly generated tokens.
    outputs = model.generate(**inputs, max_new_tokens=200)

    # batch_decode returns one string per generated sequence; only the
    # first is returned.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]