Extract supported marian models to extra folder
This commit is contained in:
parent
585097059c
commit
b6483cc7f5
|
@ -1,6 +1,48 @@
|
|||
import json
|
||||
from transformers.utils import cached_file
|
||||
|
||||
# NOTE: In total, there are 1440 models available on the HuggingFace hub (https://huggingface.co/Helsinki-NLP/).
|
||||
# We have converted some of these (listed below). If you don't see your model here, feel free to convert it yourself
|
||||
# and make a pull request to this repo.
|
||||
|
||||
SUPPORTED_HELSINKI_NLP_MODELS = [
|
||||
'en-es', 'es-en', # English <-> Spanish
|
||||
'en-fr', 'fr-en', # English <-> French
|
||||
'en-hi', 'hi-en', # English <-> Hindi
|
||||
'en-de', 'de-en', # English <-> German
|
||||
'en-ru', 'ru-en', # English <-> Russian
|
||||
'en-it', 'it-en', # English <-> Italian
|
||||
'en-ar', 'ar-en', # English <-> Arabic
|
||||
'en-zh', 'zh-en', # English <-> Chinese
|
||||
'en-sv', 'sv-en', # English <-> Swedish
|
||||
'en-mul', 'mul-en', # English <-> Multilingual
|
||||
'en-nl', 'nl-en', # English <-> Dutch
|
||||
'en-fi', 'fi-en', # English <-> Finnish
|
||||
'en-jap', 'jap-en', # English <-> Japanese
|
||||
'en-cs', 'cs-en', # English <-> Czech
|
||||
'en-vi', 'vi-en', # English <-> Vietnamese
|
||||
'en-xh', 'xh-en', # English <-> Xhosa
|
||||
'en-hu', 'hu-en', # English <-> Hungarian
|
||||
'en-da', 'da-en', # English <-> Danish
|
||||
'en-id', 'id-en', # English <-> Indonesian
|
||||
'en-uk', 'uk-en', # English <-> Ukrainian
|
||||
'en-af', 'af-en', # English <-> Afrikaans
|
||||
'de-es', 'es-de', # German <-> Spanish
|
||||
'fr-es', 'es-fr', # French <-> Spanish
|
||||
'fr-de', 'de-fr', # French <-> German
|
||||
'es-it', 'it-es', # Spanish <-> Italian
|
||||
|
||||
'en-ro', # English --> Romanian
|
||||
'pl-en', # Polish --> English
|
||||
'tr-en', # Turkish --> English
|
||||
'ko-en', # Korean --> English
|
||||
|
||||
'es-ru', 'ru-es', # Spanish <-> Russian
|
||||
'fr-ru', 'ru-fr', # French <-> Russian
|
||||
'fr-ro', 'ro-fr', # French <-> Romanian
|
||||
'uk-ru', 'ru-uk', # Ukrainian <-> Russian
|
||||
]
|
||||
|
||||
|
||||
def generate_tokenizer_json(model_path, tokenizer):
|
||||
# Marian models use two separate tokenizers for source and target languages.
|
||||
|
|
100
scripts/tasks.py
100
scripts/tasks.py
|
@ -1,3 +1,4 @@
|
|||
from .extra.marian import SUPPORTED_HELSINKI_NLP_MODELS
|
||||
|
||||
SUPPORTED_TASKS = {
|
||||
# map tasks to automodels
|
||||
|
@ -166,104 +167,11 @@ SUPPORTED_MODELS = {
|
|||
]
|
||||
},
|
||||
'marian': {
|
||||
'Helsinki-NLP/opus-mt-en-es': [
|
||||
f'Helsinki-NLP/opus-mt-{x}': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-es-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-fr': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-fr-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-hi': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-hi-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-de': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-de-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-ru': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-ru-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-it': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-it-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-ar': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-ar-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-zh': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-zh-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-sv': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-sv-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-mul': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-mul-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-nl': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-nl-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-en-fi': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
'Helsinki-NLP/opus-mt-fi-en': [
|
||||
'default',
|
||||
'seq2seq-lm-with-past',
|
||||
],
|
||||
|
||||
# TODO add more models, or dynamically generate this list
|
||||
]
|
||||
for x in SUPPORTED_HELSINKI_NLP_MODELS
|
||||
},
|
||||
'mobilebert': {
|
||||
'google/mobilebert-uncased': [
|
||||
|
|
Loading…
Reference in New Issue