[model_cards] add mine

Julien Chaumond 2020-01-31 17:10:04 -05:00
parent d426b58b9e
commit d6fc34b459
2 changed files with 77 additions and 0 deletions


@ -0,0 +1,25 @@
## How to build a dummy model
```python
import os

from transformers.configuration_bert import BertConfig
from transformers.modeling_bert import BertForMaskedLM
from transformers.modeling_tf_bert import TFBertForMaskedLM
from transformers.tokenization_bert import BertTokenizer

# Identifier the dummy model is published under on the model hub.
SMALL_MODEL_IDENTIFIER = "julien-c/bert-xsmall-dummy"
DIRNAME = "./bert-xsmall-dummy"
os.makedirs(DIRNAME, exist_ok=True)

# Positional args: vocab_size=10, hidden_size=20,
# num_hidden_layers=1, num_attention_heads=1, intermediate_size=40.
config = BertConfig(10, 20, 1, 1, 40)

model = BertForMaskedLM(config)
model.save_pretrained(DIRNAME)

# Convert the PyTorch weights into a TF 2.0 checkpoint in the same directory.
tf_model = TFBertForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
tf_model.save_pretrained(DIRNAME)

# Slightly different for the tokenizer: from_pretrained() needs an existing
# vocab file in DIRNAME (a tokenizer cannot be built from the config alone).
# tokenizer = BertTokenizer.from_pretrained(DIRNAME)
# tokenizer.save_pretrained(DIRNAME)
```
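
For a quick round-trip check (a minimal sketch, not part of the original card), the freshly saved checkpoint can be reloaded from the local directory; the tiny config values make it easy to verify:

```python
from transformers.modeling_bert import BertForMaskedLM
from transformers.modeling_tf_bert import TFBertForMaskedLM

# Reload both the PyTorch and TF 2.0 checkpoints written above.
model = BertForMaskedLM.from_pretrained("./bert-xsmall-dummy")
tf_model = TFBertForMaskedLM.from_pretrained("./bert-xsmall-dummy")
assert model.config.hidden_size == 20
assert model.config.num_hidden_layers == 1
```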


@ -0,0 +1,52 @@
```python
import json
import os

from transformers.configuration_roberta import RobertaConfig
from transformers import RobertaForMaskedLM, TFRobertaForMaskedLM

DIRNAME = "./dummy-unknown"
os.makedirs(DIRNAME, exist_ok=True)

# Positional args: vocab_size=10, hidden_size=20,
# num_hidden_layers=1, num_attention_heads=1, intermediate_size=40.
config = RobertaConfig(10, 20, 1, 1, 40)

model = RobertaForMaskedLM(config)
model.save_pretrained(DIRNAME)

# Convert the PyTorch weights into a TF 2.0 checkpoint in the same directory.
tf_model = TFRobertaForMaskedLM.from_pretrained(DIRNAME, from_pt=True)
tf_model.save_pretrained(DIRNAME)

# Tokenizer: write a tiny byte-level BPE vocab and merges file by hand.
# "\u0120" is the Ġ character that marks a leading space in the
# GPT-2/RoBERTa byte-level encoding.
vocab = [
    "l",
    "o",
    "w",
    "e",
    "r",
    "s",
    "t",
    "i",
    "d",
    "n",
    "\u0120",
    "\u0120l",
    "\u0120n",
    "\u0120lo",
    "\u0120low",
    "er",
    "\u0120lowest",
    "\u0120newer",
    "\u0120wider",
    "<unk>",
]
vocab_tokens = dict(zip(vocab, range(len(vocab))))
merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""]

vocab_file = os.path.join(DIRNAME, "vocab.json")
merges_file = os.path.join(DIRNAME, "merges.txt")
with open(vocab_file, "w", encoding="utf-8") as fp:
    fp.write(json.dumps(vocab_tokens) + "\n")
with open(merges_file, "w", encoding="utf-8") as fp:
    fp.write("\n".join(merges))
```
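
As a sanity check (a hedged sketch, not part of the original card), the hand-written vocab.json and merges.txt should be enough for RobertaTokenizer to load from the directory; the merges then drive the byte-level BPE splits:

```python
from transformers.tokenization_roberta import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained(DIRNAME)
# "lower" has no leading space, so only the "e r" merge applies;
# " newer" keeps its standalone leading-space marker "\u0120".
print(tokenizer.tokenize("lower newer"))
# Expected: ['l', 'o', 'w', 'er', '\u0120', 'n', 'e', 'w', 'er']
```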