Update README.md (#8815)
The tokenizer call that produces the input_ids in example 2 currently encodes text_1. I think this should be changed to text_2.
parent f8eda599bd
commit e3ef62bce1
@@ -91,7 +91,7 @@ print(tokenizer_greek.convert_ids_to_tokens(outputs[0, 5].max(0)[1].item()))
 # ================ EXAMPLE 2 ================
 text_2 = 'Είναι ένας [MASK] άνθρωπος.'
 # EN: 'He is a [MASK] person.'
-input_ids = tokenizer_greek.encode(text_1)
+input_ids = tokenizer_greek.encode(text_2)
 print(tokenizer_greek.convert_ids_to_tokens(input_ids))
 # ['[CLS]', 'ειναι', 'ενας', '[MASK]', 'ανθρωπος', '.', '[SEP]']
 outputs = lm_model_greek(torch.tensor([input_ids]))[0]