Update README.md (#8815)

The tokenizer call that produces input_ids in example 2 currently encodes text_1. I think this should be changed to text_2.
This commit is contained in:
mdermentzi 2020-11-27 14:34:57 +01:00 committed by GitHub
parent f8eda599bd
commit e3ef62bce1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 1 addition and 1 deletion

View File

@ -91,7 +91,7 @@ print(tokenizer_greek.convert_ids_to_tokens(outputs[0, 5].max(0)[1].item()))
# ================ EXAMPLE 2 ================
text_2 = 'Είναι ένας [MASK] άνθρωπος.'
# EN: 'He is a [MASK] person.'
input_ids = tokenizer_greek.encode(text_1)
input_ids = tokenizer_greek.encode(text_2)
print(tokenizer_greek.convert_ids_to_tokens(input_ids))
# ['[CLS]', 'ειναι', 'ενας', '[MASK]', 'ανθρωπος', '.', '[SEP]']
outputs = lm_model_greek(torch.tensor([input_ids]))[0]