fix unicode in tokenization tests

This commit is contained in:
thomwolf 2019-02-06 00:28:00 +01:00
parent 34bdb7f9cb
commit ba9e4eb354
1 changed files with 6 additions and 5 deletions

View File

@ -12,16 +12,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import, division, print_function, unicode_literals
import os
import unittest
from io import open
from pytorch_pretrained_bert.tokenization import (BertTokenizer, BasicTokenizer, WordpieceTokenizer,
_is_whitespace, _is_control, _is_punctuation)
from pytorch_pretrained_bert.tokenization import (BasicTokenizer,
BertTokenizer,
WordpieceTokenizer,
_is_control, _is_punctuation,
_is_whitespace)
class TokenizationTest(unittest.TestCase):