tensorlayer3/tests/layers/test_layers_embedding.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import unittest
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorlayer as tl
import numpy as np
from tests.utils import CustomTestCase


class Layer_Embed_Test(CustomTestCase):

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass
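
    # OneHot should expand an int32 index vector of shape [32] into a
    # one-hot tensor of shape [32, depth].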
    def test_onehot(self):
        input = tl.layers.Input([32], dtype=tl.int32)
        onehot = tl.layers.OneHot(depth=8, on_value=1, off_value=0, axis=-1)
        print(onehot)
        tensor = tl.layers.OneHot(depth=8)(input)
        self.assertEqual(tensor.get_shape().as_list(), [32, 8])
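
    # Embedding maps each token id to a dense vector, so an [8, 100] index
    # tensor becomes [8, 100, embedding_size].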
    def test_embed(self):
        input = tl.layers.Input([8, 100], dtype=tl.int32)
        embed = tl.layers.Embedding(vocabulary_size=1000, embedding_size=50, name='embed')
        print(embed)
        tensor = embed(input)
        self.assertEqual(tensor.get_shape().as_list(), [8, 100, 50])
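
    # AverageEmbedding averages the word embeddings over the sequence
    # dimension, so the output shape is [batch_size, embedding_size].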
    def test_avg_embed(self):
        batch_size = 8
        length = 5
        input = tl.layers.Input([batch_size, length], dtype=tl.int32)
        avgembed = tl.layers.AverageEmbedding(vocabulary_size=1000, embedding_size=50, name='avg')
        print(avgembed)
        tensor = avgembed(input)
        # print(tensor)
        self.assertEqual(tensor.get_shape().as_list(), [batch_size, 50])
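
    # With activate_nce_loss=True, Word2vecEmbedding can return either the
    # embedding alone or (embedding, nce_loss), controlled by use_nce_loss.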
    def test_word2vec_nce(self):
        batch_size = 8
        embedding_size = 50
        inputs = tl.layers.Input([batch_size], dtype=tl.int32)
        labels = tl.layers.Input([batch_size, 1], dtype=tl.int32)
        emb_net = tl.layers.Word2vecEmbedding(
            vocabulary_size=10000,
            embedding_size=embedding_size,
            num_sampled=100,
            activate_nce_loss=True,  # the nce loss is activated
            nce_loss_args={},
            E_init=tl.initializers.random_uniform(minval=-1.0, maxval=1.0),
            nce_W_init=tl.initializers.truncated_normal(stddev=float(1.0 / np.sqrt(embedding_size))),
            nce_b_init=tl.initializers.constant(value=0.0),
        )
        print(emb_net)
        # embedding only, NCE loss explicitly switched off for this call
        embed_tensor = emb_net([inputs, labels], use_nce_loss=False)
        # embedding plus NCE loss, requested explicitly
        embed_tensor, embed_nce_loss = emb_net([inputs, labels], use_nce_loss=True)
        # default: the NCE loss is also returned because activate_nce_loss=True
        embed_tensor, embed_nce_loss = emb_net([inputs, labels])
        self.assertEqual(embed_tensor.get_shape().as_list(), [batch_size, embedding_size])
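
    # With activate_nce_loss=False only the embedding is returned, and
    # requesting the NCE loss raises an AttributeError.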
    def test_word2vec_no_nce(self):
        batch_size = 8
        embedding_size = 50
        inputs = tl.layers.Input([batch_size], dtype=tl.int32)
        emb_net = tl.layers.Word2vecEmbedding(
            vocabulary_size=10000,
            embedding_size=embedding_size,
            num_sampled=100,
            activate_nce_loss=False,  # the nce loss is not activated
            nce_loss_args={},
            E_init=tl.initializers.random_uniform(minval=-1.0, maxval=1.0),
            nce_W_init=tl.initializers.truncated_normal(stddev=float(1.0 / np.sqrt(embedding_size))),
            nce_b_init=tl.initializers.constant(value=0.0),
        )
        print(emb_net)
        embed_tensor = emb_net(inputs)
        embed_tensor = emb_net(inputs, use_nce_loss=False)
        # requesting the NCE loss when it was never built should fail
        try:
            embed_tensor = emb_net(inputs, use_nce_loss=True)
        except AttributeError as e:
            print(e)
        self.assertEqual(embed_tensor.get_shape().as_list(), [batch_size, embedding_size])


if __name__ == '__main__':
    unittest.main()