#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Unit tests for TensorLayer embedding layers: OneHot, Embedding,
AverageEmbedding, and Word2vecEmbedding."""

import os
import unittest

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorlayer as tl

from tests.utils import CustomTestCase


class Layer_Embed_Test(CustomTestCase):

    @classmethod
    def setUpClass(cls):
        pass

    @classmethod
    def tearDownClass(cls):
        pass

    def test_onehot(self):
        inputs = tl.layers.Input([32], dtype=tl.int32)
        onehot = tl.layers.OneHot(depth=8, on_value=1, off_value=0, axis=-1)
        print(onehot)
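        # Calling the layer converts each integer id into a one-hot row of
        # length `depth`, so the [32] input becomes a [32, 8] tensor.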
        tensor = tl.layers.OneHot(depth=8)(inputs)
        self.assertEqual(tensor.get_shape().as_list(), [32, 8])

    def test_embed(self):
        inputs = tl.layers.Input([8, 100], dtype=tl.int32)
        embed = tl.layers.Embedding(vocabulary_size=1000, embedding_size=50, name='embed')
        print(embed)
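        # Embedding looks up a 50-dimensional vector for every id, appending
        # an embedding axis: [8, 100] -> [8, 100, 50].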
        tensor = embed(inputs)
        self.assertEqual(tensor.get_shape().as_list(), [8, 100, 50])

    def test_avg_embed(self):
        batch_size = 8
        length = 5
        inputs = tl.layers.Input([batch_size, length], dtype=tl.int32)
        avgembed = tl.layers.AverageEmbedding(vocabulary_size=1000, embedding_size=50, name='avg')
        print(avgembed)
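        # AverageEmbedding averages the word vectors over the sequence axis,
        # yielding one sentence embedding per example:
        # [batch_size, length] -> [batch_size, 50].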
        tensor = avgembed(inputs)
        self.assertEqual(tensor.get_shape().as_list(), [batch_size, 50])

    def test_word2vec_nce(self):
        batch_size = 8
        embedding_size = 50
        inputs = tl.layers.Input([batch_size], dtype=tl.int32)
        labels = tl.layers.Input([batch_size, 1], dtype=tl.int32)
        emb_net = tl.layers.Word2vecEmbedding(
            vocabulary_size=10000,
            embedding_size=embedding_size,
            num_sampled=100,
            activate_nce_loss=True,  # build the NCE loss alongside the embedding
            nce_loss_args={},
            E_init=tl.initializers.random_uniform(minval=-1.0, maxval=1.0),
            nce_W_init=tl.initializers.truncated_normal(stddev=float(1.0 / np.sqrt(embedding_size))),
            nce_b_init=tl.initializers.constant(value=0.0),
        )
        print(emb_net)
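        # With activate_nce_loss=True the layer can return either the plain
        # embedding (use_nce_loss=False) or an (embedding, nce_loss) pair;
        # omitting the flag defaults to returning the pair, which the tuple
        # unpacking below relies on.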
        embed_tensor = emb_net([inputs, labels], use_nce_loss=False)
        embed_tensor, embed_nce_loss = emb_net([inputs, labels], use_nce_loss=True)
        embed_tensor, embed_nce_loss = emb_net([inputs, labels])
        self.assertEqual(embed_tensor.get_shape().as_list(), [batch_size, embedding_size])

    def test_word2vec_no_nce(self):
        batch_size = 8
        embedding_size = 50
        inputs = tl.layers.Input([batch_size], dtype=tl.int32)
        emb_net = tl.layers.Word2vecEmbedding(
            vocabulary_size=10000,
            embedding_size=embedding_size,
            num_sampled=100,
            activate_nce_loss=False,  # no NCE loss: the layer only embeds
            nce_loss_args={},
            E_init=tl.initializers.random_uniform(minval=-1.0, maxval=1.0),
            nce_W_init=tl.initializers.truncated_normal(stddev=float(1.0 / np.sqrt(embedding_size))),
            nce_b_init=tl.initializers.constant(value=0.0),
        )
        print(emb_net)
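        # Without NCE activated, no labels are needed and the layer always
        # returns just the embedding; asking for the loss anyway should fail.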
        embed_tensor = emb_net(inputs)
        embed_tensor = emb_net(inputs, use_nce_loss=False)
        # Requesting the NCE loss on a layer built without it must raise.
        with self.assertRaises(AttributeError):
            emb_net(inputs, use_nce_loss=True)
        self.assertEqual(embed_tensor.get_shape().as_list(), [batch_size, embedding_size])


if __name__ == '__main__':
    unittest.main()