transformers/notebooks/Comparing-TF-and-PT-models-...

4816 lines
169 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing TensorFlow (original) and PyTorch models\n",
"\n",
"You can use this small notebook to check the conversion of the model's weights from the TensorFlow model to the PyTorch model. In the following, we compare the weights of the last layer on a simple example (in `input.txt`), but both models return all the hidden layers so you can check every stage of the model.\n",
"\n",
"To run this notebook, follow these instructions:\n",
"- make sure that your Python environment has both TensorFlow and PyTorch installed,\n",
"- download the original TensorFlow implementation,\n",
"- download a pre-trained TensorFlow model as indicated in the TensorFlow implementation readme,\n",
"- run the script `convert_tf_checkpoint_to_pytorch.py` as indicated in the `README` to convert the pre-trained TensorFlow model to PyTorch.\n",
"\n",
"If needed, change the relative paths indicated in this notebook (at the beginning of Sections 1 and 2) to point to the relevant models and code."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:26.999106Z",
"start_time": "2018-11-16T10:02:26.985709Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Work from the parent directory so that the relative paths used below\n",
"# (e.g. `./tensorflow_code/`, `./samples/input.txt`) resolve.\n",
"# NOTE: not idempotent - re-running this cell moves up one more level.\n",
"os.chdir(os.pardir)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1/ TensorFlow code"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:27.664528Z",
"start_time": "2018-11-16T10:02:27.651019Z"
}
},
"outputs": [],
"source": [
"# Where the original TensorFlow code and the pre-trained checkpoint live.\n",
"original_tf_inplem_dir = \"./tensorflow_code/\"\n",
"model_dir = \"../google_models/uncased_L-12_H-768_A-12/\"\n",
"\n",
"# Files read from the checkpoint directory.\n",
"vocab_file = os.path.join(model_dir, \"vocab.txt\")\n",
"bert_config_file = os.path.join(model_dir, \"bert_config.json\")\n",
"init_checkpoint = os.path.join(model_dir, \"bert_model.ckpt\")\n",
"\n",
"# Sample text and the fixed sizes the features are padded to.\n",
"input_file = \"./samples/input.txt\"\n",
"max_seq_length = 128\n",
"max_predictions_per_seq = 20\n",
"\n",
"# Token indices replaced by [MASK] in every example.\n",
"masked_lm_positions = [6]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:30.202182Z",
"start_time": "2018-11-16T10:02:28.112570Z"
}
},
"outputs": [],
"source": [
"import importlib.util\n",
"import sys\n",
"import tensorflow as tf\n",
"import pytorch_transformers as ppb\n",
"\n",
"def del_all_flags(FLAGS):\n",
"    \"\"\"Delete every flag currently registered on `FLAGS`.\n",
"\n",
"    Called before each `tensorflow_code` import below - presumably so that flags\n",
"    defined by one module do not clash with the next one (TODO confirm).\n",
"    \"\"\"\n",
"    # Snapshot the names first so the mapping is not modified while iterating.\n",
"    for flag_name in list(FLAGS._flags()):\n",
"        FLAGS.__delattr__(flag_name)\n",
"\n",
"del_all_flags(tf.flags.FLAGS)\n",
"import tensorflow_code.extract_features as ef\n",
"del_all_flags(tf.flags.FLAGS)\n",
"import tensorflow_code.modeling as tfm\n",
"del_all_flags(tf.flags.FLAGS)\n",
"import tensorflow_code.tokenization as tft\n",
"del_all_flags(tf.flags.FLAGS)\n",
"import tensorflow_code.run_pretraining as rp\n",
"del_all_flags(tf.flags.FLAGS)\n",
"import tensorflow_code.create_pretraining_data as cpp"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:30.238027Z",
"start_time": "2018-11-16T10:02:30.204943Z"
},
"code_folding": [
15
]
},
"outputs": [],
"source": [
"import re\n",
"class InputExample(object):\n",
"    \"\"\"One masked example: tokens, segment ids and masked-LM targets.\"\"\"\n",
"\n",
"    def __init__(self, tokens, segment_ids, masked_lm_positions,\n",
"                 masked_lm_labels, is_random_next):\n",
"        self.tokens = tokens\n",
"        self.segment_ids = segment_ids\n",
"        self.masked_lm_positions = masked_lm_positions\n",
"        self.masked_lm_labels = masked_lm_labels\n",
"        self.is_random_next = is_random_next\n",
"    def __repr__(self):\n",
"        return '\\n'.join(k + \":\" + str(v) for k, v in self.__dict__.items())\n",
"\n",
"\n",
"def read_examples(input_file, tokenizer, masked_lm_positions):\n",
"    \"\"\"Read a list of `InputExample`s from an input file.\n",
"\n",
"    Each non-blank line is either a single sentence or a pair written as\n",
"    `text_a ||| text_b`. The tokens at `masked_lm_positions` (indices into the\n",
"    concatenated token list) are replaced by `[MASK]`; the original tokens are\n",
"    kept as the labels.\n",
"\n",
"    Args:\n",
"        input_file: path of the text file, opened with `tf.gfile.GFile`.\n",
"        tokenizer: object exposing `tokenize(text)`; the result is expected to\n",
"            be a list of tokens.\n",
"        masked_lm_positions: token indices to mask in every example.\n",
"\n",
"    Returns:\n",
"        A list of `InputExample`, one per non-blank line.\n",
"\n",
"    Raises:\n",
"        IndexError: if a masked position is beyond the end of a line's tokens.\n",
"    \"\"\"\n",
"    examples = []\n",
"    with tf.gfile.GFile(input_file, \"r\") as reader:\n",
"        # readline() yields \"\" only at end of file (a blank line is \"\\n\").\n",
"        for raw_line in iter(reader.readline, \"\"):\n",
"            line = raw_line.strip()\n",
"            if not line:\n",
"                # Blank lines carry no tokens to mask; skip them.\n",
"                continue\n",
"            m = re.match(r\"^(.*) \\|\\|\\| (.*)$\", line)\n",
"            if m is None:\n",
"                text_a, text_b = line, None\n",
"            else:\n",
"                text_a, text_b = m.group(1), m.group(2)\n",
"            tokens_a = tokenizer.tokenize(text_a)\n",
"            # A single-sentence line has no second segment: use an empty list so\n",
"            # the concatenation and the segment ids below still work.\n",
"            tokens_b = tokenizer.tokenize(text_b) if text_b else []\n",
"            tokens = tokens_a + tokens_b\n",
"            masked_lm_labels = []\n",
"            for m_pos in masked_lm_positions:\n",
"                masked_lm_labels.append(tokens[m_pos])\n",
"                tokens[m_pos] = '[MASK]'\n",
"            examples.append(\n",
"                InputExample(\n",
"                    tokens=tokens,\n",
"                    segment_ids=[0] * len(tokens_a) + [1] * len(tokens_b),\n",
"                    # Copy so the examples do not share the caller's list object.\n",
"                    masked_lm_positions=list(masked_lm_positions),\n",
"                    masked_lm_labels=masked_lm_labels,\n",
"                    is_random_next=False))\n",
"    return examples"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:30.304018Z",
"start_time": "2018-11-16T10:02:30.240189Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tokens:['who', 'was', 'jim', 'henson', '?', 'jim', '[MASK]', 'was', 'a', 'puppet', '##eer']\n",
"segment_ids:[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]\n",
"masked_lm_positions:[6]\n",
"masked_lm_labels:['henson']\n",
"is_random_next:False\n"
]
}
],
"source": [
"# Load the TF model configuration and build the tokenizer from the checkpoint vocabulary.\n",
"bert_config = tfm.BertConfig.from_json_file(bert_config_file)\n",
"tokenizer = ppb.BertTokenizer(vocab_file=vocab_file, do_lower_case=True)\n",
"\n",
"# Turn the input file into masked examples and show the first one.\n",
"examples = read_examples(\n",
"    input_file=input_file,\n",
"    tokenizer=tokenizer,\n",
"    masked_lm_positions=masked_lm_positions)\n",
"print(examples[0])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:33.324167Z",
"start_time": "2018-11-16T10:02:33.291909Z"
},
"code_folding": [
16
]
},
"outputs": [],
"source": [
"class InputFeatures(object):\n",
"    \"\"\"Model inputs and masked-LM targets for one example.\"\"\"\n",
"\n",
"    def __init__(self, input_ids, input_mask, segment_ids, masked_lm_positions,\n",
"                 masked_lm_ids, masked_lm_weights, next_sentence_label):\n",
"        self.input_ids = input_ids\n",
"        self.input_mask = input_mask\n",
"        self.segment_ids = segment_ids\n",
"        self.masked_lm_positions = masked_lm_positions\n",
"        self.masked_lm_ids = masked_lm_ids\n",
"        self.masked_lm_weights = masked_lm_weights\n",
"        self.next_sentence_labels = next_sentence_label\n",
"\n",
"    def __repr__(self):\n",
"        return '\\n'.join(\"%s:%s\" % item for item in vars(self).items())\n",
"\n",
"def pretraining_convert_examples_to_features(instances, tokenizer, max_seq_length,\n",
"                                             max_predictions_per_seq):\n",
"    \"\"\"Convert examples into zero-padded `InputFeatures`.\n",
"\n",
"    Token-level lists are padded to `max_seq_length` and the masked-LM lists to\n",
"    `max_predictions_per_seq`. The first five examples are logged.\n",
"\n",
"    Args:\n",
"        instances: iterable of objects with `tokens`, `segment_ids`,\n",
"            `masked_lm_positions`, `masked_lm_labels` and `is_random_next`.\n",
"        tokenizer: object exposing `convert_tokens_to_ids(tokens)`.\n",
"        max_seq_length: length every token-level list is padded to.\n",
"        max_predictions_per_seq: length every masked-LM list is padded to.\n",
"\n",
"    Returns:\n",
"        A list of `InputFeatures`, one per instance.\n",
"\n",
"    Raises:\n",
"        AssertionError: if an example is longer than `max_seq_length` or its\n",
"            segment ids do not line up with its tokens.\n",
"    \"\"\"\n",
"    features = []\n",
"    for inst_index, instance in enumerate(instances):\n",
"        input_ids = tokenizer.convert_tokens_to_ids(instance.tokens)\n",
"        input_mask = [1] * len(input_ids)\n",
"        segment_ids = list(instance.segment_ids)\n",
"        assert len(input_ids) <= max_seq_length\n",
"\n",
"        # Zero-pad the three token-level lists by the same amount.\n",
"        seq_padding = [0] * (max_seq_length - len(input_ids))\n",
"        input_ids.extend(seq_padding)\n",
"        input_mask.extend(seq_padding)\n",
"        segment_ids.extend(seq_padding)\n",
"\n",
"        assert len(input_ids) == max_seq_length\n",
"        assert len(input_mask) == max_seq_length\n",
"        assert len(segment_ids) == max_seq_length\n",
"\n",
"        masked_lm_positions = list(instance.masked_lm_positions)\n",
"        masked_lm_ids = tokenizer.convert_tokens_to_ids(instance.masked_lm_labels)\n",
"        masked_lm_weights = [1.0] * len(masked_lm_ids)\n",
"\n",
"        # Unused prediction slots get position 0, id 0 and weight 0.0; a\n",
"        # non-positive count multiplies to an empty list, i.e. no padding.\n",
"        unused_slots = max_predictions_per_seq - len(masked_lm_positions)\n",
"        masked_lm_positions.extend([0] * unused_slots)\n",
"        masked_lm_ids.extend([0] * unused_slots)\n",
"        masked_lm_weights.extend([0.0] * unused_slots)\n",
"\n",
"        if instance.is_random_next:\n",
"            next_sentence_label = 1\n",
"        else:\n",
"            next_sentence_label = 0\n",
"\n",
"        feature = InputFeatures(input_ids, input_mask, segment_ids,\n",
"                                masked_lm_positions, masked_lm_ids,\n",
"                                masked_lm_weights, next_sentence_label)\n",
"        features.append(feature)\n",
"\n",
"        if inst_index < 5:\n",
"            tf.logging.info(\"*** Example ***\")\n",
"            tf.logging.info(\"tokens: %s\" % \" \".join(str(tok) for tok in instance.tokens))\n",
"            tf.logging.info(\"features: %s\" % str(feature))\n",
"    return features"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:34.185367Z",
"start_time": "2018-11-16T10:02:34.155046Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:*** Example ***\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:34 - INFO - tensorflow - *** Example ***\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:tokens: who was jim henson ? jim [MASK] was a puppet ##eer\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:34 - INFO - tensorflow - tokens: who was jim henson ? jim [MASK] was a puppet ##eer\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:features: input_ids:[2040, 2001, 3958, 27227, 1029, 3958, 103, 2001, 1037, 13997, 11510, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"input_mask:[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"segment_ids:[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"masked_lm_positions:[6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"masked_lm_ids:[27227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"masked_lm_weights:[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
"next_sentence_labels:0\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:34 - INFO - tensorflow - features: input_ids:[2040, 2001, 3958, 27227, 1029, 3958, 103, 2001, 1037, 13997, 11510, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"input_mask:[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"segment_ids:[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"masked_lm_positions:[6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"masked_lm_ids:[27227, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n",
"masked_lm_weights:[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
"next_sentence_labels:0\n"
]
}
],
"source": [
"# Pad the examples into fixed-length features.\n",
"features = pretraining_convert_examples_to_features(\n",
"    instances=examples,\n",
"    tokenizer=tokenizer,\n",
"    max_seq_length=max_seq_length,\n",
"    max_predictions_per_seq=max_predictions_per_seq)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:34.912005Z",
"start_time": "2018-11-16T10:02:34.882111Z"
}
},
"outputs": [],
"source": [
"def input_fn_builder(features, seq_length, max_predictions_per_seq, tokenizer):\n",
"    \"\"\"Creates an `input_fn` closure to be passed to TPUEstimator.\n",
"\n",
"    Args:\n",
"        features: sequence of objects carrying the seven feature attributes\n",
"            collected below; its `len()` is used as the number of examples.\n",
"        seq_length: second dimension of the token-level tensors.\n",
"        max_predictions_per_seq: second dimension of the masked-LM tensors.\n",
"        tokenizer: not used by this function.\n",
"\n",
"    Returns:\n",
"        `input_fn(params)`, which reads `params[\"batch_size\"]` and returns a\n",
"        batched `tf.data.Dataset` of the feature tensors.\n",
"    \"\"\"\n",
"    field_names = (\n",
"        \"input_ids\", \"input_mask\", \"segment_ids\", \"masked_lm_positions\",\n",
"        \"masked_lm_ids\", \"masked_lm_weights\", \"next_sentence_labels\")\n",
"\n",
"    # Gather each attribute across all features in a single pass.\n",
"    columns = {name: [] for name in field_names}\n",
"    for feature in features:\n",
"        for name in field_names:\n",
"            columns[name].append(getattr(feature, name))\n",
"\n",
"    def input_fn(params):\n",
"        \"\"\"The actual input function.\"\"\"\n",
"        batch_size = params[\"batch_size\"]\n",
"\n",
"        num_examples = len(features)\n",
"\n",
"        # (feature name, second dimension, dtype) of every tensor served.\n",
"        layout = [\n",
"            (\"input_ids\", seq_length, tf.int32),\n",
"            (\"input_mask\", seq_length, tf.int32),\n",
"            (\"segment_ids\", seq_length, tf.int32),\n",
"            (\"masked_lm_positions\", max_predictions_per_seq, tf.int32),\n",
"            (\"masked_lm_ids\", max_predictions_per_seq, tf.int32),\n",
"            (\"masked_lm_weights\", max_predictions_per_seq, tf.float32),\n",
"            (\"next_sentence_labels\", 1, tf.int32),\n",
"        ]\n",
"\n",
"        # This is for demo purposes and does NOT scale to large data sets. We do\n",
"        # not use Dataset.from_generator() because that uses tf.py_func which is\n",
"        # not TPU compatible. The right way to load data is with TFRecordReader.\n",
"        tensors = {}\n",
"        for name, width, dtype in layout:\n",
"            tensors[name] = tf.constant(\n",
"                columns[name], shape=[num_examples, width], dtype=dtype)\n",
"\n",
"        d = tf.data.Dataset.from_tensor_slices(tensors)\n",
"        return d.batch(batch_size=batch_size, drop_remainder=False)\n",
"\n",
"    return input_fn\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:35.671603Z",
"start_time": "2018-11-16T10:02:35.626167Z"
},
"code_folding": [
64,
77
]
},
"outputs": [],
"source": [
"def model_fn_builder(bert_config, init_checkpoint, learning_rate,\n",
"                     num_train_steps, num_warmup_steps, use_tpu,\n",
"                     use_one_hot_embeddings):\n",
"    \"\"\"Returns `model_fn` closure for TPUEstimator.\n",
"\n",
"    The closure builds `tfm.BertModel` with the masked-LM and next-sentence\n",
"    heads from `rp`, optionally initializes the trainable variables from\n",
"    `init_checkpoint`, and returns a `TPUEstimatorSpec` for the requested mode.\n",
"\n",
"    Args:\n",
"        bert_config: config object handed to `tfm.BertModel` and both heads.\n",
"        init_checkpoint: checkpoint to initialize from; skipped when falsy.\n",
"        learning_rate: forwarded to `optimization.create_optimizer` (TRAIN only).\n",
"        num_train_steps: forwarded to `optimization.create_optimizer` (TRAIN only).\n",
"        num_warmup_steps: forwarded to `optimization.create_optimizer` (TRAIN only).\n",
"        use_tpu: when true, checkpoint initialization is wrapped in a\n",
"            `scaffold_fn` instead of being executed directly.\n",
"        use_one_hot_embeddings: forwarded to `tfm.BertModel`.\n",
"\n",
"    Returns:\n",
"        A `model_fn(features, labels, mode, params)` callable.\n",
"    \"\"\"\n",
"\n",
"    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument\n",
"        \"\"\"The `model_fn` for TPUEstimator.\n",
"\n",
"        Raises:\n",
"            ValueError: if `mode` is not TRAIN, EVAL or PREDICT.\n",
"        \"\"\"\n",
"\n",
"        tf.logging.info(\"*** Features ***\")\n",
"        for name in sorted(features.keys()):\n",
"            tf.logging.info(\" name = %s, shape = %s\" % (name, features[name].shape))\n",
"\n",
"        input_ids = features[\"input_ids\"]\n",
"        input_mask = features[\"input_mask\"]\n",
"        segment_ids = features[\"segment_ids\"]\n",
"        masked_lm_positions = features[\"masked_lm_positions\"]\n",
"        masked_lm_ids = features[\"masked_lm_ids\"]\n",
"        masked_lm_weights = features[\"masked_lm_weights\"]\n",
"        next_sentence_labels = features[\"next_sentence_labels\"]\n",
"\n",
"        is_training = (mode == tf.estimator.ModeKeys.TRAIN)\n",
"\n",
"        model = tfm.BertModel(\n",
"            config=bert_config,\n",
"            is_training=is_training,\n",
"            input_ids=input_ids,\n",
"            input_mask=input_mask,\n",
"            token_type_ids=segment_ids,\n",
"            use_one_hot_embeddings=use_one_hot_embeddings)\n",
"\n",
"        (masked_lm_loss,\n",
"         masked_lm_example_loss, masked_lm_log_probs) = rp.get_masked_lm_output(\n",
"             bert_config, model.get_sequence_output(), model.get_embedding_table(),\n",
"             masked_lm_positions, masked_lm_ids, masked_lm_weights)\n",
"\n",
"        (next_sentence_loss, next_sentence_example_loss,\n",
"         next_sentence_log_probs) = rp.get_next_sentence_output(\n",
"             bert_config, model.get_pooled_output(), next_sentence_labels)\n",
"\n",
"        total_loss = masked_lm_loss + next_sentence_loss\n",
"\n",
"        tvars = tf.trainable_variables()\n",
"\n",
"        initialized_variable_names = {}\n",
"        scaffold_fn = None\n",
"        if init_checkpoint:\n",
"            (assignment_map,\n",
"             initialized_variable_names) = tfm.get_assigment_map_from_checkpoint(\n",
"                 tvars, init_checkpoint)\n",
"            if use_tpu:\n",
"\n",
"                def tpu_scaffold():\n",
"                    tf.train.init_from_checkpoint(init_checkpoint, assignment_map)\n",
"                    return tf.train.Scaffold()\n",
"\n",
"                scaffold_fn = tpu_scaffold\n",
"            else:\n",
"                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)\n",
"\n",
"        tf.logging.info(\"**** Trainable Variables ****\")\n",
"        for var in tvars:\n",
"            init_string = \"\"\n",
"            if var.name in initialized_variable_names:\n",
"                init_string = \", *INIT_FROM_CKPT*\"\n",
"            tf.logging.info(\" name = %s, shape = %s%s\", var.name, var.shape,\n",
"                            init_string)\n",
"\n",
"        output_spec = None\n",
"        if mode == tf.estimator.ModeKeys.TRAIN:\n",
"            # `optimization` is not among the notebook's imports, so this branch\n",
"            # used to fail with a NameError. Import it lazily here.\n",
"            # NOTE(review): assumes `tensorflow_code/optimization.py` from the\n",
"            # original TensorFlow implementation is present - confirm.\n",
"            import tensorflow_code.optimization as optimization\n",
"            train_op = optimization.create_optimizer(\n",
"                total_loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu)\n",
"\n",
"            output_spec = tf.contrib.tpu.TPUEstimatorSpec(\n",
"                mode=mode,\n",
"                loss=total_loss,\n",
"                train_op=train_op,\n",
"                scaffold_fn=scaffold_fn)\n",
"        elif mode == tf.estimator.ModeKeys.EVAL:\n",
"\n",
"            def metric_fn(masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,\n",
"                          masked_lm_weights, next_sentence_example_loss,\n",
"                          next_sentence_log_probs, next_sentence_labels):\n",
"                \"\"\"Computes the loss and accuracy of the model.\"\"\"\n",
"                masked_lm_log_probs = tf.reshape(masked_lm_log_probs,\n",
"                                                 [-1, masked_lm_log_probs.shape[-1]])\n",
"                masked_lm_predictions = tf.argmax(\n",
"                    masked_lm_log_probs, axis=-1, output_type=tf.int32)\n",
"                masked_lm_example_loss = tf.reshape(masked_lm_example_loss, [-1])\n",
"                masked_lm_ids = tf.reshape(masked_lm_ids, [-1])\n",
"                masked_lm_weights = tf.reshape(masked_lm_weights, [-1])\n",
"                masked_lm_accuracy = tf.metrics.accuracy(\n",
"                    labels=masked_lm_ids,\n",
"                    predictions=masked_lm_predictions,\n",
"                    weights=masked_lm_weights)\n",
"                masked_lm_mean_loss = tf.metrics.mean(\n",
"                    values=masked_lm_example_loss, weights=masked_lm_weights)\n",
"\n",
"                next_sentence_log_probs = tf.reshape(\n",
"                    next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])\n",
"                next_sentence_predictions = tf.argmax(\n",
"                    next_sentence_log_probs, axis=-1, output_type=tf.int32)\n",
"                next_sentence_labels = tf.reshape(next_sentence_labels, [-1])\n",
"                next_sentence_accuracy = tf.metrics.accuracy(\n",
"                    labels=next_sentence_labels, predictions=next_sentence_predictions)\n",
"                next_sentence_mean_loss = tf.metrics.mean(\n",
"                    values=next_sentence_example_loss)\n",
"\n",
"                return {\n",
"                    \"masked_lm_accuracy\": masked_lm_accuracy,\n",
"                    \"masked_lm_loss\": masked_lm_mean_loss,\n",
"                    \"next_sentence_accuracy\": next_sentence_accuracy,\n",
"                    \"next_sentence_loss\": next_sentence_mean_loss,\n",
"                }\n",
"\n",
"            eval_metrics = (metric_fn, [\n",
"                masked_lm_example_loss, masked_lm_log_probs, masked_lm_ids,\n",
"                masked_lm_weights, next_sentence_example_loss,\n",
"                next_sentence_log_probs, next_sentence_labels\n",
"            ])\n",
"            output_spec = tf.contrib.tpu.TPUEstimatorSpec(\n",
"                mode=mode,\n",
"                loss=total_loss,\n",
"                eval_metrics=eval_metrics,\n",
"                scaffold_fn=scaffold_fn)\n",
"        elif mode == tf.estimator.ModeKeys.PREDICT:\n",
"            masked_lm_log_probs = tf.reshape(masked_lm_log_probs,\n",
"                                             [-1, masked_lm_log_probs.shape[-1]])\n",
"            masked_lm_predictions = tf.argmax(\n",
"                masked_lm_log_probs, axis=-1, output_type=tf.int32)\n",
"\n",
"            next_sentence_log_probs = tf.reshape(\n",
"                next_sentence_log_probs, [-1, next_sentence_log_probs.shape[-1]])\n",
"            next_sentence_predictions = tf.argmax(\n",
"                next_sentence_log_probs, axis=-1, output_type=tf.int32)\n",
"\n",
"            # The leading dimension is hard-coded to 1, so these reshapes only\n",
"            # fit a prediction batch holding a single example.\n",
"            masked_lm_predictions = tf.reshape(masked_lm_predictions,\n",
"                                               [1, masked_lm_positions.shape[-1]])\n",
"            next_sentence_predictions = tf.reshape(next_sentence_predictions,\n",
"                                                   [1, 1])\n",
"\n",
"            predictions = {\n",
"                \"masked_lm_predictions\": masked_lm_predictions,\n",
"                \"next_sentence_predictions\": next_sentence_predictions\n",
"            }\n",
"\n",
"            output_spec = tf.contrib.tpu.TPUEstimatorSpec(\n",
"                mode=mode, predictions=predictions, scaffold_fn=scaffold_fn)\n",
"        else:\n",
"            raise ValueError(\"Only TRAIN, EVAL and PREDICT modes are supported: %s\" % (mode))\n",
"\n",
"        return output_spec\n",
"\n",
"    return model_fn"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:40.328700Z",
"start_time": "2018-11-16T10:02:36.289676Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:Estimator's model_fn (<function model_fn_builder.<locals>.model_fn at 0x12a864ae8>) includes params argument, but params are not passed to Estimator.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - WARNING - tensorflow - Estimator's model_fn (<function model_fn_builder.<locals>.model_fn at 0x12a864ae8>) includes params argument, but params are not passed to Estimator.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:Using temporary folder as model directory: /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmp4x8r3x3d\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - WARNING - tensorflow - Using temporary folder as model directory: /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmp4x8r3x3d\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Using config: {'_model_dir': '/var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmp4x8r3x3d', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
"graph_options {\n",
" rewrite_options {\n",
" meta_optimizer_iterations: ONE\n",
" }\n",
"}\n",
", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12dbb5ac8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=1, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_cluster': None}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - Using config: {'_model_dir': '/var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmp4x8r3x3d', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true\n",
"graph_options {\n",
" rewrite_options {\n",
" meta_optimizer_iterations: ONE\n",
" }\n",
"}\n",
", '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12dbb5ac8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=2, num_shards=1, num_cores_per_replica=None, per_host_input_for_training=3, tpu_job_name=None, initial_infeed_sleep_secs=None, input_partition_dims=None), '_cluster': None}\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:Setting TPUConfig.num_shards==1 is an unsupported behavior. Please fix as soon as possible (leaving num_shards as None.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - WARNING - tensorflow - Setting TPUConfig.num_shards==1 is an unsupported behavior. Please fix as soon as possible (leaving num_shards as None.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:_TPUContext: eval_on_tpu True\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - _TPUContext: eval_on_tpu True\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:eval_on_tpu ignored because use_tpu is False.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - WARNING - tensorflow - eval_on_tpu ignored because use_tpu is False.\n"
]
}
],
"source": [
"# Run configuration for the estimator (a single shard, per-host input pipeline).\n",
"is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2\n",
"run_config = tf.contrib.tpu.RunConfig(\n",
"    master=None,\n",
"    tpu_config=tf.contrib.tpu.TPUConfig(\n",
"        num_shards=1, per_host_input_for_training=is_per_host))\n",
"\n",
"# Model function initialized from the pre-trained TensorFlow checkpoint.\n",
"model_fn = model_fn_builder(\n",
"    bert_config=bert_config,\n",
"    init_checkpoint=init_checkpoint,\n",
"    learning_rate=0,\n",
"    num_train_steps=1,\n",
"    num_warmup_steps=1,\n",
"    use_tpu=False,\n",
"    use_one_hot_embeddings=False)\n",
"\n",
"# If TPU is not available, this will fall back to normal Estimator on CPU\n",
"# or GPU.\n",
"estimator = tf.contrib.tpu.TPUEstimator(\n",
"    use_tpu=False,\n",
"    model_fn=model_fn,\n",
"    config=run_config,\n",
"    predict_batch_size=1)\n",
"\n",
"# Input function serving the padded features built above.\n",
"input_fn = input_fn_builder(\n",
"    features=features,\n",
"    seq_length=max_seq_length,\n",
"    max_predictions_per_seq=max_predictions_per_seq,\n",
"    tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:46.596956Z",
"start_time": "2018-11-16T10:02:40.331008Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Could not find trained model in model_dir: /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmp4x8r3x3d, running initialization to predict.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - Could not find trained model in model_dir: /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmp4x8r3x3d, running initialization to predict.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Calling model_fn.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - Calling model_fn.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Running infer on CPU\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - Running infer on CPU\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:*** Features ***\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - *** Features ***\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = input_ids, shape = (?, 128)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = input_ids, shape = (?, 128)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = input_mask, shape = (?, 128)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = input_mask, shape = (?, 128)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = masked_lm_ids, shape = (?, 20)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = masked_lm_ids, shape = (?, 20)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = masked_lm_positions, shape = (?, 20)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = masked_lm_positions, shape = (?, 20)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = masked_lm_weights, shape = (?, 20)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = masked_lm_weights, shape = (?, 20)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = next_sentence_labels, shape = (?, 1)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = next_sentence_labels, shape = (?, 1)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = segment_ids, shape = (?, 128)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:40 - INFO - tensorflow - name = segment_ids, shape = (?, 128)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:**** Trainable Variables ****\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - **** Trainable Variables ****\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/embeddings/word_embeddings:0, shape = (30522, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/embeddings/token_type_embeddings:0, shape = (2, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/embeddings/position_embeddings:0, shape = (512, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/embeddings/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/embeddings/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_0/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_1/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_1/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_2/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_3/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_3/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_4/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_4/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_5/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_5/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_6/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_6/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_7/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_7/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_8/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_8/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_9/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_9/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_10/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_10/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/self/query/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/self/query/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/self/key/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/self/key/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/self/value/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/self/value/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/output/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/attention/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/intermediate/dense/kernel:0, shape = (768, 3072), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/intermediate/dense/bias:0, shape = (3072,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/output/dense/kernel:0, shape = (3072, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/output/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/output/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/encoder/layer_11/output/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/pooler/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = bert/pooler/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/predictions/transform/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/predictions/transform/dense/kernel:0, shape = (768, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/predictions/transform/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/predictions/transform/dense/bias:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/predictions/transform/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/predictions/transform/LayerNorm/beta:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/predictions/transform/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/predictions/transform/LayerNorm/gamma:0, shape = (768,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/predictions/output_bias:0, shape = (30522,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/predictions/output_bias:0, shape = (30522,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/seq_relationship/output_weights:0, shape = (2, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/seq_relationship/output_weights:0, shape = (2, 768), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow: name = cls/seq_relationship/output_bias:0, shape = (2,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - name = cls/seq_relationship/output_bias:0, shape = (2,), *INIT_FROM_CKPT*\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Done calling model_fn.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:43 - INFO - tensorflow - Done calling model_fn.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Graph was finalized.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:44 - INFO - tensorflow - Graph was finalized.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Running local_init_op.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:45 - INFO - tensorflow - Running local_init_op.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Done running local_init_op.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:45 - INFO - tensorflow - Done running local_init_op.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:prediction_loop marked as finished\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:46 - INFO - tensorflow - prediction_loop marked as finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:prediction_loop marked as finished\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:02:46 - INFO - tensorflow - prediction_loop marked as finished\n"
]
}
],
"source": [
"# Materialise every result yielded by the estimator's predict generator.\n",
"tensorflow_all_out = list(estimator.predict(input_fn, yield_single_examples=True))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:46.634304Z",
"start_time": "2018-11-16T10:02:46.598800Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n",
"2\n",
"dict_keys(['masked_lm_predictions', 'next_sentence_predictions'])\n",
"masked_lm_predictions [27227 1010 1010 1010 1010 1010 1010 1010 1010 1010 1010 1010\n",
" 1010 1010 1010 1010 1010 1010 1010 1010]\n",
"predicted token ['henson', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',', ',']\n"
]
}
],
"source": [
"# Sanity-check the collected TensorFlow predictions: number of examples,\n",
"# number of entries in the first result, and that result's keys (one per line).\n",
"print(len(tensorflow_all_out), len(tensorflow_all_out[0]), tensorflow_all_out[0].keys(), sep=\"\\n\")\n",
"# Raw predicted vocabulary ids for the first example, then the same ids as tokens.\n",
"print(\"masked_lm_predictions\", tensorflow_all_out[0]['masked_lm_predictions'])\n",
"print(\"predicted token\", tokenizer.convert_ids_to_tokens(tensorflow_all_out[0]['masked_lm_predictions']))"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:02:46.671229Z",
"start_time": "2018-11-16T10:02:46.637102Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensorflow_output: ['henson']\n"
]
}
],
"source": [
"# The prediction array holds more ids than there are masked positions (the trailing\n",
"# entries look like padding), so keep only the first len(masked_lm_positions) tokens.\n",
"tensorflow_outputs = tokenizer.convert_ids_to_tokens(\n",
"    tensorflow_all_out[0]['masked_lm_predictions']\n",
")[:len(masked_lm_positions)]\n",
"print(\"tensorflow_output:\", tensorflow_outputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2/ PyTorch code"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:03:03.556557Z",
"start_time": "2018-11-16T10:03:03.519654Z"
}
},
"outputs": [],
"source": [
"from examples import extract_features\n",
"from examples.extract_features import *"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:03:03.952710Z",
"start_time": "2018-11-16T10:03:03.921917Z"
}
},
"outputs": [],
"source": [
"# The converted PyTorch weights sit in the same directory as the TensorFlow checkpoint,\n",
"# so derive the path from `model_dir` (defined in Section 1) instead of repeating it.\n",
"init_checkpoint_pt = model_dir + \"pytorch_model.bin\""
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:03:12.307673Z",
"start_time": "2018-11-16T10:03:04.439317Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"11/16/2018 11:03:05 - INFO - pytorch_transformers.modeling_bert - loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/thomaswolf/.pytorch_transformers/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba\n",
"11/16/2018 11:03:05 - INFO - pytorch_transformers.modeling_bert - extracting archive file /Users/thomaswolf/.pytorch_transformers/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/yx/cw8n_njx3js5jksyw_qlp8p00000gn/T/tmpaqgsm566\n",
"11/16/2018 11:03:08 - INFO - pytorch_transformers.modeling_bert - Model config {\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"max_position_embeddings\": 512,\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"type_vocab_size\": 2,\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n"
]
},
{
"data": {
"text/plain": [
"BertForPreTraining(\n",
" (bert): BertModel(\n",
" (embeddings): BertEmbeddings(\n",
" (word_embeddings): Embedding(30522, 768)\n",
" (position_embeddings): Embedding(512, 768)\n",
" (token_type_embeddings): Embedding(2, 768)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (encoder): BertEncoder(\n",
" (layer): ModuleList(\n",
" (0): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (1): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (2): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (3): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (4): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (5): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (6): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (7): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (8): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (9): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (10): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (11): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (pooler): BertPooler(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (activation): Tanh()\n",
" )\n",
" )\n",
" (cls): BertPreTrainingHeads(\n",
" (predictions): BertLMPredictionHead(\n",
" (transform): BertPredictionHeadTransform(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" )\n",
" (decoder): Linear(in_features=768, out_features=30522, bias=False)\n",
" )\n",
" (seq_relationship): Linear(in_features=768, out_features=2, bias=True)\n",
" )\n",
")"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the pre-trained BERT with its pre-training heads and place it on the CPU.\n",
"# `.to()` returns the module itself, so the call can be chained onto the load.\n",
"device = torch.device(\"cpu\")\n",
"model = ppb.BertForPreTraining.from_pretrained('bert-base-uncased').to(device)\n",
"model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:03:12.351625Z",
"start_time": "2018-11-16T10:03:12.310736Z"
},
"code_folding": []
},
"outputs": [
{
"data": {
"text/plain": [
"BertForPreTraining(\n",
" (bert): BertModel(\n",
" (embeddings): BertEmbeddings(\n",
" (word_embeddings): Embedding(30522, 768)\n",
" (position_embeddings): Embedding(512, 768)\n",
" (token_type_embeddings): Embedding(2, 768)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (encoder): BertEncoder(\n",
" (layer): ModuleList(\n",
" (0): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (1): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (2): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (3): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (4): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (5): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (6): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (7): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (8): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (9): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (10): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (11): BertLayer(\n",
" (attention): BertAttention(\n",
" (self): BertSelfAttention(\n",
" (query): Linear(in_features=768, out_features=768, bias=True)\n",
" (key): Linear(in_features=768, out_features=768, bias=True)\n",
" (value): Linear(in_features=768, out_features=768, bias=True)\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" (output): BertSelfOutput(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" (intermediate): BertIntermediate(\n",
" (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
" )\n",
" (output): BertOutput(\n",
" (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" (dropout): Dropout(p=0.1)\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (pooler): BertPooler(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (activation): Tanh()\n",
" )\n",
" )\n",
" (cls): BertPreTrainingHeads(\n",
" (predictions): BertLMPredictionHead(\n",
" (transform): BertPredictionHeadTransform(\n",
" (dense): Linear(in_features=768, out_features=768, bias=True)\n",
" (LayerNorm): BertLayerNorm()\n",
" )\n",
" (decoder): Linear(in_features=768, out_features=30522, bias=False)\n",
" )\n",
" (seq_relationship): Linear(in_features=768, out_features=2, bias=True)\n",
" )\n",
")"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Stack each per-example feature field into one LongTensor (one row per example).\n",
"all_input_ids, all_input_mask, all_segment_ids, all_masked_lm_positions = (\n",
"    torch.tensor([getattr(feature, field) for feature in features], dtype=torch.long)\n",
"    for field in (\"input_ids\", \"input_mask\", \"segment_ids\", \"masked_lm_positions\")\n",
")\n",
"\n",
"# Walk the examples in their stored order, one example per batch.\n",
"eval_data = TensorDataset(\n",
"    all_input_ids, all_input_mask, all_segment_ids, all_masked_lm_positions\n",
")\n",
"eval_sampler = SequentialSampler(eval_data)\n",
"eval_dataloader = DataLoader(eval_data, batch_size=1, sampler=eval_sampler)\n",
"\n",
"# Switch to evaluation mode; as the cell's last expression this also displays the module repr.\n",
"model.eval()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:03:12.792741Z",
"start_time": "2018-11-16T10:03:12.354253Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([[ 2040, 2001, 3958, 27227, 1029, 3958, 103, 2001, 1037, 13997,\n",
" 11510, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0]])\n",
"tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0]])\n",
"tensor([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0]])\n",
"(1, 20, 30522)\n",
"[27227, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010, 1010]\n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"# Collect, for every example, the predicted vocabulary ids at its masked-LM positions.\n",
"pytorch_all_out = []\n",
"for input_ids, input_mask, segment_ids, tensor_masked_lm_positions in eval_dataloader:\n",
"    print(input_ids)\n",
"    print(input_mask)\n",
"    print(segment_ids)\n",
"    input_ids = input_ids.to(device)\n",
"    input_mask = input_mask.to(device)\n",
"    segment_ids = segment_ids.to(device)\n",
"\n",
"    # Pure inference: disable autograd so no graph is recorded or kept alive for the forward pass.\n",
"    with torch.no_grad():\n",
"        prediction_scores, _ = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask)\n",
"    # Index the first (and, with batch_size=1, only) example at its masked positions.\n",
"    prediction_scores = prediction_scores[0, tensor_masked_lm_positions].detach().cpu().numpy()\n",
"    print(prediction_scores.shape)\n",
"    # Highest-scoring vocabulary id along the last axis, flattened to a plain Python list.\n",
"    masked_lm_predictions = np.argmax(prediction_scores, axis=-1).squeeze().tolist()\n",
"    print(masked_lm_predictions)\n",
"    pytorch_all_out.append(masked_lm_predictions)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2018-11-16T10:03:12.828439Z",
"start_time": "2018-11-16T10:03:12.795420Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"pytorch_output: ['henson']\n",
"tensorflow_output: ['henson']\n"
]
}
],
"source": [
"# Map the first example's predicted ids back to tokens, truncated to len(masked_lm_positions) entries.\n",
"pytorch_outputs = tokenizer.convert_ids_to_tokens(\n",
"    pytorch_all_out[0]\n",
")[:len(masked_lm_positions)]\n",
"\n",
"# Print both frameworks' predictions one above the other for a visual comparison.\n",
"print(\"pytorch_output:\", pytorch_outputs)\n",
"print(\"tensorflow_output:\", tensorflow_outputs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.7"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "48px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}