small tweaks
This commit is contained in:
parent
9343a2311b
commit
2c731fd129
@@ -349,7 +349,6 @@ class BertModel(nn.Module):
     """BERT model ("Bidirectional Embedding Representations from a Transformer").
 
     Example usage:
 
     ```python
     # Already been converted into WordPiece token ids
     input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
@@ -359,16 +358,10 @@ class BertModel(nn.Module):
     config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
         num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
 
-    model = modeling.BertModel(config=config, is_training=True,
-        input_ids=input_ids, input_mask=input_mask, token_type_ids=token_type_ids)
-
-    label_embeddings = tf.get_variable(...)
-    pooled_output = model.get_pooled_output()
-    logits = tf.matmul(pooled_output, label_embeddings)
-    ...
+    model = modeling.BertModel(config=config)
+    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
     ```
     """
 
     def __init__(self, config: BertConfig):
         """Constructor for BertModel.
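Reviewer note on the docstring change above: the TF-style API (graph built in the constructor, with `is_training` and the input tensors passed at construction time) is replaced by the standard PyTorch pattern of building the module once and calling it. A minimal sketch of the new convention; it follows the docstring except for the `import` lines, the token type values, and the final `print`, which are illustrative assumptions:

```python
import torch
import modeling  # the module this diff edits; assumed importable from the repo root

# Already been converted into WordPiece token ids (values from the docstring)
input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 1]])  # illustrative segment ids

config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
    num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)

# is_training is gone: build once, then toggle train()/eval() like any nn.Module
model = modeling.BertModel(config=config)
model.eval()

# The forward call replaces constructor-time graph building; depending on the
# version you may need to pass input_mask.float() (see the training hunk below)
all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
print(len(all_encoder_layers), pooled_output.shape)  # one tensor per layer, (2, 512)
```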
@@ -400,7 +393,26 @@ class BertModel(nn.Module):
         return all_encoder_layers, pooled_output
 
 class BertForSequenceClassification(nn.Module):
-    def __init__(self, config, num_labels):
+    """BERT model for classification.
+    This module is composed of the BERT model with a linear layer on top of
+    the pooled output.
+
+    Example usage:
+    ```python
+    # Already been converted into WordPiece token ids
+    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
+    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
+    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 2, 0]])
+
+    config = modeling.BertConfig(vocab_size=32000, hidden_size=512,
+        num_hidden_layers=8, num_attention_heads=6, intermediate_size=1024)
+
+    num_labels = 2
+
+    model = BertForSequenceClassification(config, num_labels)
+    logits = model(input_ids, token_type_ids, input_mask)
+    ```
+    """
+    def __init__(self, config, num_labels):
         super(BertForSequenceClassification, self).__init__()
         self.bert = BertModel(config)
         self.dropout = nn.Dropout(config.hidden_dropout_prob)
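For readers following the new docstring: the "linear layer on top of the pooled output" design is small enough to sketch in isolation. The stand-in below is hypothetical (the real class also owns the BertModel and reads sizes from config; the hunk shows self.bert and self.dropout, with the linear layer presumably just below the visible context), but it shows the shape of the computation:

```python
import torch
import torch.nn as nn

class ClassificationHead(nn.Module):
    """Illustrative stand-in for the linear-on-pooled-output design described
    in the new docstring; not the repo's exact BertForSequenceClassification."""

    def __init__(self, hidden_size, hidden_dropout_prob, num_labels):
        super(ClassificationHead, self).__init__()
        self.dropout = nn.Dropout(hidden_dropout_prob)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, pooled_output):
        # pooled_output: (batch, hidden_size), i.e. BertModel's second return value
        return self.classifier(self.dropout(pooled_output))

head = ClassificationHead(hidden_size=512, hidden_dropout_prob=0.1, num_labels=2)
logits = head(torch.randn(2, 512))
print(logits.shape)  # torch.Size([2, 2])
```

Applying dropout before the linear layer, as here, matches the order the hunk's __init__ suggests.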
@@ -115,16 +115,10 @@ parser.add_argument("--save_checkpoints_steps",
                     default = 1000,
                     type = int,
                     help = "How often to save the model checkpoint.")
-parser.add_argument("--iterations_per_loop",
-                    default = 1000,
-                    type = int,
-                    help = "How many steps to make in each estimator call.")
-
 parser.add_argument("--no_cuda",
                     default = False,
                     type = bool,
                     help = "Whether not to use CUDA when available")
 
 parser.add_argument("--local_rank",
                     type=int,
                     default=-1,
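A side note on the retained `--no_cuda` argument: `type = bool` is an argparse pitfall, because argparse applies `bool()` to the raw string and any non-empty string, including "False", is truthy. A `store_true` flag sidesteps this entirely; a suggestion, not something this commit changes:

```python
import argparse

parser = argparse.ArgumentParser()
# With type=bool, `--no_cuda False` would still set no_cuda to True,
# since bool("False") evaluates to True.
parser.add_argument("--no_cuda",
                    action="store_true",
                    help="Whether not to use CUDA when available")

args = parser.parse_args(["--no_cuda"])
print(args.no_cuda)   # True
args = parser.parse_args([])
print(args.no_cuda)   # False (flag omitted)
```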
@@ -518,6 +512,7 @@ def main():
 
     model.train()
     global_step = 0
+    for epoch in range(int(args.num_train_epochs)):
     for input_ids, input_mask, segment_ids, label_ids in train_dataloader:
         input_ids = input_ids.to(device)
         input_mask = input_mask.float().to(device)
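Note on the training-loop hunk: it adds only the epoch line, so the re-indentation of the dataloader loop beneath it does not show in this context window. A sketch of the resulting loop shape, assuming model, optimizer, device, train_dataloader, and args are set up earlier in main(), and assuming (not shown in the diff) that the model returns the loss when labels are passed:

```python
model.train()
global_step = 0
for epoch in range(int(args.num_train_epochs)):
    for input_ids, input_mask, segment_ids, label_ids in train_dataloader:
        # Move the batch to the target device; the mask is cast to float
        # so it can be folded into the attention scores downstream.
        input_ids = input_ids.to(device)
        input_mask = input_mask.float().to(device)
        segment_ids = segment_ids.to(device)
        label_ids = label_ids.to(device)

        loss, _ = model(input_ids, segment_ids, input_mask, label_ids)  # assumed signature
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        global_step += 1
```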