Add support for `mpnet` models (#221)
This commit is contained in:
parent
09ff83b90e
commit
f163f1a318
|
@ -15,6 +15,7 @@
|
|||
1. **[MarianMT](https://huggingface.co/docs/transformers/model_doc/marian)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
|
||||
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
|
||||
1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari.
|
||||
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
|
||||
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
|
||||
1. **[NLLB](https://huggingface.co/docs/transformers/model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
|
||||
1. **[RoBERTa](https://huggingface.co/docs/transformers/model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.
|
||||
|
|
|
@ -1394,6 +1394,81 @@ export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
|
|||
}
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// MPNet models

/**
 * Base class for all MPNet models. Provides no behavior of its own; it exists so the
 * concrete MPNet heads below (`MPNetModel`, `MPNetForMaskedLM`, …) share a common
 * ancestor, mirroring the per-architecture `*PreTrainedModel` pattern used elsewhere
 * in this file (e.g. `MobileBertPreTrainedModel`).
 * @extends PreTrainedModel
 */
export class MPNetPreTrainedModel extends PreTrainedModel { }
|
||||
|
||||
/**
 * The bare MPNet Model transformer outputting raw hidden-states without any specific head on top.
 * Inherits all loading and inference behavior from `MPNetPreTrainedModel`; no task head is attached.
 * @extends MPNetPreTrainedModel
 */
export class MPNetModel extends MPNetPreTrainedModel { }
|
||||
|
||||
/**
 * MPNet model with a masked-language-modeling head on top.
 * Runs the underlying model and wraps its raw result in a `MaskedLMOutput`.
 * @extends MPNetPreTrainedModel
 */
export class MPNetForMaskedLM extends MPNetPreTrainedModel {
    /**
     * Runs a forward pass for masked language modeling.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new MaskedLMOutput(outputs);
    }
}
|
||||
|
||||
/**
 * MPNet model with a sequence-classification head on top.
 * Runs the underlying model and wraps its raw result in a `SequenceClassifierOutput`.
 * @extends MPNetPreTrainedModel
 */
export class MPNetForSequenceClassification extends MPNetPreTrainedModel {
    /**
     * Runs a forward pass for sequence classification.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new SequenceClassifierOutput(outputs);
    }
}
|
||||
|
||||
/**
 * MPNet model with a token-classification head on top.
 * Runs the underlying model and wraps its raw result in a `TokenClassifierOutput`.
 * @extends MPNetPreTrainedModel
 */
export class MPNetForTokenClassification extends MPNetPreTrainedModel {
    /**
     * Runs a forward pass for token classification.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new TokenClassifierOutput(outputs);
    }
}
|
||||
|
||||
/**
 * MPNet model with a question-answering head on top.
 * Runs the underlying model and wraps its raw result in a `QuestionAnsweringModelOutput`.
 * @extends MPNetPreTrainedModel
 */
export class MPNetForQuestionAnswering extends MPNetPreTrainedModel {
    /**
     * Runs a forward pass for extractive question answering.
     *
     * @param {Object} model_inputs The inputs to the model.
     * @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
     */
    async _call(model_inputs) {
        const outputs = await super._call(model_inputs);
        return new QuestionAnsweringModelOutput(outputs);
    }
}
|
||||
//////////////////////////////////////////////////
|
||||
|
||||
|
||||
//////////////////////////////////////////////////
|
||||
// SqueezeBert models
|
||||
|
@ -2805,6 +2880,7 @@ export class PretrainedMixin {
|
|||
|
||||
const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
|
||||
['bert', BertModel],
|
||||
['mpnet', MPNetModel],
|
||||
['albert', AlbertModel],
|
||||
['distilbert', DistilBertModel],
|
||||
['roberta', RobertaModel],
|
||||
|
@ -2834,6 +2910,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
|
|||
|
||||
const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
||||
['bert', BertForSequenceClassification],
|
||||
['mpnet', MPNetForSequenceClassification],
|
||||
['albert', AlbertForSequenceClassification],
|
||||
['distilbert', DistilBertForSequenceClassification],
|
||||
['roberta', RobertaForSequenceClassification],
|
||||
|
@ -2845,6 +2922,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
|
|||
|
||||
const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([
|
||||
['bert', BertForTokenClassification],
|
||||
['mpnet', MPNetForTokenClassification],
|
||||
['distilbert', DistilBertForTokenClassification],
|
||||
['roberta', RobertaForTokenClassification],
|
||||
['xlm-roberta', XLMRobertaForTokenClassification],
|
||||
|
@ -2867,6 +2945,7 @@ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([
|
|||
|
||||
const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
|
||||
['bert', BertForMaskedLM],
|
||||
['mpnet', MPNetForMaskedLM],
|
||||
['albert', AlbertForMaskedLM],
|
||||
['distilbert', DistilBertForMaskedLM],
|
||||
['roberta', RobertaForMaskedLM],
|
||||
|
@ -2877,6 +2956,7 @@ const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
|
|||
|
||||
const MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
|
||||
['bert', BertForQuestionAnswering],
|
||||
['mpnet', MPNetForQuestionAnswering],
|
||||
['albert', AlbertForQuestionAnswering],
|
||||
['distilbert', DistilBertForQuestionAnswering],
|
||||
['roberta', RobertaForQuestionAnswering],
|
||||
|
|
Loading…
Reference in New Issue