Add support for `mpnet` models (#221)

Joshua Lochner 2023-07-27 05:59:23 +02:00 committed by GitHub
parent 09ff83b90e
commit f163f1a318
2 changed files with 81 additions and 0 deletions


@@ -15,6 +15,7 @@
1. **[MarianMT](https://huggingface.co/docs/transformers/model_doc/marian)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari.
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
1. **[NLLB](https://huggingface.co/docs/transformers/model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
1. **[RoBERTa](https://huggingface.co/docs/transformers/model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.


@@ -1394,6 +1394,81 @@ export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
}
//////////////////////////////////////////////////
//////////////////////////////////////////////////
// MPNet models
export class MPNetPreTrainedModel extends PreTrainedModel { }
/**
* The bare MPNet Model transformer outputting raw hidden-states without any specific head on top.
* @extends MPNetPreTrainedModel
*/
export class MPNetModel extends MPNetPreTrainedModel { }
/**
* MPNetForMaskedLM is a class representing an MPNet model for masked language modeling.
* @extends MPNetPreTrainedModel
*/
export class MPNetForMaskedLM extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
*/
async _call(model_inputs) {
return new MaskedLMOutput(await super._call(model_inputs));
}
}
/**
* MPNetForSequenceClassification is a class representing an MPNet model for sequence classification.
* @extends MPNetPreTrainedModel
*/
export class MPNetForSequenceClassification extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
*/
async _call(model_inputs) {
return new SequenceClassifierOutput(await super._call(model_inputs));
}
}
/**
* MPNetForTokenClassification is a class representing an MPNet model for token classification.
* @extends MPNetPreTrainedModel
*/
export class MPNetForTokenClassification extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
*/
async _call(model_inputs) {
return new TokenClassifierOutput(await super._call(model_inputs));
}
}
/**
* MPNetForQuestionAnswering is a class representing an MPNet model for question answering.
* @extends MPNetPreTrainedModel
*/
export class MPNetForQuestionAnswering extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
*/
async _call(model_inputs) {
return new QuestionAnsweringModelOutput(await super._call(model_inputs));
}
}
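// For reference, a minimal usage sketch of the classes added above. The model ID
// 'Xenova/all-mpnet-base-v2' is illustrative (not part of this commit) and assumes
// an ONNX-converted MPNet checkpoint is available on the Hugging Face Hub:
//
//   import { AutoTokenizer, MPNetModel } from '@xenova/transformers';
//
//   const tokenizer = await AutoTokenizer.from_pretrained('Xenova/all-mpnet-base-v2');
//   const model = await MPNetModel.from_pretrained('Xenova/all-mpnet-base-v2');
//
//   // Tokenize, then run the bare encoder to get raw hidden states.
//   const inputs = tokenizer('MPNet support has landed.');
//   const { last_hidden_state } = await model(inputs); // [batch_size, seq_len, hidden_size]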
//////////////////////////////////////////////////
//////////////////////////////////////////////////
// SqueezeBert models
@@ -2805,6 +2880,7 @@ export class PretrainedMixin {
const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
['bert', BertModel],
['mpnet', MPNetModel],
['albert', AlbertModel],
['distilbert', DistilBertModel],
['roberta', RobertaModel],
@@ -2834,6 +2910,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
['bert', BertForSequenceClassification],
['mpnet', MPNetForSequenceClassification],
['albert', AlbertForSequenceClassification],
['distilbert', DistilBertForSequenceClassification],
['roberta', RobertaForSequenceClassification],
@@ -2845,6 +2922,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([
['bert', BertForTokenClassification],
['mpnet', MPNetForTokenClassification],
['distilbert', DistilBertForTokenClassification],
['roberta', RobertaForTokenClassification],
['xlm-roberta', XLMRobertaForTokenClassification],
@@ -2867,6 +2945,7 @@ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([
const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
['bert', BertForMaskedLM],
['mpnet', MPNetForMaskedLM],
['albert', AlbertForMaskedLM],
['distilbert', DistilBertForMaskedLM],
['roberta', RobertaForMaskedLM],
@@ -2877,6 +2956,7 @@ const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
const MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
['bert', BertForQuestionAnswering],
['mpnet', MPNetForQuestionAnswering],
['albert', AlbertForQuestionAnswering],
['distilbert', DistilBertForQuestionAnswering],
['roberta', RobertaForQuestionAnswering],
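// With the 'mpnet' entries registered above, MPNet checkpoints also resolve through
// the Auto classes and the pipeline API. A minimal sketch, again assuming an
// ONNX-converted checkpoint such as 'Xenova/all-mpnet-base-v2' (illustrative) exists:
//
//   import { pipeline } from '@xenova/transformers';
//
//   // Mean-pooled, L2-normalized sentence embeddings from an MPNet encoder.
//   const extractor = await pipeline('feature-extraction', 'Xenova/all-mpnet-base-v2');
//   const embedding = await extractor('This framework generates embeddings.', {
//       pooling: 'mean',
//       normalize: true,
//   });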