Add support for `mpnet` models (#221)

Joshua Lochner 2023-07-27 05:59:23 +02:00 committed by GitHub
parent 09ff83b90e
commit f163f1a318
2 changed files with 81 additions and 0 deletions


@@ -15,6 +15,7 @@
1. **[MarianMT](https://huggingface.co/docs/transformers/model_doc/marian)** Machine translation models trained using [OPUS](http://opus.nlpl.eu/) data by Jörg Tiedemann. The [Marian Framework](https://marian-nmt.github.io/) is being developed by the Microsoft Translator Team.
1. **[MobileBERT](https://huggingface.co/docs/transformers/model_doc/mobilebert)** (from CMU/Google Brain) released with the paper [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) by Zhiqing Sun, Hongkun Yu, Xiaodan Song, Renjie Liu, Yiming Yang, and Denny Zhou.
1. **[MobileViT](https://huggingface.co/docs/transformers/model_doc/mobilevit)** (from Apple) released with the paper [MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer](https://arxiv.org/abs/2110.02178) by Sachin Mehta and Mohammad Rastegari.
1. **[MPNet](https://huggingface.co/docs/transformers/model_doc/mpnet)** (from Microsoft Research) released with the paper [MPNet: Masked and Permuted Pre-training for Language Understanding](https://arxiv.org/abs/2004.09297) by Kaitao Song, Xu Tan, Tao Qin, Jianfeng Lu, Tie-Yan Liu.
1. **[MT5](https://huggingface.co/docs/transformers/model_doc/mt5)** (from Google AI) released with the paper [mT5: A massively multilingual pre-trained text-to-text transformer](https://arxiv.org/abs/2010.11934) by Linting Xue, Noah Constant, Adam Roberts, Mihir Kale, Rami Al-Rfou, Aditya Siddhant, Aditya Barua, Colin Raffel.
1. **[NLLB](https://huggingface.co/docs/transformers/model_doc/nllb)** (from Meta) released with the paper [No Language Left Behind: Scaling Human-Centered Machine Translation](https://arxiv.org/abs/2207.04672) by the NLLB team.
1. **[RoBERTa](https://huggingface.co/docs/transformers/model_doc/roberta)** (from Facebook), released together with the paper [RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://arxiv.org/abs/1907.11692) by Yinhan Liu, Myle Ott, Naman Goyal, Jingfei Du, Mandar Joshi, Danqi Chen, Omer Levy, Mike Lewis, Luke Zettlemoyer, Veselin Stoyanov.


@@ -1394,6 +1394,81 @@ export class MobileBertForQuestionAnswering extends MobileBertPreTrainedModel {
}
//////////////////////////////////////////////////
//////////////////////////////////////////////////
// MPNet models
export class MPNetPreTrainedModel extends PreTrainedModel { }
/**
* The bare MPNet Model transformer outputting raw hidden-states without any specific head on top.
* @extends MPNetPreTrainedModel
*/
export class MPNetModel extends MPNetPreTrainedModel { }
/**
* MPNetForMaskedLM is a class representing an MPNet model for masked language modeling.
* @extends MPNetPreTrainedModel
*/
export class MPNetForMaskedLM extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<MaskedLMOutput>} An object containing the model's output logits for masked language modeling.
*/
async _call(model_inputs) {
return new MaskedLMOutput(await super._call(model_inputs));
}
}
/**
* MPNetForSequenceClassification is a class representing an MPNet model for sequence classification.
* @extends MPNetPreTrainedModel
*/
export class MPNetForSequenceClassification extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<SequenceClassifierOutput>} An object containing the model's output logits for sequence classification.
*/
async _call(model_inputs) {
return new SequenceClassifierOutput(await super._call(model_inputs));
}
}
/**
* MPNetForTokenClassification is a class representing an MPNet model for token classification.
* @extends MPNetPreTrainedModel
*/
export class MPNetForTokenClassification extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<TokenClassifierOutput>} An object containing the model's output logits for token classification.
*/
async _call(model_inputs) {
return new TokenClassifierOutput(await super._call(model_inputs));
}
}
/**
* MPNetForQuestionAnswering is a class representing an MPNet model for question answering.
* @extends MPNetPreTrainedModel
*/
export class MPNetForQuestionAnswering extends MPNetPreTrainedModel {
/**
* Calls the model on new inputs.
*
* @param {Object} model_inputs The inputs to the model.
* @returns {Promise<QuestionAnsweringModelOutput>} An object containing the model's output logits for question answering.
*/
async _call(model_inputs) {
return new QuestionAnsweringModelOutput(await super._call(model_inputs));
}
}
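// For reference, a minimal usage sketch of the classes added above. The model ID
// 'Xenova/all-mpnet-base-v2' is illustrative (not part of this commit) and assumes
// an ONNX-converted MPNet checkpoint is available on the Hugging Face Hub:
//
//   import { AutoTokenizer, MPNetModel } from '@xenova/transformers';
//
//   const tokenizer = await AutoTokenizer.from_pretrained('Xenova/all-mpnet-base-v2');
//   const model = await MPNetModel.from_pretrained('Xenova/all-mpnet-base-v2');
//
//   // Tokenize, then run the bare encoder to get raw hidden states.
//   const inputs = tokenizer('MPNet support has landed.');
//   const { last_hidden_state } = await model(inputs); // [batch_size, seq_len, hidden_size]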
//////////////////////////////////////////////////
//////////////////////////////////////////////////
// SqueezeBert models
@@ -2805,6 +2880,7 @@ export class PretrainedMixin {
const MODEL_MAPPING_NAMES_ENCODER_ONLY = new Map([
['bert', BertModel],
['mpnet', MPNetModel],
['albert', AlbertModel],
['distilbert', DistilBertModel],
['roberta', RobertaModel],
@@ -2834,6 +2910,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([
const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
['bert', BertForSequenceClassification],
['mpnet', MPNetForSequenceClassification],
['albert', AlbertForSequenceClassification],
['distilbert', DistilBertForSequenceClassification],
['roberta', RobertaForSequenceClassification],
@@ -2845,6 +2922,7 @@ const MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES = new Map([
const MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES = new Map([
['bert', BertForTokenClassification],
['mpnet', MPNetForTokenClassification],
['distilbert', DistilBertForTokenClassification],
['roberta', RobertaForTokenClassification],
['xlm-roberta', XLMRobertaForTokenClassification],
@@ -2867,6 +2945,7 @@ const MODEL_WITH_LM_HEAD_MAPPING_NAMES = new Map([
const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
['bert', BertForMaskedLM],
['mpnet', MPNetForMaskedLM],
['albert', AlbertForMaskedLM],
['distilbert', DistilBertForMaskedLM],
['roberta', RobertaForMaskedLM],
@@ -2877,6 +2956,7 @@ const MODEL_FOR_MASKED_LM_MAPPING_NAMES = new Map([
const MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES = new Map([
['bert', BertForQuestionAnswering],
['mpnet', MPNetForQuestionAnswering],
['albert', AlbertForQuestionAnswering],
['distilbert', DistilBertForQuestionAnswering],
['roberta', RobertaForQuestionAnswering],
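// With the 'mpnet' entries registered above, MPNet checkpoints also resolve through
// the Auto classes and the pipeline API. A minimal sketch, again assuming an
// ONNX-converted checkpoint such as 'Xenova/all-mpnet-base-v2' (illustrative) exists:
//
//   import { pipeline } from '@xenova/transformers';
//
//   // Mean-pooled, L2-normalized sentence embeddings from an MPNet encoder.
//   const extractor = await pipeline('feature-extraction', 'Xenova/all-mpnet-base-v2');
//   const embedding = await extractor('This framework generates embeddings.', {
//       pooling: 'mean',
//       normalize: true,
//   });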