transformers/notebooks/05-benchmark.ipynb


{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "05-benchmark",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyNQ2BQG0erOGhTFF/2Mdn5a",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"975f42d7b55c4d0caf229cd4c16df5d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
"_view_name": "HBoxView",
"_dom_classes": [],
"_model_name": "HBoxModel",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_69b36685703342eaa80b6f0e01f94e04",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_c8acb33d6a254607a6340c0aa33446f3",
"IPY_MODEL_a6c3647736554beea36db798827203b2"
]
}
},
"69b36685703342eaa80b6f0e01f94e04": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"c8acb33d6a254607a6340c0aa33446f3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_e812aaf8214c4ad983f41804cb82562b",
"_dom_classes": [],
"description": "Downloading: 100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 908,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 908,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_eed2ce14188a453ca296601ca39133b6"
}
},
"a6c3647736554beea36db798827203b2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_548f91729b8d4f3aa81f78c7a1620101",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 908/908 [00:00<00:00, 30.1kB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_900c1cb473f54b48a59226c61fafd626"
}
},
"e812aaf8214c4ad983f41804cb82562b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "ProgressStyleModel",
"description_width": "initial",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"bar_color": null,
"_model_module": "@jupyter-widgets/controls"
}
},
"eed2ce14188a453ca296601ca39133b6": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
},
"548f91729b8d4f3aa81f78c7a1620101": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
"_view_name": "StyleView",
"_model_name": "DescriptionStyleModel",
"description_width": "",
"_view_module": "@jupyter-widgets/base",
"_model_module_version": "1.5.0",
"_view_count": null,
"_view_module_version": "1.2.0",
"_model_module": "@jupyter-widgets/controls"
}
},
"900c1cb473f54b48a59226c61fafd626": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
"_view_name": "LayoutView",
"grid_template_rows": null,
"right": null,
"justify_content": null,
"_view_module": "@jupyter-widgets/base",
"overflow": null,
"_model_module_version": "1.2.0",
"_view_count": null,
"flex_flow": null,
"width": null,
"min_width": null,
"border": null,
"align_items": null,
"bottom": null,
"_model_module": "@jupyter-widgets/base",
"top": null,
"grid_column": null,
"overflow_y": null,
"overflow_x": null,
"grid_auto_flow": null,
"grid_area": null,
"grid_template_columns": null,
"flex": null,
"_model_name": "LayoutModel",
"justify_items": null,
"grid_row": null,
"max_height": null,
"align_content": null,
"visibility": null,
"align_self": null,
"height": null,
"min_height": null,
"padding": null,
"grid_auto_rows": null,
"grid_gap": null,
"max_width": null,
"order": null,
"_view_module_version": "1.2.0",
"grid_template_areas": null,
"object_position": null,
"object_fit": null,
"grid_auto_columns": null,
"margin": null,
"display": null,
"left": null
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/github/huggingface/transformers/blob/add_benchmark_notebook/05_benchmark.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jG-SjOQTskcX",
"colab_type": "text"
},
"source": [
"## **How to benchmark models with Transformers**\n",
"\n",
"With ever-larger language models, it is no longer enough to just \n",
"compare models on their performance on a specific task. One should always be aware of the computational cost that is attached to a specific model. For a given computation environment (*e.g.* type of GPU), the computational cost of training a model or deploying it in inference usually depends only on **the required memory** and **the required time**. \n",
"\n",
"Being able to accurately benchmark language models on both *speed* and *required memory* is therefore very important.\n",
"\n",
"HuggingFace's Transformer library allows users to benchmark models for both TensorFlow 2 and PyTorch using the `PyTorchBenchmark` and `TensorFlowBenchmark` classes.\n",
"\n",
"The currently available features for `PyTorchBenchmark` are summarized in the following table.\n",
"\n",
"\n",
"| | CPU | CPU + torchscript | GPU | GPU + torchscript | GPU + FP16 | TPU |\n",
":-- | :--- | :--- | :--- | :--- | :--- | :--- |\n",
"**Speed - Inference** | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |\n",
"**Memory - Inference** | ✔ | ✔ | ✔ | ✔ | ✔ | ✘ |\n",
"**Speed - Train** | ✔ | ✘ | ✔ | ✘ | ✔ | ✔ |\n",
"**Memory - Train** | ✔ | ✘ | ✔ | ✘ | ✔ | ✘ |\n",
"\n",
"\n",
"* *FP16* stands for mixed-precision meaning that computations within the model are done using a mixture of 16-bit and 32-bit floating-point operations, see [here](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.half) for more detail.\n",
"\n",
"* *torchscript* corresponds to PyTorch's torchscript format, see [here](https://pytorch.org/docs/stable/jit.html).\n",
"\n",
"The currently available features for `TensorFlowBenchmark` are summarized in the following table.\n",
"\n",
"| | CPU | CPU + eager execution | GPU | GPU + eager execution | GPU + XLA | GPU + FP16 | TPU |\n",
":-- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |\n",
"**Speed - Inference** | ✔ | ✔ | ✔ | ✔ | ✔ | ✘ | ✔ |\n",
"**Memory - Inference** | ✔ | ✔ | ✔ | ✔ | ✔ | ✘ | ✘ |\n",
"**Speed - Train** | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ |\n",
"**Memory - Train** | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ | ✘ |\n",
"\n",
"* *eager execution* means that the function is run in the eager execution environment of TensorFlow 2, see [here](https://www.tensorflow.org/guide/eager).\n",
"\n",
"* *XLA* stands for TensorFlow's Accelerated Linear Algebra (XLA) compiler, see [here](https://www.tensorflow.org/xla)\n",
"\n",
"* *FP16* stands for TensorFlow's mixed-precision package and is analogous to PyTorch's FP16 feature, see [here](https://www.tensorflow.org/guide/mixed_precision).\n",
"\n",
"***Note***: In ~1,2 weeks it will also be possible to benchmark training in TensorFlow.\n",
"\n",
"\n",
"This notebook will show the user how to use `PyTorchBenchmark` and `TensorFlowBenchmark` for two different scenarios:\n",
"\n",
"1. **Inference - Pre-trained Model Comparison** - *A user wants to implement a pre-trained model in production for inference. She wants to compare different models on speed and required memory.*\n",
"\n",
"2. **Training - Configuration Comparison** - *A user wants to train a specific model and searches that for himself most effective model configuration.*\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "j-jvAvZ1-GIh",
"colab_type": "text"
},
"source": [
"### **Inference - Pre-trained Model Comparison**\n",
"\n",
"Let's say we want to employ a question-answering model in production. The questions are expected to be of the same format as in **SQuAD v2**, so that the model to choose should have been fine-tuned on this dataset. \n",
"\n",
"HuggingFace's new dataset [webpage](https://huggingface.co/datasets) lets the user see all relevant information about a dataset and even links the models that have been fine-tuned on this specific dataset. Let's check out the dataset webpage of SQuAD v2 [here](https://huggingface.co/datasets/squad_v2).\n",
"\n",
"Nice, we can see that there are 7 available models.\n",
"\n",
"![Texte alternatif…](https://raw.githubusercontent.com/patrickvonplaten/scientific_images/master/squad_v2_dataset.png)\n",
"\n",
"Let's assume that we have decided to restrict our pipeline to \"encoder-only\" models so that we are left with:\n",
"\n",
"- `a-ware/roberta-large-squad-classification`\n",
"- `a-ware/xlmroberta-squadv2`\n",
"- `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2`\n",
"- `deepset/roberta-base-squad2`\n",
"- `mrm8488/longformer-base-4096-finetuned-squadv2`\n",
"\n",
"Great! In this notebook, we will now benchmark these models on both peak memory consumption and inference time to decide which model should be employed in production.\n",
"\n",
"***Note***: None of the models has been tested on performance so that we will just assume that all models perform more or less equally well. The purpose of this notebook is not to find the best model for SQuAD v2, but to showcase how Transformers benchmarking tools can be leveraged.\n",
"\n",
"First, we assume to be limited by the available GPU on this google colab, which in this copy amounts to 16 GB of RAM."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2l9C7d7K5-G4",
"colab_type": "text"
},
"source": [
"In a first step, we will check which models are the most memory-efficient ones.\n",
"Let's make sure 100% of the GPU is available to us in this notebook."
]
},
{
"cell_type": "code",
"metadata": {
"id": "M7cQmgM5TvlO",
"colab_type": "code",
"cellView": "form",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 67
},
"outputId": "2797c14e-a62d-42cc-97a6-6c61b015d569"
},
"source": [
"#@title Check available memory of GPU\n",
"# Check that we are using 100% of GPU\n",
"# memory footprint support libraries/code\n",
"!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi\n",
"!pip -q install gputil\n",
"!pip -q install psutil\n",
"!pip -q install humanize\n",
"import psutil\n",
"import humanize\n",
"import os\n",
"import GPUtil as GPU\n",
"GPUs = GPU.getGPUs()\n",
"# XXX: only one GPU on Colab and isnt guaranteed\n",
"gpu = GPUs[0]\n",
"def printm():\n",
" process = psutil.Process(os.getpid())\n",
" print(\"Gen RAM Free: \" + humanize.naturalsize( psutil.virtual_memory().available ), \" | Proc size: \" + humanize.naturalsize( process.memory_info().rss))\n",
" print(\"GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB\".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))\n",
"printm()"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
" Building wheel for gputil (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Gen RAM Free: 12.8 GB | Proc size: 160.0 MB\n",
"GPU RAM Free: 16280MB | Used: 0MB | Util 0% | Total 16280MB\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "NuS2CKuQ4qSk",
"colab_type": "code",
"colab": {}
},
"source": [
"# If GPU RAM Util > 0% => crash notebook on purpose\n",
"# !kill -9 -1"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ikdYDXsj6Nzv",
"colab_type": "text"
},
"source": [
"Looks good! Now we import `transformers` and download the scripts `run_benchmark.py`, `run_benchmark_tf.py`, and `plot_csv_file.py` which can be found under `transformers/examples/benchmarking`.\n",
"\n",
"`run_benchmark_tf.py` and `run_benchmark.py` are very simple scripts leveraging the `PyTorchBenchmark` and `TensorFlowBenchmark` classes, respectively."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Dylftiyd1IG1",
"colab_type": "code",
"cellView": "both",
"colab": {}
},
"source": [
"# install transformes\n",
"!pip uninstall -y transformers\n",
"!pip install -q git+https://github.com/huggingface/transformers.git\n",
"\n",
"# install py3nvml to track GPU memory usage\n",
"!pip install -q py3nvml\n",
"\n",
"!rm -f run_benchmark.py\n",
"!rm -f run_benchmark_tf.py\n",
"!rm -f plot_csv_file.py\n",
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/benchmarking/run_benchmark.py -qq\n",
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/benchmarking/run_benchmark_tf.py -qq\n",
"!wget https://raw.githubusercontent.com/huggingface/transformers/master/examples/benchmarking/plot_csv_file.py -qq\n",
"\n",
"# import pandas to pretty print csv files\n",
"import pandas as pd"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "C4nz5nGFkOrK",
"colab_type": "text"
},
"source": [
"Information about the input arguments to the *run_benchmark* scripts can be accessed by running `!python run_benchmark.py --help` for PyTorch and `!python run_benchmark_tf.py --help` for TensorFlow."
]
},
{
"cell_type": "code",
"metadata": {
"id": "zu7Oufe0jcAj",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "bc52dea5-b721-410c-cf3b-8a7b983a558e"
},
"source": [
"!python run_benchmark.py --help"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 11:51:47.129203: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
"usage: run_benchmark.py [-h] [--models MODELS [MODELS ...]]\n",
" [--batch_sizes BATCH_SIZES [BATCH_SIZES ...]]\n",
" [--sequence_lengths SEQUENCE_LENGTHS [SEQUENCE_LENGTHS ...]]\n",
" [--no_inference] [--no_cuda] [--no_tpu] [--fp16]\n",
" [--training] [--verbose] [--no_speed] [--no_memory]\n",
" [--trace_memory_line_by_line] [--save_to_csv]\n",
" [--log_print] [--no_env_print] [--no_multi_process]\n",
" [--with_lm_head]\n",
" [--inference_time_csv_file INFERENCE_TIME_CSV_FILE]\n",
" [--inference_memory_csv_file INFERENCE_MEMORY_CSV_FILE]\n",
" [--train_time_csv_file TRAIN_TIME_CSV_FILE]\n",
" [--train_memory_csv_file TRAIN_MEMORY_CSV_FILE]\n",
" [--env_info_csv_file ENV_INFO_CSV_FILE]\n",
" [--log_filename LOG_FILENAME] [--repeat REPEAT]\n",
" [--only_pretrain_model] [--torchscript]\n",
" [--torch_xla_tpu_print_metrics]\n",
" [--fp16_opt_level FP16_OPT_LEVEL]\n",
"\n",
"optional arguments:\n",
" -h, --help show this help message and exit\n",
" --models MODELS [MODELS ...]\n",
" Model checkpoints to be provided to the AutoModel\n",
" classes. Leave blank to benchmark the base version of\n",
" all available models\n",
" --batch_sizes BATCH_SIZES [BATCH_SIZES ...]\n",
" List of batch sizes for which memory and time\n",
" performance will be evaluated\n",
" --sequence_lengths SEQUENCE_LENGTHS [SEQUENCE_LENGTHS ...]\n",
" List of sequence lengths for which memory and time\n",
" performance will be evaluated\n",
" --no_inference Don't benchmark inference of model\n",
" --no_cuda Whether to run on available cuda devices\n",
" --no_tpu Whether to run on available tpu devices\n",
" --fp16 Use FP16 to accelerate inference.\n",
" --training Benchmark training of model\n",
" --verbose Verbose memory tracing\n",
" --no_speed Don't perform speed measurments\n",
" --no_memory Don't perform memory measurments\n",
" --trace_memory_line_by_line\n",
" Trace memory line by line\n",
" --save_to_csv Save result to a CSV file\n",
" --log_print Save all print statements in a log file\n",
" --no_env_print Don't print environment information\n",
" --no_multi_process Don't use multiprocessing for memory and speed\n",
" measurement. It is highly recommended to use\n",
" multiprocessing for accurate CPU and GPU memory\n",
" measurements. This option should only be used for\n",
" debugging / testing and on TPU.\n",
" --with_lm_head Use model with its language model head\n",
" (MODEL_WITH_LM_HEAD_MAPPING instead of MODEL_MAPPING)\n",
" --inference_time_csv_file INFERENCE_TIME_CSV_FILE\n",
" CSV filename used if saving time results to csv.\n",
" --inference_memory_csv_file INFERENCE_MEMORY_CSV_FILE\n",
" CSV filename used if saving memory results to csv.\n",
" --train_time_csv_file TRAIN_TIME_CSV_FILE\n",
" CSV filename used if saving time results to csv for\n",
" training.\n",
" --train_memory_csv_file TRAIN_MEMORY_CSV_FILE\n",
" CSV filename used if saving memory results to csv for\n",
" training.\n",
" --env_info_csv_file ENV_INFO_CSV_FILE\n",
" CSV filename used if saving environment information.\n",
" --log_filename LOG_FILENAME\n",
" Log filename used if print statements are saved in\n",
" log.\n",
" --repeat REPEAT Times an experiment will be run.\n",
" --only_pretrain_model\n",
" Instead of loading the model as defined in\n",
" `config.architectures` if exists, just load the\n",
" pretrain model weights.\n",
" --torchscript Trace the models using torchscript\n",
" --torch_xla_tpu_print_metrics\n",
" Print Xla/PyTorch tpu metrics\n",
" --fp16_opt_level FP16_OPT_LEVEL\n",
" For fp16: Apex AMP optimization level selected in\n",
" ['O0', 'O1', 'O2', and 'O3'].See details at\n",
" https://nvidia.github.io/apex/amp.html\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Q_3TZshjcrjP",
"colab_type": "text"
},
"source": [
"Great, we are ready to run our first memory benchmark. By default, both the *required memory* and *time* for inference is enabled. To disable benchmarking on *time*, we add `--no_speed`.\n",
"\n",
"The only required parameter is `--models` which expects a list of model identifiers as defined on the [model hub](https://huggingface.co/models). Here we add the five model identifiers listed above.\n",
"\n",
"Next, we define the `sequence_lengths` and `batch_sizes` for which the peak memory is calculated.\n",
"\n",
"Finally, because the results should be stored in a *CSV* file, the option `--save_to_csv` is added and the path to save the results is added via the `--inference_memory_csv_file` argument. \n",
"Whenever a benchmark is run, the environment information, *e.g.* GPU type, library versions, ... can be saved using the `--env_info_csv_file` argument."
]
},
{
"cell_type": "code",
"metadata": {
"id": "ykJqt7MEbHIq",
"colab_type": "code",
"colab": {}
},
"source": [
"# create plots folder in content\n",
"!mkdir -p plots_pt"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "TSJgpQxBe-Fj",
"colab_type": "code",
"colab": {}
},
"source": [
"# run benchmark\n",
"!python run_benchmark.py --no_speed --save_to_csv \\\n",
" --models a-ware/roberta-large-squad-classification \\\n",
" a-ware/xlmroberta-squadv2 \\\n",
" aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 \\\n",
" deepset/roberta-base-squad2 \\\n",
" mrm8488/longformer-base-4096-finetuned-squadv2 \\\n",
" --sequence_lengths 32 128 512 1024 \\\n",
" --batch_sizes 32 \\\n",
" --inference_memory_csv_file plots_pt/required_memory.csv \\\n",
" --env_info_csv_file plots_pt/env.csv >/dev/null 2>&1 # redirect all prints"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "ESHrlnKik396",
"colab_type": "text"
},
"source": [
"Under `plots_pt`, two files are now created: `required_memory.csv` and `env.csv`. Let's check out `required_memory.csv` first."
]
},
{
"cell_type": "code",
"metadata": {
"id": "rPg_7fPnuDUa",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 639
},
"outputId": "b6272763-7235-43c6-c457-0a4a13bb02e5"
},
"source": [
"df = pd.read_csv('plots_pt/required_memory.csv')\n",
"df"
],
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>model</th>\n",
" <th>batch_size</th>\n",
" <th>sequence_length</th>\n",
" <th>result</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>a-ware/roberta-large-squad-classification</td>\n",
" <td>32</td>\n",
" <td>32</td>\n",
" <td>2219.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>a-ware/roberta-large-squad-classification</td>\n",
" <td>32</td>\n",
" <td>128</td>\n",
" <td>2455.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>a-ware/roberta-large-squad-classification</td>\n",
" <td>32</td>\n",
" <td>512</td>\n",
" <td>3641.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>a-ware/roberta-large-squad-classification</td>\n",
" <td>32</td>\n",
" <td>1024</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>a-ware/xlmroberta-squadv2</td>\n",
" <td>32</td>\n",
" <td>32</td>\n",
" <td>2999.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>a-ware/xlmroberta-squadv2</td>\n",
" <td>32</td>\n",
" <td>128</td>\n",
" <td>3235.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>a-ware/xlmroberta-squadv2</td>\n",
" <td>32</td>\n",
" <td>512</td>\n",
" <td>4421.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>a-ware/xlmroberta-squadv2</td>\n",
" <td>32</td>\n",
" <td>1024</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200...</td>\n",
" <td>32</td>\n",
" <td>32</td>\n",
" <td>1025.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200...</td>\n",
" <td>32</td>\n",
" <td>128</td>\n",
" <td>1143.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200...</td>\n",
" <td>32</td>\n",
" <td>512</td>\n",
" <td>1719.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200...</td>\n",
" <td>32</td>\n",
" <td>1024</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>deepset/roberta-base-squad2</td>\n",
" <td>32</td>\n",
" <td>32</td>\n",
" <td>1373.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>deepset/roberta-base-squad2</td>\n",
" <td>32</td>\n",
" <td>128</td>\n",
" <td>1533.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>deepset/roberta-base-squad2</td>\n",
" <td>32</td>\n",
" <td>512</td>\n",
" <td>2433.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>deepset/roberta-base-squad2</td>\n",
" <td>32</td>\n",
" <td>1024</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>mrm8488/longformer-base-4096-finetuned-squadv2</td>\n",
" <td>32</td>\n",
" <td>32</td>\n",
" <td>3783.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>mrm8488/longformer-base-4096-finetuned-squadv2</td>\n",
" <td>32</td>\n",
" <td>128</td>\n",
" <td>3783.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>mrm8488/longformer-base-4096-finetuned-squadv2</td>\n",
" <td>32</td>\n",
" <td>512</td>\n",
" <td>3783.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>mrm8488/longformer-base-4096-finetuned-squadv2</td>\n",
" <td>32</td>\n",
" <td>1024</td>\n",
" <td>6427.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" model ... result\n",
"0 a-ware/roberta-large-squad-classification ... 2219.0\n",
"1 a-ware/roberta-large-squad-classification ... 2455.0\n",
"2 a-ware/roberta-large-squad-classification ... 3641.0\n",
"3 a-ware/roberta-large-squad-classification ... NaN\n",
"4 a-ware/xlmroberta-squadv2 ... 2999.0\n",
"5 a-ware/xlmroberta-squadv2 ... 3235.0\n",
"6 a-ware/xlmroberta-squadv2 ... 4421.0\n",
"7 a-ware/xlmroberta-squadv2 ... NaN\n",
"8 aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200... ... 1025.0\n",
"9 aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200... ... 1143.0\n",
"10 aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200... ... 1719.0\n",
"11 aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200... ... NaN\n",
"12 deepset/roberta-base-squad2 ... 1373.0\n",
"13 deepset/roberta-base-squad2 ... 1533.0\n",
"14 deepset/roberta-base-squad2 ... 2433.0\n",
"15 deepset/roberta-base-squad2 ... NaN\n",
"16 mrm8488/longformer-base-4096-finetuned-squadv2 ... 3783.0\n",
"17 mrm8488/longformer-base-4096-finetuned-squadv2 ... 3783.0\n",
"18 mrm8488/longformer-base-4096-finetuned-squadv2 ... 3783.0\n",
"19 mrm8488/longformer-base-4096-finetuned-squadv2 ... 6427.0\n",
"\n",
"[20 rows x 4 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "o2LnaVpyW9TB",
"colab_type": "text"
},
"source": [
"Each row in the csv file lists one data point showing the *peak memory* usage for a given model, batch_size and sequence_length. As can be seen, some values have a *NaN* result meaning that an *Out-of-Memory* Error occurred. To better visualize the results, one can make use of the `plot_csv_file.py` script.\n",
"\n",
"Before, let's take a look at the information about our computation environment."
]
},
{
"cell_type": "code",
"metadata": {
"id": "y6n49pbIXI6E",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 639
},
"outputId": "495f011c-87c9-43a1-e1d4-a6501c327e76"
},
"source": [
"df = pd.read_csv('plots_pt/env.csv')\n",
"df"
],
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>transformers_version</th>\n",
" <th>2.11.0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>framework</td>\n",
" <td>PyTorch</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>use_torchscript</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>framework_version</td>\n",
" <td>1.5.1+cu101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>python_version</td>\n",
" <td>3.6.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>system</td>\n",
" <td>Linux</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>cpu</td>\n",
" <td>x86_64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>architecture</td>\n",
" <td>64bit</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>date</td>\n",
" <td>2020-06-26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>time</td>\n",
" <td>11:56:37.277009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>fp16</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>use_multiprocessing</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>only_pretrain_model</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>cpu_ram_mb</td>\n",
" <td>13021</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>use_gpu</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>num_gpus</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>gpu</td>\n",
" <td>Tesla P100-PCIE-16GB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>gpu_ram_mb</td>\n",
" <td>16280</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>gpu_power_watts</td>\n",
" <td>250.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>gpu_performance_state</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>use_tpu</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" transformers_version 2.11.0\n",
"0 framework PyTorch\n",
"1 use_torchscript False\n",
"2 framework_version 1.5.1+cu101\n",
"3 python_version 3.6.9\n",
"4 system Linux\n",
"5 cpu x86_64\n",
"6 architecture 64bit\n",
"7 date 2020-06-26\n",
"8 time 11:56:37.277009\n",
"9 fp16 False\n",
"10 use_multiprocessing True\n",
"11 only_pretrain_model False\n",
"12 cpu_ram_mb 13021\n",
"13 use_gpu True\n",
"14 num_gpus 1\n",
"15 gpu Tesla P100-PCIE-16GB\n",
"16 gpu_ram_mb 16280\n",
"17 gpu_power_watts 250.0\n",
"18 gpu_performance_state 0\n",
"19 use_tpu False"
]
},
"metadata": {
"tags": []
},
"execution_count": 8
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "z316Xf2oXTZz",
"colab_type": "text"
},
"source": [
"We can see all relevant information here: the PyTorch version, the Python version, the system, the type of GPU, and available RAM on the GPU, etc...\n",
"\n",
"**Note**: A different GPU is likely assigned to a copy of this notebook, so that all of the following results may be different. It is very important to always include the environment information when benchmarking your models for both reproducibility and transparency to other users.\n",
"\n",
"Alright, let's plot the results."
]
},
{
"cell_type": "code",
"metadata": {
"id": "yHYUqRzWy8sp",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "22499f33-bafc-42b3-f1b7-fcb202df9cd2"
},
"source": [
"# plot graph and save as image\n",
"!python plot_csv_file.py --csv_file plots_pt/required_memory.csv --figure_png_file=plots_pt/required_memory_plot.png --no_log_scale --short_model_names a-ware-roberta a-aware-xlm aodiniz-bert deepset-roberta mrm8488-long\n",
"\n",
"# show image\n",
"from IPython.display import Image\n",
"Image('plots_pt/required_memory_plot.png')"
],
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 11:56:39.671579: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd1gU19cH8O8uZekdaSLVAioWjAKCaNRgwy4oiYAaNdGIml80ahIVa4gaNRoLJjG2RFTsiYpRUFQkmtiwIBKsYAEEFJG25/2DdycOuzTFYML5PA+Pzp07d+6UvXP2zp1ZCRERGGOMMcZYvSGt6wowxhhjjLF/FgeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1DAeAjDHGGGP1TIUBoL29PUJDQ1+64EWLFsHR0RFqampo3br1S5fD6jd7e3v06dOnrqvB6rHOnTujc+fOdV2Nf7Uff/wREokEN2/eFNJeZb+GhobC3t6+Vuo2e/ZsSCQSZGZm1kp5/2USiQSzZ8+u62r8J7wJ591r6QGMiYnB1KlT0bFjR6xfvx4LFix4HathrFalp6dj9uzZOH/+fF1XhTH2H7Vq1Sr8+OOPdV0N9oJTp05h9uzZyMnJqeuqvDaqzjv1ijInJydDKn25+PDo0aOQSqX4/vvvoamp+VJlMPZPS09PR3h4OOzt7bnXmrHXLCYm5qWXXbduHeRyeS3W5p+zatUqmJmZvdIdNla7Tp06hfDwcISGhsLIyKiuq/NaqDrvKozwZDIZNDQ0XmpFDx8+hLa2dq0Gf8+ePau1sljlSkpKUFRU9FrKzs/Pfy3lvorXub2s9r2J5xCrOU1NzZe+RmhoaEAmk9VyjV4vvob9t72p15HKzrtqjwFUjOE4efIkPv74Y5ibm0NXVxcDBgzAo0ePhHwSiQTr169Hfn4+JBIJJBKJqNtx8+bNcHd3h7a2NkxMTDB06FDcuXNHtO7OnTujRYsW+OOPP9CpUyfo6OhgxowZAIDCwkLMmjULzs7OkMlksLW1xdSpU1FYWCgqQyKR4KOPPsLu3bvRokULyGQyNG/eHAcPHlTa1nv37mHUqFGwtraGTCaDg4MDPvzwQ9HBzMnJwaRJk2BrawuZTAZnZ2dERERU61vonj170Lt3b6F8JycnzJ07F6WlpVUu+/HHH8PU1BREJKRNmDABEokE33zzjZD24MEDSCQSrF69GgBQVFSEmTNnwt3dHYaGhtDV1YWPjw9iY2NF5d+8eRMSiQSLFy/GsmXL4OTkBJlMhitXrgAArl27hsGDB8PExARaWlpo164d9u7dW2W9gb/HOFy5cgVBQUEwNjaGt7c3gLIPy9y5c4X12dvbY8aMGUrHUSEmJgatW7eGlpYWXF1dsXPnTqU81TlGFW3vqlWr8NZbbwEARowYoXTuxsfHY8iQIWjUqJFw3k2ePBkFBQWV7oOzZ89CIpFgw4YNSvMOHToEiUSC/fv3AwCePHmCSZMmwd7eHjKZDA0aNED37t3x559/Vr2zy6nu8X/V5du2bYuBAweK0lq2bAmJRIKLFy8KaVFRUZBIJLh69SoA4NatWxg3bhyaNm0KbW1tmJqaYsiQIaIxYsDf7c6xY8cwbtw4NGjQAA0bNhTmHzhwAD4+PtDV1YW+vj569+6Ny5cvV7l9V69ehba2NoKDg0XpJ06cgJqaGj799NMKl42Li4NEIsG2bdsQHh4OGxsb6OvrY/DgwcjNzUVhYSEmTZqEBg0aQE9PDyNGjKjwvFb46KOPoKenp7KxHjZsGCwtLYX24uzZs/Dz84OZmRm0tbXh4OCAkSNHVrnNqly8eBGhoaFwdHSElpYWLC0tMXLkSGRlZSnlPXfuHHr27AkDAwPo6emha9euOH36tFK+y5cv4+2334a2tjYaNmyIefPmqWwny48BfHG/zp8/Hw0bNoSWlha6du2KGzduiJYtPwawc+fOwme2/F91b7dmZmYiICAABgYGMDU1xcSJE/H8+XOlfK9yDbO3t8fly5dx7NgxoX4VjYMsLi6GiYkJRowYoTQvLy8PWlpa+OSTT4S0FStWoHnz5tDR0YGxsTHatWuHn376qVrbXl5hYSEmT54Mc3Nz6Ovro2/fvrh7967KvPfu3cPIkSNhYWEhXGd/+OEHlWXW5Nq9ZcsWNG3aFFpaWnB3d8fx48dF+arbXiYmJqJHjx4wNDSEjo4OfH19cfLkSWH+7NmzMWXKFACAg4ODcFzKt0Uvquq6efToUaFdMjIyQr9+/YS2r7w6Pe+oAnZ2dhQSEiJMr1+/ngBQmzZt6O2336YVK1bQ//73P1JTU6OAgAAh36ZNm8jHx4dkMhlt2rSJNm3aRKmpqURENG/ePJJIJBQYGEirVq2i8PBwMjMzI3t7e3r8+LFQhq+vL1laWpK5uTlNmDCB1q5dS7t376bS0lJ65513SEdHhyZNmkRr166ljz76iNTV1alfv36i+gOgVq1akZWVFc2dO5eWLVtGjo6OpKOjQ5mZmUK+e/fukbW1tVDmmjVr6IsvviAXFxehTvn5+eTm5kampqY0Y8YMWrNmDQUHB5NEIqGJEydWtAsF/fv3p4CAAFq0aBGtXr2ahgwZQgDok08+qXLZnTt3EgC6dOmSkNaqVSuSSqU0ePBgIW379u0EgJKSkoiI6NGjR2RlZUUff/wxrV69mr766itq2rQpaWho0Llz54Tl0tLSCAC5urqSo6Mjffnll7R06VK6desWJSUlkaGhIbm6ulJERAStXLmSOnXqRBKJhHbu3Fll3WfNmiWU3a9fP1q1ahV9++23REQUEhJCAGjw4MH07bffUnBwMAGg/v37i8qws7OjJk2akJGREU2bNo2+/vpratmyJUmlUoqJiRHyVfcYVbS9N2/epDlz5hAAGjNmjNK5O2HCBOrVqxctWLCA1q5dS6NGjSI1NTXRMaiIo6Mj9erVSyl9xIgRZGxsTEVFRUREFBQURJqamvTxxx/Td999RxEREeTv70+bN2+uch3lVff4v+ryYWFhZG5uLkxnZWWRRCIhqVRKK1euFNLHjx8vyrd9+3Zq1aoVzZw5kyIjI2nGjBlkbGxMdnZ2lJ+fL+RTtDuurq7k6+tLK1asoC+//JKIiDZu3EgSiYR69OhBK1asoIiICLK3tycjIyNKS0urchsXLVpEAGjPnj1ER
PT06VNycnIiV1dXev78uZDP19eXfH19henY2FgCQK1btyZPT0/65ptvKCwsjCQSCQ0dOpSCgoKoZ8+e9O2339Lw4cMJAIWHh1dal+PHjxMA2rZtmyg9Pz+fdHV1afz48URE9ODBAzI2NqYmTZrQokWLaN26dfTZZ5+Ri4tLlduryuLFi8nHx4fmzJlDkZGRNHHiRNLW1qb27duTXC4X8iUlJZGurq7Qnn755Zfk4OBAMpmMTp8+LeTLyMggc3NzMjY2ptmzZ9OiRYuocePG5ObmRgBEx6Wi/dqmTRtyd3enpUuX0uzZs0lHR4fat28vqndISAjZ2dkJ0zExMcJnVvHn5+dHAOiXX36pdB8o2qmWLVuSv78/rVy5kt577z0CQMOHDxflfdVr2K5du6hhw4bUrFkzoZ4vtmPljRw5koyMjKiwsFCUvmHDBgJAZ86cISKiyMhIoT1du3YtLV++nEaNGkVhYWGVbntFFNsfFBREK1eupIEDBwrHcNasWUK++/fvU8OGDcnW1pbmz
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {
"tags": []
},
"execution_count": 9
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "RKZhRMmJmNH_",
"colab_type": "text"
},
"source": [
"At this point, it is important to understand how the peak memory is measured. The benchmarking tools measure the peak memory usage the same way the command `nvidia-smi` does - see [here](https://developer.nvidia.com/nvidia-system-management-interface) for more information. \n",
"In short, all memory that is allocated for a given *model identifier*, *batch size* and *sequence length* is measured in a separate process. This way it can be ensured that there is no previously unreleased memory falsely included in the measurement. One should also note that the measured memory even includes the memory allocated by the CUDA driver to load PyTorch and TensorFlow and is, therefore, higher than library-specific memory measurement function, *e.g.* this one for [PyTorch](https://pytorch.org/docs/stable/cuda.html#torch.cuda.max_memory_allocated).\n",
"\n",
"Alright, let's analyze the results. It can be noted that the models `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2` and `deepset/roberta-base-squad2` require significantly less memory than the other three models. Besides `mrm8488/longformer-base-4096-finetuned-squadv2` all models more or less follow the same memory consumption pattern with `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2` seemingly being able to better scale to larger sequence lengths. \n",
"`mrm8488/longformer-base-4096-finetuned-squadv2` is a *Longformer* model, which makes use of *LocalAttention* (check this blog post to learn more about local attention) so that the model scales much better to longer input sequences.\n",
"\n",
"For the sake of this notebook, we assume that the longest required input will be less than 512 tokens so that we settle on the models `aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2` and `deepset/roberta-base-squad2`. \n",
"\n",
"To better understand how many API requests of our *question-answering* pipeline can be run in parallel, we are interested in finding out how many batches the two models run out of memory."
]
},
{
"cell_type": "code",
"metadata": {
"id": "9Nwmb57M4wIG",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 356
},
"outputId": "4c074607-5200-4cca-bbd5-c39d32ce0451"
},
"source": [
"!python run_benchmark.py --no_speed --save_to_csv \\\n",
" --inference_memory_csv_file plots_pt/required_memory_2.csv \\\n",
" --env_info_csv_file plots_pt/env.csv \\\n",
" --models aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 \\\n",
" deepset/roberta-base-squad2 \\\n",
" --sequence_lengths 512 \\\n",
" --batch_sizes 64 128 256 512\\\n",
" --no_env_print"
],
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 11:56:44.781155: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n",
"1 / 2\n",
"2 / 2\n",
"Doesn't fit on GPU. CUDA out of memory. Tried to allocate 6.00 GiB (GPU 0; 15.90 GiB total capacity; 9.47 GiB already allocated; 5.60 GiB free; 9.52 GiB reserved in total by PyTorch)\n",
"\n",
"==================== INFERENCE - MEMORY - RESULT ====================\n",
"--------------------------------------------------------------------------------\n",
" Model Name Batch Size Seq Length Memory in MB \n",
"--------------------------------------------------------------------------------\n",
"aodiniz/bert_uncased_L-10_H-51 64 512 2455 \n",
"aodiniz/bert_uncased_L-10_H-51 128 512 3929 \n",
"aodiniz/bert_uncased_L-10_H-51 256 512 6875 \n",
"aodiniz/bert_uncased_L-10_H-51 512 512 12783 \n",
" deepset/roberta-base-squad2 64 512 3539 \n",
" deepset/roberta-base-squad2 128 512 5747 \n",
" deepset/roberta-base-squad2 256 512 10167 \n",
" deepset/roberta-base-squad2 512 512 N/A \n",
"--------------------------------------------------------------------------------\n",
"Saving results to csv.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "P4JFKLZXqmss",
"colab_type": "text"
},
"source": [
"Let's plot the results again, this time changing the x-axis to `batch_size` however."
]
},
{
"cell_type": "code",
"metadata": {
"id": "tNtvHpE67pgH",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "092c4dac-5002-4603-8eba-cd4bca727744"
},
"source": [
"# plot graph and save as image\n",
"!python plot_csv_file.py --csv_file plots_pt/required_memory_2.csv \\\n",
" --figure_png_file=plots_pt/required_memory_plot_2.png \\\n",
" --no_log_scale \\\n",
" --short_model_names aodiniz-bert deepset-roberta \\\n",
" --plot_along_batch\n",
"\n",
"# show image\n",
"from IPython.display import Image\n",
"Image('plots_pt/required_memory_plot_2.png')"
],
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 11:57:51.876810: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVxN+f8H8NftVrfbLm23pFTaVJYYg5KvNUvSIGJUGMtgMNYxC2IYW2RsMTOYwZAlxpgZe1lizFiyjC0m2UqIFmm79/37w++e6XYrRSl6Px+P++Cc8znnfM45n3Puu8/ncz5XREQExhhjjDFWa2hUdwYYY4wxxtibxQEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgY4wxxlgtwwEgYxWQnZ2Njz76CJaWlhCJRBg/fnyV7s/Ozg5hYWGvvP7ChQthb28PsViMJk2aVF7GWKnCwsJgZ2enMk8kEmHmzJmvtL3XLQNFtWvXDu7u7pWyrXdZXFwcRCIR4uLiqjsr7wQudzUTB4AVsH79eohEIohEIhw/flxtORHBxsYGIpEIPXr0qIYcsqo2d+5crF+/Hh9//DE2bNiAQYMGVXeWSrV//35MmTIFbdq0wbp16zB37tzqzhKrRXJycjBz5kwOomqYn3/+GZGRkdWdjSrD5a78NKs7A28jHR0d/Pzzz/D29laZf+TIEdy9excSiaSacsaq2uHDh/H+++9jxowZb2R/165dg4bGq/2ddvjwYWhoaOCHH36AtrZ2JeeMVcTz58+hqflqj9vXKQPVKScnB+Hh4QBe1ACxmuHnn3/GpUuXqrz1orpwuSu/t++pUgN069YN27ZtQ2Fhocr8n3/+GV5eXrC0tKymnL2eZ8+eVXcWary0tDQYGxtX2vYKCwuRn59f6nKJRAItLa1X2nZaWhqkUmmlBn85OTmVtq3aREdH55UDwNcpA9VBoVAgNze3urPBqlBN/K7gcldxHAC+guDgYDx+/BgHDhwQ5uXn52P79u0YMGBAiesoFApERkaiUaNG0NHRgYWFBUaMGIEnT56opLOzs0OPHj0QFxeH5s2bQyqVwsPDQ6jOjomJgYeHB3R0dODl5YVz586p7evw4cPw8fGBnp4ejI2NERAQgCtXrqikmTlzJkQiES5fvowBAwagTp068Pb2xrp16yASiUrc7ty5cyEWi3Hv3r1Sz01J/Z+K7q+oAwcOwNvbG8bGxtDX14ezszM+//xzYXl+fj6mT58OLy8vGBkZQU9PDz4+PoiNjVXb/uPHjzFo0CAYGhrC2NgYoaGhOH/+PEQiEdavX6+S9urVq+jTpw9MTEygo6OD5s2bY/fu3aUeE/Bfn6CkpCT89ttvQleAW7duAXgRbA0dOhQWFhbQ0dFB48aN8eOPP6ps49atWxCJRFi0aBEiIyPh4OAAiUSCy5cvl7rf4v2/lN0Q4uPjMWHCBJiZmUFPTw+BgYF4+PChkE4kEmHdunV49uyZkNei52Hjxo3w8vKCVCqFiYkJ+vfvjzt37qjsW9lv58yZM2jbti10dXWF65OXl4cZM2bA0dEREokENjY2mDJlCvLy8lS2IRKJMGbMGOzatQvu7u6QSCRo1KgR9u7dq3as9+7dw9ChQ2FlZQWJRIIGDRrg448/VgmQnz59ivHjx8PGxgYSiQSOjo6YP38+FApFqedQ6ZdffkH37t2F7Ts4OGD27NmQy+Vqabdt2yacH1NTU3z44Ycllnvlceno6MDd3R07d+4scd/F+wAq74cbN24gLCwMxsbGMDIywuDBg9WC7OJlQHk9S/ooy+PLnDlzBq1bt4ZUKkWDBg0QFRWllqai13jTpk1o1KgRJBIJoqKiYGZmBgAIDw8X8ldaP8jTp09DJBKp3TMAsG/fPohEIuzZswcAkJWVhfHjx8POzg4SiQTm5ubo1KkTzp49W65jL+7u3bvo1asX9PT0YG5ujk8//VTtGJVOnToFPz8/GBkZQVdXF76+voiPj1dLd+/ePQwZMgQWFhZCmV+7dq1KGuUzJTo6Gp9//jksLS2hp6eHnj17qt2LiYmJ6N27NywtLaGjo4N69eqhf//+yMjIUEn3svu6Xbt2+O2335CcnCxck5Ke10UpnzlHjhzBqFGjYG5ujnr16gnLV65cKVx3KysrjB49Gk+fPi1xW9Vd7i5cuICwsDDY29tDR0cHlpaWGDJkCB4/flzmOXhnESu3devWEQD6+++/qXXr1jRo0CBh2a5du0hDQ4Pu3btHtra21L17d5V1P/roI9LU1KRhw4ZRVFQUTZ06lfT09KhFixaUn58vpLO1tSVnZ2eSyWQ0c+ZMWrJkCVlbW5O+vj5t3LiR6tevT/PmzaN58+aRkZEROTo6klwuF9Y/cOAAaWpqkpOTEy1YsIDCw8PJ1NSU6tSpQ0lJSUK6GTNmEAByc3OjgIAAWrlyJa1YsYIyMzNJKpXSxIkT1Y7fzc2N2rdvX+Y5Cg0NJVtbW7X5yv0pXbp0ibS1tal58+a0dOlSioqKokmTJlHbtm2FNA8fPiSZTEYTJkygVatW0YIFC8jZ2Zm0tLTo3LlzQjq5XE6tWrUisVhMY8aMoeXLl1OnTp2ocePGBIDWrVunsl8jIyNyc3Oj+fPn0/Lly6lt27YkEokoJiam1ONKTU2lDRs2kKmpKTVp0oQ2bNhAGzZsoOzsbMrJySFXV1fS0tKiTz/9lL799lvy8fEhABQZGSlsIykpSTjn9vb2NG/ePFqyZAklJyeXul9bW1sKDQ0VppVlsGnTptS+fXtatmwZTZw4kcRiMQUFBQnpNmzYQD4+PiSRSIS83rx5k4iIvv76axKJRNSvXz9auXKlUEbs7OzoyZMnwjZ8fX3J0tKSzMzM6JNPPqHVq1fTrl27SC6XU+fOnUlXV5fGjx9Pq1evpjFjxpCmpiYFBASo5B8ANW7cmGQyGc2ePZsiIyPJ3t6edHV16dGjR0K6e/fukZWVlbDNqKgo+uqrr8jV1VXI07Nnz8jT05Pq1q1Ln3/+OUVFRVFISAiJRCIaN25cqedQqVevXhQUFEQLFy6kVatWUd++fQkATZo0SSWd8hy3aNGClixZQp999hlJpVK187Nv3z7S0NAgd3d3Wrx4MX3xxRdkZGREjRo1UrsHANCMGTOEaeX90LRpU/rggw9o5cqV9NFHH
xEAmjJlSpllQHk9i35sbW1JKpXSw4cPyzwHvr6+ZGVlRebm5jRmzBj69ttvydvbmwDQDz/8IKSr6DV2dXUlMzMzCg8PpxUrVtDx48dp1apVBIACAwOFfJ4/f77UvNnb21O3bt3U5g8ePJjq1KkjPCcHDBhA2traNGHCBPr+++9p/vz55O/vTxs3bizz2EuSk5NDTk5OpKOjQ1OmTKHIyEjy8vIiT09PAkCxsbFC2kOHDpG2tja1atWKIiIiaMmSJeTp6Una2tp06tQpIV1qairVq1ePbGxsaNasWbRq1Srq2bMnAaAlS5YI6WJjYwkAeXh4kKenJy1evJg+++wz0tHRIScnJ8rJySEiory8PGrQoAFZWVnR119/Td9//z2Fh4dTixYt6NatW8L2ynNf79+/n5o0aUKmpqbCNdm5c2eZ50h5P7i5uZGvry8tW7aM5s2bR0T/leOOHTvSsmXLaMyYMSQWi9W+12pKuVu0aBH5+PjQrFmzaM2aNTRu3DiSSqX03nvvkUKhKFeZe
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {
"tags": []
},
"execution_count": 11
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bdoTRF7Yq8oV",
"colab_type": "text"
},
"source": [
"Interesting! `aodiniz/bert_uncased_L-10_H-51` clearly scales better for higher batch sizes and does not even run out of memory for 512 tokens.\n",
"\n",
"For comparison, let's run the same benchmarking on TensorFlow."
]
},
{
"cell_type": "code",
"metadata": {
"id": "752y4onm-gpy",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 726
},
"outputId": "a65c4bc1-f88e-46ae-cb80-27e29a0a1954"
},
"source": [
"# create plots folder in content\n",
"!mkdir -p plots_tf\n",
"\n",
"!TF_CPP_MIN_LOG_LEVEL=3 python run_benchmark_tf.py --no_speed --save_to_csv \\\n",
" --inference_memory_csv_file plots_tf/required_memory_2.csv \\\n",
" --env_info_csv_file plots_tf/env.csv \\\n",
" --models aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 \\\n",
" deepset/roberta-base-squad2 \\\n",
" --sequence_lengths 512 \\\n",
" --batch_sizes 64 128 256 512 \\\n",
" --no_env_print \\"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"text": [
"1 / 2\n",
"Doesn't fit on GPU. OOM when allocating tensor with shape[512,8,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n",
"\t [[node tf_bert_model/bert/encoder/layer_._0/attention/self/Softmax (defined at /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py:267) ]]\n",
"Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.\n",
" [Op:__inference_run_in_graph_mode_4243]\n",
"\n",
"Errors may have originated from an input operation.\n",
"Input Source operations connected to node tf_bert_model/bert/encoder/layer_._0/attention/self/Softmax:\n",
" tf_bert_model/bert/encoder/layer_._0/attention/self/add (defined at /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py:264)\n",
"\n",
"Function call stack:\n",
"run_in_graph_mode\n",
"\n",
"2 / 2\n",
"Doesn't fit on GPU. OOM when allocating tensor with shape[512,12,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc\n",
"\t [[node tf_roberta_model/roberta/encoder/layer_._0/attention/self/Softmax (defined at /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py:267) ]]\n",
"Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.\n",
" [Op:__inference_run_in_graph_mode_5047]\n",
"\n",
"Errors may have originated from an input operation.\n",
"Input Source operations connected to node tf_roberta_model/roberta/encoder/layer_._0/attention/self/Softmax:\n",
" tf_roberta_model/roberta/encoder/layer_._0/attention/self/add (defined at /usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_bert.py:264)\n",
"\n",
"Function call stack:\n",
"run_in_graph_mode\n",
"\n",
"\n",
"==================== INFERENCE - MEMORY - RESULT ====================\n",
"--------------------------------------------------------------------------------\n",
" Model Name Batch Size Seq Length Memory in MB \n",
"--------------------------------------------------------------------------------\n",
"aodiniz/bert_uncased_L-10_H-51 64 512 2885 \n",
"aodiniz/bert_uncased_L-10_H-51 128 512 4933 \n",
"aodiniz/bert_uncased_L-10_H-51 256 512 9029 \n",
"aodiniz/bert_uncased_L-10_H-51 512 512 N/A \n",
" deepset/roberta-base-squad2 64 512 4933 \n",
" deepset/roberta-base-squad2 128 512 9029 \n",
" deepset/roberta-base-squad2 256 512 15391 \n",
" deepset/roberta-base-squad2 512 512 N/A \n",
"--------------------------------------------------------------------------------\n",
"Saving results to csv.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "3h5JqW2osAQ7",
"colab_type": "text"
},
"source": [
"Let's see the same plot for TensorFlow."
]
},
{
"cell_type": "code",
"metadata": {
"id": "hkw-EOOvA52R",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "3947ccf0-b91c-43bf-8569-d6afe0232185"
},
"source": [
"# plot graph and save as image\n",
"!python plot_csv_file.py --csv_file plots_tf/required_memory_2.csv --figure_png_file=plots_tf/required_memory_plot_2.png --no_log_scale --short_model_names aodiniz-bert deepset-roberta --plot_along_batch\n",
"\n",
"# show image\n",
"from IPython.display import Image\n",
"Image('plots_tf/required_memory_plot_2.png')"
],
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 11:59:28.790462: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVxN+f8H8NctdbvtpT2UihYVTQxRMraQJEu2UZaxDMa+jPnOIMY+RsYWY4YZy0wixjCDUBrLYCI0tpjKVkKUpFL3/fuj3z3T6bYSNXo/H4/7qPM5n3PO56z3fc/5fD5HQkQExhhjjDFWZ6jUdAEYY4wxxtjbxQEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY4wxxlgdwwEgY1WQnZ2Njz76CGZmZpBIJJg8efIbXZ61tTWGDRv2ytMvX74cNjY2UFVVRYsWLaqvYKxMw4YNg7W1tShNIpFg3rx5rzS/1z0GiuvQoQOcnZ2rZV7vspiYGEgkEsTExNR0Ud4JfNzVThwAVsGWLVsgkUggkUhw4sQJpfFEhIYNG0IikaBnz541UEL2pi1atAhbtmzBxx9/jK1bt2Lo0KE1XaQyHT58GDNnzkS7du2wefNmLFq0qKaLxOqQnJwczJs3j4OoWmbHjh0IDQ2t6WK8MXzcVV69mi7Af5GGhgZ27NgBT09PUfrx48dx9+5dSKXSGioZe9OOHTuGNm3aYO7cuW9ledevX4eKyqv9Tjt27BhUVFTw3XffQV1dvZpLxqrixYsXqFfv1S63r3MM1KScnByEhIQAKLoDxGqHHTt2ICEh4Y0/vagpfNxV3n/vqlIL9OjRAxERESgoKBCl79ixA+7u7jAzM6uhkr2e58+f13QRar309HTo6+tX2/wKCgqQn59f5nipVAo1NbVXmnd6ejpkMlm1Bn85OTnVNq+6REND45UDwNc5BmqCXC5Hbm5uTReDvUG18buCj7uq4wDwFQwaNAiPHz9GVFSUkJafn49du3Zh8ODBpU4jl8sRGhqKZs2aQUNDA6amphgzZgyePHkiymdtbY2ePXsiJiYGLVu2hEwmg4uLi3A7OzIyEi4uLtDQ0IC7uzsuXLigtKxjx47By8sLWlpa0NfXh7+/P65evSrKM2/ePEgkEly5cgWDBw+GgYEBPD09sXnzZkgkklLnu2jRIqiqquLevXtlbpvS6j8VX15xUVFR8PT0hL6+PrS1tWFvb4/PPvtMGJ+fn485c+bA3d0denp60NLSgpeXF6Kjo5Xm//jxYwwdOhS6urrQ19dHcHAwLl68CIlEgi1btojyXrt2Df369YOhoSE0NDTQsmVL7Nu3r8x1Av6tE5SUlIQDBw4IVQGSk5MBFAVbI0eOhKmpKTQ0NNC8eXP88MMPonkkJydDIpHgq6++QmhoKGxtbSGVSnHlypUyl1uy/peiGsLJkycxdepUGBsbQ0tLCwEBAXj48KGQTyKRYPPmzXj+/LlQ1uLbYdu2bXB3d4dMJoOhoSEGDhyIO3fuiJatqLcTFxeH9u3bQ1NTU9g/eXl5mDt3Luzs7CCVStGwYUPMnDkTeXl5onlIJBJMmDABe/fuhbOzM6RSKZo1a4aDBw8qreu9e/cwcuRIWFhYQCqVonHjxvj4449FAfLTp08xefJkNGzYEFKpFHZ2dli6dCnkcnmZ21Dhl19+ga+vrzB/W1tbLFiwAIWFhUp5IyIihO1jZGSEDz/8sNTjXrFeGhoacHZ2xp49e0pddsk6gIrz4ebNmxg2bBj09fWhp6eH4cOHKwXZJY8Bxf4s7aM4HisSFxeHtm3bQiaToXHjxggLC1PKU9V9vH37djRr1gxSqRRhYWEwNjYGAISEhAjlK6se5F9//QWJRKJ0zgDAoUOHIJFIsH//fgDAs2fPMHnyZFhbW0MqlcLExARdunTB+fPnK7XuJd29exe9e/eGlpYWTExMMGXKFKV1VDhz5gy6desGPT09aGpqwtvbGydPnlTKd+/ePYwYMQKmpqbCMf/999+L8iiuKeHh4fjss89gZmYGLS0t9OrVS+lcTExMRN++fWFmZgYNDQ00aNAAAwcORGZmpihfRed1hw4dcODAAaSkpAj7pLTrdXGKa87x48cxbtw4mJiYoEGDBsL4devWCfvdwsIC48ePx9OnT0udV00fd5cuXcKwYcNgY2MDDQ0NmJmZYcSIEXj8+HG52+CdRazSNm/eTADo3Llz1LZtWxo6dKgwbu/evaSiokL37t0jKysr8vX1FU370UcfUb169WjUqFEUFhZGs2bNIi0tLWrVqhXl5+cL+aysrMje3p7Mzc1p3rx5tHLlSrK0tCRtbW3atm0bNWrUiJYsWUJLliwhPT09srOzo8LCQmH6qKgoqlevHjVt2pSWLVtGISEhZGRkRAYGBpSUlCTkmzt3LgEgJycn8vf3p3Xr1tHatWspKyuLZDIZTZs2TWn9nZycqGPHjuVuo+DgYLKyslJKVyxPISEhgdTV1ally5a0atUqCgsLo+nTp1P79u2FPA8fPiRzc3OaOnUqrV+/npYtW0b29vakpqZGFy5cEPIVFhaSh4cHqaqq0oQJE2jNmjXUpUsXat68OQGgzZs3i5arp6dHTk5OtHTpUlqzZg21b9+eJBIJRUZGlrleaWlptHXrVjIyMqIWLVrQ1q1baevWrZSdnU05OTnk6OhIampqNGXKFPrmm2/Iy8uLAFBoaKgwj6SkJGGb29jY0JIlS2jlypWUkpJS5nKtrKwoODhYGFYcg25ubtSxY0davXo1TZs2jVRVVSkwMFDIt3XrVvLy8iKpVCqU9datW0RE9OWXX5JEIqEBAwbQunXrhGPE2tqanjx5IszD29ubzMzMyNjYmD755BPasGED7d27lwoLC6lr166kqalJkydPpg0bNtCECROoXr165O/vLyo/AGrevDmZm5vTggULKDQ0lGxsbEhTU5MePXok5Lt37x5ZWFgI8wwLC6MvvviCHB0dhTI9f/6cXF1dqX79+vTZZ59RWFgYBQUFkUQioUmTJpW5DRV69+5NgYGBtHz5clq/fj3179+fAND06dNF+RTbuFWrVrRy5Ur69NNPSSaTKW2fQ4cOkYqKCjk7O9PXX39N//vf/0hPT4+aNWumdA4AoLlz5wrDivPBzc2N+vTpQ+vWraOPPvqIA
NDMmTPLPQYU+7P4x8rKimQyGT18+LDcbeDt7U0WFhZkYmJCEyZMoG+++YY8PT0JAH333XdCvqruY0dHRzI2NqaQkBBau3YtnThxgtavX08AKCAgQCjnxYsXyyybjY0N9ejRQyl9+PDhZGBgIFwnBw8eTOrq6jR16lTatGkTLV26lPz8/Gjbtm3lrntpcnJyqGnTpqShoUEzZ86k0NBQcnd3J1dXVwJA0dHRQt6jR4+Suro6eXh40IoVK2jlypXk6upK6urqdObMGSFfWloaNWjQgBo2bEjz58+n9evXU69evQgArVy5UsgXHR1NAMjFxYVcXV3p66+/pk8//ZQ0NDSoadOmlJOTQ0REeXl51LhxY7KwsKAvv/ySNm3aRCEhIdSqVStKTk4W5leZ8/rw4cPUokULMjIyEvbJnj17yt1GivPBycmJvL29afXq1bRkyRIi+vc47ty5M61evZomTJhAqqqqSt9rteW4++qrr8jLy4vmz59PGzdupEmTJpFMJqP333+f5HJ5pY6Zd
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {
"tags": []
},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ybqol62LsVrF",
"colab_type": "text"
},
"source": [
"The model implemented in TensorFlow requires more memory than the one implemented in PyTorch. Let's say for whatever reason we have decided to use TensorFlow instead of PyTorch. \n",
"\n",
"The next step is to measure the inference time of these two models. Instead of disabling time measurement with `--no_speed`, we will now disable memory measurement with `--no_memory`."
]
},
{
"cell_type": "code",
"metadata": {
"id": "m8qfllt9uPZg",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 302
},
"outputId": "b185f547-fbe6-4287-b8a0-6229d3eec377"
},
"source": [
"!TF_CPP_MIN_LOG_LEVEL=3 python run_benchmark_tf.py --no_memory --save_to_csv \\\n",
" --inference_time_csv_file plots_tf/time_2.csv \\\n",
" --env_info_csv_file plots_tf/env.csv \\\n",
" --models aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 \\\n",
" deepset/roberta-base-squad2 \\\n",
" --sequence_lengths 8 32 128 512 \\\n",
" --batch_sizes 256 \\\n",
" --no_env_print \\"
],
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"text": [
"1 / 2\n",
"2 / 2\n",
"\n",
"==================== INFERENCE - SPEED - RESULT ====================\n",
"--------------------------------------------------------------------------------\n",
" Model Name Batch Size Seq Length Time in s \n",
"--------------------------------------------------------------------------------\n",
"aodiniz/bert_uncased_L-10_H-51 256 8 0.033 \n",
"aodiniz/bert_uncased_L-10_H-51 256 32 0.119 \n",
"aodiniz/bert_uncased_L-10_H-51 256 128 0.457 \n",
"aodiniz/bert_uncased_L-10_H-51 256 512 2.21 \n",
" deepset/roberta-base-squad2 256 8 0.064 \n",
" deepset/roberta-base-squad2 256 32 0.25 \n",
" deepset/roberta-base-squad2 256 128 1.01 \n",
" deepset/roberta-base-squad2 256 512 4.65 \n",
"--------------------------------------------------------------------------------\n",
"Saving results to csv.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-bPClv873lrW",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "152f14c7-288a-4471-9cc0-5108cb24804c"
},
"source": [
"# plot graph and save as image\n",
"!python plot_csv_file.py --csv_file plots_tf/time_2.csv --figure_png_file=plots_tf/time_plot_2.png --no_log_scale --short_model_names aodiniz-bert deepset-roberta --is_time\n",
"\n",
"# show image\n",
"from IPython.display import Image\n",
"Image('plots_tf/time_plot_2.png')"
],
"execution_count": 15,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 12:04:58.002654: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd1gU19cH8O+y9C5VmiCoFHtQsWMswYJij0YF7AVrrElMsESxl58xtsRuoqBgjFFUolhj7w3UAJZgV5rU3fP+sS8jwwICgovu+TzPPjB37sycKbt7dmbuHQkRERhjjDHGmNrQUHUAjDHGGGPsw+IEkDHGGGNMzXACyBhjjDGmZjgBZIwxxhhTM5wAMsYYY4ypGU4AGWOMMcbUDCeAjDHGGGNqhhNAxhhjjDE1wwkgY4wxxpia4QSQMcYYY0zNcALIGGOMMaZmOAFkjDHGGFMznAAyxhhjjKkZTgAZY4wxxtQMJ4CMMcYYY2qGE0DGGGOMMTXDCSBjjDHGmJrhBJAxxhhjTM1wAsgYY4wxpmY4AWSMMcYYUzOcADLGGGOMqRlOABljjDHG1AwngIwxxhhjaoYTQMYYY4wxNcMJIGOMMcaYmuEEkDHGGGNMzXACyBhjjDGmZjgBZIwxxhhTM5wAMsYYY4ypGU4AGWOMMcbUDCeAjDHGGGNqhhNAxhhjjDE1wwkgY4wxxpia4QSQMcYYY0zNcAJYgQQGBsLJyUnVYaiNnJwcTJkyBQ4ODtDQ0EDXrl3LdXmtWrVCq1atSj39li1b4ObmBi0tLZiampZdYKxQM2bMgEQiEZU5OTkhMDCwVPN732Mgr8DAQBgaGpbJvD5l8fHxkEgk2Lhxo6pD+STwcffp0FR1AJ+6/F8ehTly5Eg5R8LyW79+PRYuXIjx48fjs88+Q5UqVVQdUqFu376NwMBAtG/fHtOmTYO+vr6qQ2JqZu7cufDw8Cj3H0qs+Pbt24ezZ89ixowZqg6l3PBxV344ASxnW7ZsEQ1v3rwZhw4dUip3d3fHunXrIJfLP2R4au3w4cOws7PD0qVLP8jyDh48WOppo6OjIZfLsXz5clSrVq0Mo2IlFRMTAw2N0l08eZ9jQNXmzp2Lnj178hdxBbJv3z6sXLnyk08A+bgrH5wAlrP+/fuLhk+fPo1Dhw4plbMP7+nTp2V6KVUulyMrKwu6uroFjtfW1i71vJ8+fQoAZRpvWloaDAwMymx+6kJHR6fU077PMaAKRISMjAzo6empOhRWTjIyMqCtrV3qHzXlgY+7D6Pi7HGmdA9g7r0rixYtwsqVK+Hs7Ax9fX188cUXePDgAYgIs2fPhr29PfT09ODn54eXL18qzXf//v1o0aIFDAwMYGRkhE6dOuHGjRvvjKeg+58AYOPGjZBIJIiPjxfKzp8/Dx8fH1hYWEBPTw9Vq1bFoEGDRNMtWrQITZs2hbm5OfT09ODp6YmdO3cqzT89PR1jx46FhYUFjIyM0KVLFzx69AgSiUTpl+6jR48waNAgWFtbQ0dHBzVr1sT69euLXK/c7XrkyBHcuHEDEokEEokE0dHRABSJ0cSJE+Hg4AAdHR24urpi0aJFICLRfCQSCUaPHo1t27ahZs2a0NHRQWRkZKHLzX//V3R0NCQSCUJDQzFnzhzY29tDV1cXbdq0wd27d4V6Tk5OCA4OBgBYWloqbYfi7N/c+3bu3buHjh07wsjICP369QOgSFyXLVuGmjVrQldXF9bW1hg+fDhevXolmoeTkxN8fX1x4sQJNGrUCLq6unB2dsbmzZuV1vX169eYMGECnJycoKOjA3t7e/j7++P58+dCnczMTAQHB6NatWrQ0dGBg4MDpkyZgszMzEK3Ya7jx4+jV69eqFKlijDthAkTkJ6erlT38OHDwvYxNTWFn58fbt26pVTvxIkTaNiwIXR1deHi4oI1a9YUuOz89wDmvh9OnjyJr7/+GpaWljAwMEC3bt3w7Nkz0bT5jwEnJyfh+Mv/yj0e3+Xff/+Fj48PDAwMYGtri1mzZikdqyXdxwcOHECDBg2gp6eHNWvWQCKRIC0tDZs2bRLiK+w+yCdPnkBTUxMzZ85UGhcTEwOJRIKffvoJAJCdnY2ZM2eievXq0NXVhbm5OZo3b45Dhw4Va93ze/36NQIDA2FiYgJTU1MEBATg9evXBda9ffs2evbsCTMzM+jq6qJBgwbYs2dPgfMcP3688HlQrVo1zJ8/X3S1Ju9n9dKlS+Ho6Ag9PT14e3vj+vXrovk9fvwYAwcOhL29PXR0dGBjYwM/Pz/R5ynw7vd1YGAgVq5cCQCi46YouZ8527dvx/Tp02FnZwd9fX0kJycDAMLCwuDp6Qk9PT1YWFigf//+ePToUYHzUvVxl5CQgFGjRsHV1RV6enowNzdHr169lLYjKxyfAfwIbNu2DVlZWRgzZgxevnyJBQsWoHfv3mjdujWio6MxdepU3L17FytWrMCkSZNECdCWLVsQEBAAHx8fzJ8/H2/evMGqVavQvHlzXLp0qUwanTx9+hRffPEFLC0tMW3aNJiamiI+Ph7h4eGiesuXL0eXLl3Qr18/ZGVlYfv27ejVqxf27t2LTp06CfUCAwMRGhqKAQMGoHHjxjh69KhofK4nT56gcePGQiJmaWmJ/fv3Y/DgwUhOTsb48eMLjNfS0hJbtmzBnDlzkJqaipCQEACKy/BEhC5duuDIkSMYPHgw6tWrhwMHDmDy5Ml49OiR0uXiw4cPIzQ0FKNHj4aFhUWptue8efOgoaGBSZMmISkpCQsWLEC/fv1w5swZAMCyZcuwefNmREREYNWqVTA0NESdOnUAlGz/5uTkwMfHB82bN8eiRYuE+wiHDx+OjRs3YuDAgRg7dizi4uLw008/4dKlSzh58iS0tLSEedy9exc9e/bE4MGDERAQgPXr1yMwMBCenp6oWbMmACA1NRUtWrTArVu3MGjQIHz22Wd4/vw59uzZg4cPH8LCwgJyuRxdunTBiRMnMGzYMLi7u+PatWtYunQpYmNjsXv37iK3WVhYGN68eYORI0fC3NwcZ8+exYoVK/Dw4UOEhYUJ9aKiotChQwc4OztjxowZSE9Px4oVK9CsWTNcvHhR2D7Xrl0TjuEZM2YgJycHwcHBsLa2LvZ+HDNmDCpVqoTg4GDEx8dj2bJlGD16NHbs2FHoNMuWLUNqaqqobOnSpbh8+TLMzc3fuUyZTIb27dujcePGWLBgASIjIxEcHIycnBzMmjVLqFeSfRwTE4O+ffti+PDhGDp0KFxdXbFlyxYMGTIEjRo1wrBhwwAALi4uBcZkbW0Nb29vhIaGCj9ccu3YsQNSqRS9evUCoPiRGRISIsw7OTkZ58+fx8WLF9GuXbt3rn9eRAQ/Pz+cOHECI0aMgLu7OyIiIhAQEKBU98aNG2jWrBns7Owwbdo0GBgYIDQ0FF27dsWuXbvQrVs3AMCbN2/g7e2NR48eYfjw4ahSpQpOnTqFb775BomJiVi2bJlovps3b0ZKSgqCgoKQkZGB5cuXo3Xr1
rh27ZpwLPXo0QM3btzAmDFj4OTkhKdPn+LQoUO4f/++cDwW5309fPhw/PfffwXeUvQus2fPhra2NiZNmoTMzExoa2sLx0fDhg0REhKCJ0+eYPny5Th58iQuXbokuvpQEY67c+fO4dSpU+jTpw/s7e0RHx+PVatWoVWrVrh58ybfJ10cxD6ooKAgKmyzBwQEkKOjozAcFxdHAMjS0pJev34tlH/zzTcEgOrWrUvZ2dlCed++fUlbW5syMjKIiCglJYVMTU1p6NChouU8fvyYTExMlMrzCw4OLjDWDRs2EACKi4sjIqKIiAgCQOfOnStyfm/evBENZ2VlUa1atah169ZC2YULFwgAjR8/XlQ3MDCQAFBwcLBQNnjwYLKxsaHnz5+L6vbp04dMTEyUlpeft7c31axZU1S2e/duAkA//vijqLxnz54kkUjo7t27QhkA0tDQoBs3bhS5nLzL8/b2FoaPHDlCAMjd3Z0yMzOF8uXLlxMAunbtmlCWuy+ePXsmlJVk/wYEBBAAmjZtm
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {
"tags": []
},
"execution_count": 15
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "f9sIjRWd4Me1",
"colab_type": "text"
},
"source": [
"Ok, this took some time... time measurements take much longer than memory measurements because the forward pass is called multiple times for stable results. Timing measurements leverage Python's [timeit module](https://docs.python.org/2/library/timeit.html#timeit.Timer.repeat) and run 10 times the value given to the `--repeat` argument (defaults to 3), so in our case 30 times.\n",
"\n",
"Let's focus on the resulting plot. It becomes obvious that `aodiniz/bert_uncased_L-10_H-51` is around twice as fast as `deepset/roberta-base-squad2`. Given that the model is also more memory efficient and assuming that the model performs reasonably well, for the sake of this notebook we will settle on `aodiniz/bert_uncased_L-10_H-51`. Our model should be able to process input sequences of up to 512 tokens. Latency time of around 2 seconds might be too long though, so let's compare the time for different batch sizes and using TensorFlows XLA package for more speed."
]
},
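    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Before running the comparison, here is a minimal sketch of the timing scheme described above, using Python's `timeit.repeat` directly. It only illustrates the measurement logic; it is not the benchmark script's exact code."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "import timeit\n",
        "\n",
        "# Minimal sketch of the timing scheme (illustration, not the library's exact code):\n",
        "# each of `repeat` measurements calls the forward pass `number` times,\n",
        "# and the best measurement is reported.\n",
        "def forward_pass():\n",
        "    pass  # stands in for e.g. model(input_ids)\n",
        "\n",
        "runtimes = timeit.repeat(forward_pass, repeat=3, number=10)  # 3 * 10 = 30 calls\n",
        "best_time_per_call = min(runtimes) / 10.0"
      ],
      "execution_count": null,
      "outputs": []
    },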
{
"cell_type": "code",
"metadata": {
"id": "aPeMsHJb3t2g",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 202
},
"outputId": "56276801-6d56-444c-8ac8-75471136aa84"
},
"source": [
"!TF_CPP_MIN_LOG_LEVEL=3 python run_benchmark_tf.py --no_memory --save_to_csv \\\n",
" --inference_time_csv_file plots_tf/time_xla_1.csv \\\n",
" --env_info_csv_file plots_tf/env.csv \\\n",
" --models aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 \\\n",
" --sequence_lengths 512 \\\n",
" --batch_sizes 8 64 256 \\\n",
" --no_env_print \\\n",
" --use_xla"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"text": [
"1 / 1\n",
"\n",
"==================== INFERENCE - SPEED - RESULT ====================\n",
"--------------------------------------------------------------------------------\n",
" Model Name Batch Size Seq Length Time in s \n",
"--------------------------------------------------------------------------------\n",
"aodiniz/bert_uncased_L-10_H-51 8 512 0.056 \n",
"aodiniz/bert_uncased_L-10_H-51 64 512 0.402 \n",
"aodiniz/bert_uncased_L-10_H-51 256 512 1.591 \n",
"--------------------------------------------------------------------------------\n",
"Saving results to csv.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "_KrzL6y_6Z2T",
"colab_type": "text"
},
"source": [
"First of all, it can be noted that XLA reduces latency time by a factor of ca. 1.3 (which is more than observed for other models by TensorFlow [here](https://www.tensorflow.org/xla)). A batch size of 64 looks like a good choice. More or less half a second for the forward pass is good enough.\n",
"\n",
"Cool, now it should be straightforward to benchmark your favorite models. All the inference time measurements can also be done using the `run_benchmark.py` script for PyTorch."
]
},
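    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "For reference, a hypothetical PyTorch counterpart of the speed benchmark above could look like the following cell. The flag names are assumed to mirror `run_benchmark_tf.py`; check `python run_benchmark.py --help` before relying on them."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "# Hypothetical PyTorch counterpart of the TF speed benchmark above\n",
        "# (flag names assumed to mirror run_benchmark_tf.py; verify with --help).\n",
        "!python run_benchmark.py --no_memory --save_to_csv \\\n",
        "                         --inference_time_csv_file plots_pt/time_2.csv \\\n",
        "                         --env_info_csv_file plots_pt/env.csv \\\n",
        "                         --models aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616_squad2 \\\n",
        "                                  deepset/roberta-base-squad2 \\\n",
        "                         --sequence_lengths 8 32 128 512 \\\n",
        "                         --batch_sizes 256 \\\n",
        "                         --no_env_print"
      ],
      "execution_count": null,
      "outputs": []
    },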
{
"cell_type": "markdown",
"metadata": {
"id": "Drht35ylINuK",
"colab_type": "text"
},
"source": [
"### **Training - Configuration Comparison**\n",
"\n",
"Next, we will look at how a model can be benchmarked on different configurations. This is especially helpful when one wants to decide how to most efficiently choose the model's configuration parameters for training.\n",
"In the following different configurations of a *Bart MNLI* model will be compared to each other using `PyTorchBenchmark`. \n",
"\n",
"Training in `PyTorchBenchmark` is defined by running one forward pass to compute the loss: `loss = model(input_ids, labels=labels)[0]` and one backward pass to compute the gradients `loss.backward()`.\n",
"\n",
"Let's see how to most efficiently train a Bart MNLI model from scratch."
]
},
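    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "To make this concrete, the next cell is a toy sketch of the step being timed: one forward pass that returns a loss and one backward pass that computes the gradients. It uses a tiny random config and random inputs; it illustrates the idea and is not the library's internal benchmark code."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "import torch\n",
        "from transformers import BartConfig, BartForConditionalGeneration\n",
        "\n",
        "# Toy sketch of one benchmarked training step (illustration only).\n",
        "tiny_config = BartConfig(encoder_layers=2, decoder_layers=2, d_model=64,\n",
        "                         encoder_attention_heads=2, decoder_attention_heads=2,\n",
        "                         encoder_ffn_dim=128, decoder_ffn_dim=128)\n",
        "tiny_model = BartForConditionalGeneration(tiny_config)\n",
        "tiny_model.train()\n",
        "\n",
        "input_ids = torch.randint(4, tiny_config.vocab_size, (8, 32))  # batch 8, seq len 32\n",
        "loss = tiny_model(input_ids, labels=input_ids)[0]  # forward pass computes the loss\n",
        "loss.backward()                                    # backward pass computes the gradients"
      ],
      "execution_count": null,
      "outputs": []
    },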
{
"cell_type": "code",
"metadata": {
"id": "YTKW0Ml3Wpwq",
"colab_type": "code",
"colab": {}
},
"source": [
"# Imports\n",
"from transformers import BartConfig, PyTorchBenchmark, PyTorchBenchmarkArguments"
],
"execution_count": 17,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "6Uw92tMRq6MV",
"colab_type": "text"
},
"source": [
"For the sake of the notebook, we assume that we are looking for a more efficient version of Facebook's `bart-large-mnli` model.\n",
"Let's load its configuration and check out the important parameters."
]
},
{
"cell_type": "code",
"metadata": {
"id": "nukyLU7iXBzN",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 637,
"referenced_widgets": [
"975f42d7b55c4d0caf229cd4c16df5d2",
"69b36685703342eaa80b6f0e01f94e04",
"c8acb33d6a254607a6340c0aa33446f3",
"a6c3647736554beea36db798827203b2",
"e812aaf8214c4ad983f41804cb82562b",
"eed2ce14188a453ca296601ca39133b6",
"548f91729b8d4f3aa81f78c7a1620101",
"900c1cb473f54b48a59226c61fafd626"
]
},
"outputId": "ae4ecae5-bd30-4eb4-e4b3-34447036e98d"
},
"source": [
"BartConfig.from_pretrained(\"facebook/bart-large-mnli\").to_diff_dict()"
],
"execution_count": 18,
"outputs": [
{
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "975f42d7b55c4d0caf229cd4c16df5d2",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, description='Downloading', max=908.0, style=ProgressStyle(description_…"
]
},
"metadata": {
"tags": []
}
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"{'_num_labels': 3,\n",
" 'activation_dropout': 0.0,\n",
" 'activation_function': 'gelu',\n",
" 'add_bias_logits': False,\n",
" 'add_final_layer_norm': False,\n",
" 'attention_dropout': 0.0,\n",
" 'bos_token_id': 0,\n",
" 'classif_dropout': 0.0,\n",
" 'd_model': 1024,\n",
" 'decoder_attention_heads': 16,\n",
" 'decoder_ffn_dim': 4096,\n",
" 'decoder_layerdrop': 0.0,\n",
" 'decoder_layers': 12,\n",
" 'dropout': 0.1,\n",
" 'encoder_attention_heads': 16,\n",
" 'encoder_ffn_dim': 4096,\n",
" 'encoder_layerdrop': 0.0,\n",
" 'encoder_layers': 12,\n",
" 'eos_token_id': 2,\n",
" 'extra_pos_embeddings': 2,\n",
" 'id2label': {0: 'contradiction', 1: 'neutral', 2: 'entailment'},\n",
" 'init_std': 0.02,\n",
" 'is_encoder_decoder': True,\n",
" 'label2id': {'contradiction': 0, 'entailment': 2, 'neutral': 1},\n",
" 'max_position_embeddings': 1024,\n",
" 'model_type': 'bart',\n",
" 'normalize_before': False,\n",
" 'normalize_embedding': True,\n",
" 'num_hidden_layers': 12,\n",
" 'output_past': False,\n",
" 'pad_token_id': 1,\n",
" 'scale_embedding': False,\n",
" 'static_position_embeddings': False,\n",
" 'vocab_size': 50265}"
]
},
"metadata": {
"tags": []
},
"execution_count": 18
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "3t4ZOmg5sTrx",
"colab_type": "text"
},
"source": [
"Alright! The important configuration parameters are usually the number of layers `config.encoder_num_layers` and `config.decoder_num_layers`, the model's hidden size: `config.d_model`, the number of attention heads `config.encoder_attention_heads` and `config.decoder_attention_heads` and the vocabulary size `config.vocab_size`.\n",
"\n",
"Let's create 4 configurations different from the baseline and see how they compare in terms of peak memory consumption."
]
},
{
"cell_type": "code",
"metadata": {
"id": "qA0d1RvGYAEE",
"colab_type": "code",
"colab": {}
},
"source": [
"config_baseline = BartConfig.from_pretrained(\"facebook/bart-large-mnli\")\n",
"config_768_hidden = BartConfig.from_pretrained(\"facebook/bart-large-mnli\", d_model=768)\n",
"config_8_heads = BartConfig.from_pretrained(\"facebook/bart-large-mnli\", decoder_attention_heads=8, encoder_attention_heads=8)\n",
"config_10000_vocab = BartConfig.from_pretrained(\"facebook/bart-large-mnli\", vocab_size=10000)\n",
"config_8_layers = BartConfig.from_pretrained(\"facebook/bart-large-mnli\", encoder_layers=8, decoder_layers=8)"
],
"execution_count": 19,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "RhefJji1rU07",
"colab_type": "text"
},
"source": [
"Cool, now we can benchmark these configs against the baseline config. This time, instead of using the benchmarking script we will directly use the `PyTorchBenchmark` class. The class expects the argument `args` which has to be of type `PyTorchBenchmarkArguments` and optionally a list of configs.\n",
"\n",
"First, we define the `args` and give the different configurations appropriate model names. The model names must be in the same order as the configs that are directly passed to `PyTorchBenchMark`.\n",
"\n",
"If no `configs` are provided to `PyTorchBenchmark`, it is assumed that the model names `[\"bart-base\", \"bart-768-hid\", \"bart-8-head\", \"bart-10000-voc\", \"bart-8-lay\"]` correspond to official model identifiers and their corresponding configs are loaded as was shown in the previous section.\n",
"\n",
"It is assumed that the model will be trained on half-precision, so we add the option `fp16=True` for the following benchmarks."
]
},
{
"cell_type": "code",
"metadata": {
"id": "Lv_WvM2jr79r",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 554
},
"outputId": "939dc355-036f-45ad-c996-e6cb136c7a59"
},
"source": [
"# define args\n",
"args = PyTorchBenchmarkArguments(models=[\"bart-base\", \"bart-768-hid\", \"bart-8-head\", \"bart-10000-voc\", \"bart-8-lay\"], \n",
" no_speed=True,\n",
" no_inference=True,\n",
" training=True, \n",
" train_memory_csv_file=\"plots_pt/training_mem_fp16.csv\", \n",
" save_to_csv=True, \n",
" env_info_csv_file=\"plots_pt/env.csv\",\n",
" sequence_lengths=[64, 128, 256, 512],\n",
" batch_sizes=[8],\n",
" no_env_print=True,\n",
" fp16=True) # let's train on fp16\n",
"\n",
"# create benchmark\n",
"benchmark = PyTorchBenchmark(configs=[config_baseline, config_768_hidden, config_8_heads, config_10000_vocab, config_8_layers], args=args)\n",
"\n",
"# run benchmark\n",
"result = benchmark.run()"
],
"execution_count": 20,
"outputs": [
{
"output_type": "stream",
"text": [
"1 / 5\n",
"2 / 5\n",
"3 / 5\n",
"4 / 5\n",
"5 / 5\n",
"\n",
"==================== TRAIN - MEMORY - RESULTS ====================\n",
"--------------------------------------------------------------------------------\n",
" Model Name Batch Size Seq Length Memory in MB \n",
"--------------------------------------------------------------------------------\n",
" bart-base 8 64 2905 \n",
" bart-base 8 128 3199 \n",
" bart-base 8 256 5401 \n",
" bart-base 8 512 11929 \n",
" bart-768-hid 8 64 2441 \n",
" bart-768-hid 8 128 2891 \n",
" bart-768-hid 8 256 4963 \n",
" bart-768-hid 8 512 10865 \n",
" bart-8-head 8 64 2869 \n",
" bart-8-head 8 128 3059 \n",
" bart-8-head 8 256 4825 \n",
" bart-8-head 8 512 9625 \n",
" bart-10000-voc 8 64 2607 \n",
" bart-10000-voc 8 128 2801 \n",
" bart-10000-voc 8 256 4687 \n",
" bart-10000-voc 8 512 10575 \n",
" bart-8-lay 8 64 2445 \n",
" bart-8-lay 8 128 2591 \n",
" bart-8-lay 8 256 4187 \n",
" bart-8-lay 8 512 8813 \n",
"--------------------------------------------------------------------------------\n",
"Saving results to csv.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "DJWs_tDjxzuO",
"colab_type": "text"
},
"source": [
"Nice, let's plot the results again."
]
},
{
"cell_type": "code",
"metadata": {
"id": "0r-r-R1lxEr0",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "5dbeb7f7-c996-4db2-a560-735354a5b76f"
},
"source": [
"# plot graph and save as image\n",
"!python plot_csv_file.py --csv_file plots_pt/training_mem_fp16.csv --figure_png_file=plots_pt/training_mem_fp16.png --no_log_scale\n",
"\n",
"# show image\n",
"from IPython.display import Image\n",
"Image('plots_pt/training_mem_fp16.png')"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 12:11:47.558303: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdd1gUV9sH4N+ywNJBigIWiiJFFAyGCKLYSewdNV8Qe40aS0QTFayYKGqwYTSgqFEsQUxiS9RXo6hIlGhALAErWEBQKVL2+f7g3XkZl6ogRJ77uvbSPXPmzJm6D2fOnJEQEYExxhhjjNUZKjVdAcYYY4wx9m5xAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVsdwAMgYY4wxVse8swDw5cuXGDNmDExNTSGRSDB9+vRqXZ6lpSV8fX3feP5vv/0W1tbWkEqlcHZ2rrqK/QtZWlqiV69eNV2NUkkkEkyZMqWmq1Hr8XaqvI4dO8LR0bHcfMnJyZBIJAgLCys3r6+vLywtLd++ctWgouv7b+Xv7w+JRIKnT5/WdFUEfDywmvLOAsBly5YhLCwMEydORHh4OD777LN3tehKO3bsGL788ku0a9cOoaGhWLZsWU1Xqc6Ij4+Hv78/kpOTa7oq7C2cO3cO/v7+yMjIqFD+U6dOQSKRlPpZunSp0jy//fYbOnfuDH19fejq6sLFxQV79uwR5cnNzcXy5cvh4OAALS0tNGzYEIMHD8bff/9dJevJ3tzDhw/h7++PK1euVGq+iIgItG3bFgYGBjAyMoKnpyd++eWXaqole1fe5Hi4ePEiJk2aBBcXF6ipqUEikZSZf+vWrbC3t4eGhgZsbGwQHBxcYr4HDx5gyJAhMDAwgJ6eHvr27Yt//vnnnZX5rqi+qwWdOHECbdu2xcKFC9/J8hITE6Gi8mbx7YkTJ6CiooKtW7dCXV29imvGyhIfH4+AgAB07Nix1v5VzMp37tw5BAQEwNfXFwYGBuXmt7e3R3h4uFJ6eHg4jh07hu7du4vSQ0NDMXr0aHTr1g3Lli2DVCpFYmIi7t27J8r36aefIioqCmPHjsUHH3yAhw8fYv369XBzc8PVq1dhYWHxdiv6XxYWFsjJyYGamlqVlFcXPHz4EAEBAbC0tKzwXZbg4GBMnToVPXv2RGBgIHJzcxEWFoZevXph//79GDBgQDXXmlWXNzkefv31V2zZsgWtWrWCtbU1bty4UWrekJAQTJgwAQMHDsSMGTNw5swZTJ06FdnZ2ZgzZ46Q7+XLl+jUqRMyMzMxb948qKmpYfXq1fD09MSVK1dgZGRUrWW+U/SOWFlZUc+ePausvPz8fHr16lWVlVfcyJEjSVtbu0rLzMrKqtLy3iULC4sq3XclycnJocLCQtq7dy8BoJMnT1Z4XgA0efLk6qvce+JdbKeXL18SEdG3335LACgpKemtymvWrBnZ2NiI0pKSkkhTU5OmTp1a5rz3798nADRr1ixR+okTJwgABQUFlbt8T09PatGiReUrXoYRI0aQhYVFlZZZVapjfV+nuHbHxMQQAAoNDa3wvDY2NvThhx+SXC4X0jIzM0lHR4f69OlT7vwLFy4kAPTkyZM3qXq14OPhzY+H1NRUys7OJiKiyZMnU2khTXZ2NhkZGSn9jn366aekra1N6enpQtqKFSsIAF28eFFIS0hIIKlUSnPnzq3WMt81lbS0NHz22WfQ09ODgYEBRowYgbi4uBL7s1y/fh2DBg2CoaEhNDQ00KZNG0RFRZUZYCpu7SQlJeGXX34RbukobvE9fvwYo0ePRoMGDaChoQEnJyds27ZNVIaif83KlSuxZs0aNG3aFDKZDPHx8aUu9/U+gGFhYZBIJDh79ixmzJgBExMTaGtro3///njy5ImQTyKRIDQ0FFlZWUJdi2+HHTt2wMXFBZqamjA0NMTQoUOVWh0U/SZiY2PRoUMHaGlpYd68eQCAV69eYeHChWjWrBlkMhkaN26ML7/8Eq9evRKVoeivFRkZCUdHR8hkMrRo0QJHjhxRWtcHDx5g9OjRMDc3h0wmg5WVFSZOnIi8vDwhT0ZGBqZPn47GjRtDJpOhWbNmWLFiBeRyeanb8HXHjh2Ds7MzNDQ04ODggAMHDoimp6enY9asWWjZsiV0dHSgp6eHTz75BHFxcaJ8imNi9+7d+Prrr9GwYUNoaWnhu+++w+DBgwEAnTp1Erb/qVOnKlS/nTt3wtbWFhoaGnBxccHp06dF0+/cuYNJkybB1tYWmpqaMDIywuDBg5VuN+fn5yMgIAA2NjbQ0NCAkZERPDw8cPz4cVG+Nzkf8vPzYWhoiJEjRypNe/78OTQ0NDBr1iwhLTg4GC1atICWlhbq1auHNm3aYNeuXRXaHqWpqu2kOKf+85//YNKkSahfvz4aNWoEf39/zJ49GwBgZWWldM5X1MWLF3Hr1i18+umnovRNmzahsLAQixYtAlD01zURKc3/4sULAECDBg1E6WZmZgAATU3NCtclPj4enTp1Em4jf/PNN6LppfUBVJy/GhoacHR0xE8//VSh5fXq1QvW1tYlTnNzc0ObNm2E78ePH4eHhwcMDAygo6MDW1tb4XrzpmJjY+Hu7g5NTU1YWVlh06ZNoul5eXlYsGABXFxcoK+vD21tbbRv3x4nT54U5Svt2r1hwwZ8+OGHAICRI0eWeK0tyfPnz1G/fn3RrT49PT3o6OhUan9mZGQIrdP6+voYOXIksrOzlfJV5Hp/5swZDB48GE2aNBGu6V988QVycnKUyuPjoWqPhwYNGlRov588eRJpaWmYNGmSKH3y5MnIysoSdSHYt28fPvzwQ6E+AGBnZ4cuXbogIiKiWsssSWX2fUFBARYvXixsV0tLS8ybN08pvgCAw4cPA25ubiSVSmnKlCm0bt066tatGzk5OSlF4teuXSN9fX1ycHCgFStW0Lp166hDhw4kkUjowIEDpUaYqampFB4eTsbGxuTs7Ezh4eEUHh5OL1++pOzsbLK3tyc1NTX64osv6LvvvqP27dsTAFqzZo1QRlJSEgEgBwcHsra2psDAQFq9ejXduXOn1OVaWFjQiBEjhO+hoaEEgFq3bk2dO3em4OBgmjlzJkmlUhoyZIiQLzw8nNq3b08ymUyo6+3bt4mIaMmSJSSRSMjb25s2bNhAAQEBZGxsTJaWl
vTs2TOhDE9PTzI1NSUTExP6/PPPKSQkhCIjI6mwsJC6d+9OWlpaNH36dAoJCaEpU6aQqqoq9e3bV1R/AOTk5ERmZma0ePFiWrNmDVlbW5OWlhY9ffpUyPfgwQMyNzcXyty0aRPNnz+f7O3thTplZWVRq1atyMjIiObNm0ebNm0iHx8fkkgkNG3atFK3YfFt2bx5czIwMCA/Pz8KCgqili1bkoqKCh07dkzIFxMTQ02bNiU/Pz8KCQmhRYsWUcOGDUlfX58ePHgg5Dt58qSwP52dnSkoKIiWL19Of//9N02dOpUA0Lx584Ttn5qaWmb9AJCjoyMZGxvTokWLaMWKFWRhYUGampp09epVId/evXvJycmJFixYQJs3b6Z58+ZRvXr1yMLCQtRCO2/ePJJIJDR27Fj6/vvvadWqVTRs2DAKDAwU8rzp+UBENGrUKDIwMFBqwd62bRsBoJiYGCIi2rx5MwGgQYMGUUhICK1du5ZGjx5dbsvXu9pOinPKwcGBPD09KTg4mAIDAykuL
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {
"tags": []
},
"execution_count": 21
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5xTuRPBCx-dw",
"colab_type": "text"
},
"source": [
"As expected the model of the baseline config requires the most memory. \n",
"\n",
"It is interesting to see that the \"bart-8-head\" model initially requires more memory than `bart-10000-voc`, but then clearly outperforms `bart-10000-voc` at an input length of 512. \n",
"Less surprising is that the \"bart-8-lay\" is by far the most memory-efficient model when reminding oneself that during the forward pass every layer has to store its activations for the backward pass.\n",
"\n",
"Alright, given the data above, let's say we narrow our candidates down to only the \"bart-8-head\" and \"bart-8-lay\" models. \n",
" \n",
"Let's compare these models again on training time."
]
},
{
"cell_type": "code",
"metadata": {
"id": "c9xSoCUZ0Hlz",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 269
},
"outputId": "7054af8a-3050-4aca-f503-e229ed365cb0"
},
"source": [
"# define args\n",
"args = PyTorchBenchmarkArguments(models=[\"bart-8-head\", \"bart-8-lay\"], \n",
" no_inference=True,\n",
" training=True,\n",
" no_memory=True,\n",
" train_time_csv_file=\"plots_pt/training_speed_fp16.csv\", \n",
" save_to_csv=True, \n",
" env_info_csv_file=\"plots_pt/env.csv\",\n",
" sequence_lengths=[32, 128, 512],\n",
" batch_sizes=[8],\n",
" no_env_print=True,\n",
" repeat=1, # to make speed measurement faster but less accurate\n",
" no_multi_process=True, # google colab has problems with multi processing\n",
" fp16=True\n",
" )\n",
"\n",
"# create benchmark\n",
"benchmark = PyTorchBenchmark(configs=[config_8_heads, config_8_layers], args=args)\n",
"\n",
"# run benchmark\n",
"result = benchmark.run()"
],
"execution_count": 22,
"outputs": [
{
"output_type": "stream",
"text": [
"1 / 2\n",
"2 / 2\n",
"\n",
"==================== TRAIN - SPEED - RESULTS ====================\n",
"--------------------------------------------------------------------------------\n",
" Model Name Batch Size Seq Length Time in s \n",
"--------------------------------------------------------------------------------\n",
" bart-8-head 8 32 0.127 \n",
" bart-8-head 8 128 0.398 \n",
" bart-8-head 8 512 1.567 \n",
" bart-8-lay 8 32 0.088 \n",
" bart-8-lay 8 128 0.284 \n",
" bart-8-lay 8 512 1.153 \n",
"--------------------------------------------------------------------------------\n",
"Saving results to csv.\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UseFqLiuRQuX",
"colab_type": "text"
},
"source": [
"The option `no_multi_process` disabled multi-processing here. This option should in general only be used for testing or debugging. Enabling multi-processing is crucial to ensure accurate memory consumption measurement, but is less important when only measuring speed. The main reason it is disabled here is that google colab sometimes raises \"CUDA initialization\" due to the notebook's environment. \n",
"This problem does not arise when running benchmarks outside of a notebook.\n",
"\n",
"Alright, let's plot the last speed results as well."
]
},
{
"cell_type": "code",
"metadata": {
"id": "8c6fjmWLU0Rx",
"colab_type": "code",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 514
},
"outputId": "8a4b4db7-abed-47c4-da61-c3b1ccae66f1"
},
"source": [
"# plot graph and save as image\n",
"!python plot_csv_file.py --csv_file plots_pt/training_speed_fp16.csv --figure_png_file=plots_pt/training_speed_fp16.png --no_log_scale --is_time\n",
"\n",
"# show image\n",
"from IPython.display import Image\n",
"Image('plots_pt/training_speed_fp16.png')"
],
"execution_count": 23,
"outputs": [
{
"output_type": "stream",
"text": [
"2020-06-26 12:13:17.849561: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcudart.so.10.1\n"
],
"name": "stdout"
},
{
"output_type": "execute_result",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAHgCAYAAAA10dzkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAPYQAAD2EBqD+naQAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeVxU1fvA8c8w7LsEKCiCGiFgKYG75r6QopYLLt/E1NTSLMtMc0VLzQwXilwqF3LJvTL31LTFzDXNXUFMUXMDAWWb8/uDH5MjoKjgIPO8X6956T1z7r3PXZh55txzz9UopRRCCCGEEMJkmBk7ACGEEEII8XhJAiiEEEIIYWIkARRCCCGEMDGSAAohhBBCmBhJAIUQQgghTIwkgEIIIYQQJkYSQCGEEEIIEyMJoBBCCCGEiZEEUAghhBDCxEgCKIQQQghhYiQBFEIIIYQwMZIACiGEEEKYGEkAhRBCCCFMjCSAQgghhBAmRhJAIYQQQggTIwmgEEIIIYSJkQRQCCGEEMLESAIohBBCCGFiJAEUQgghhDAxkgAKIYQQQpgYSQCFEEIIIUyMJIBCCCGEECZGEkAhhBBCCBMjCaAQQgghhImRBFAIIYQQwsRIAiiEEEIIYWIkARRCCCGEMDGSAAohhBBCmBhJAIUQQgghTIwkgEIIIYQQJkYSQCGEEEIIEyMJoBBCCCGEiZEEUAghhBDCxEgCKIQQQghhYiQBNFG9evXCx8fH2GGYjKysLIYNG4aXlxdmZmZ06NChWNfXuHFjGjdu/NDzx8bGUrVqVSwsLHB2di66wJ5AjRs3plq1asYOo9iMGzcOjUbDlStXjB2KXkn+fCrt58P8+fPRaDTEx8cbOxRRzCQBLEU0Gk2hXtu3bzd2qCbn66+/5pNPPqFTp04sWLCAIUOGGDukAh07doxevXpRpUoV5s6dy5w5c4wdksm4cOEC48aN48CBAw8037Jly6hTpw7Ozs489dRTNGrUiB9//LGYohSPi5wPojiZGzsAUXRiY2MNphcuXMjmzZvzlPv7+zN37lx0Ot3jDM+kbd26lfLlyzNt2rTHsr5NmzY99Lzbt29Hp9MxY8YMnn766SKMStzPhQsXiIyMxMfHhxo1ahRqnujoaAYPHkybNm2YPHkyt2/fZv78+bRt25aVK1fy8ssvF3PUorjI+SCKkySApcj//vc/g+ldu3axefPmPOXi8bt8+XKRXkrV6XRkZGRgbW2d7/uWlpYPvezLly8DFGm8qamp2NnZFdnySpusrKyH/kEWHR1NzZo1+eGHH9BoNAD07t2b8uXLs2DBAvnCfwLJ+SAeB7kEbKLu7mMTHx+PRqNh6tSpfP7551SuXBlbW1tatmzJuXPnUEoxYcIEKlSogI2NDe3bt+fatWt5lrt+/XoaNmyInZ0dDg4OtGnThr///vu+8eT2Q7pbfv1R9uzZQ6tWrXB1dcXGxoZKlSrRu3dvg/mmTp1KvXr1eOqpp7CxsSE4OJgVK1bkWf6tW7cYPHgwrq6uODg40K5dO86fP49Go2HcuHEGdc+fP0/v3r0pW7YsVlZWBAYG8vXXX99zu3L367Zt2/j777/zXIZPTU3l3XffxcvLCysrK/z8/Jg6dSpKKYPlaDQaBg0axKJFiwgMDMTKyooNGzYUuN67+wBu374djUbDsmXL+Oijj6hQoQLW1tY0a9aMU6dO6ev5+PgwduxYANzc3PLsh8Ic3169emFvb8/p06d58cUXcXBwoEePHkBO4jp9+nQCAwOxtrambNmy9O/fn+vXrxssw8fHh7Zt2/LLL79Qq1YtrK2tqVy5MgsXLsyzrTdu3GDIkCH4+PhgZWVFhQoV6Nmzp0GftvT0dMaOHcvTTz+NlZUVXl5eDBs2jPT09AL34d327t1LvXr19OfcrFmzDN7PyMhgzJgxBAcH4+TkhJ2dHQ0bNmTbtm0G9e78W5s+fTpVqlTBysqKmJgYatasCcCrr76qP1fmz59/z7iSk5Nxd3c3+PtxdHTE3t4eGxubQm/fjRs36NWrF87Ozjg5OfHqq6+SlpaWp94333xDcHAwNjY2uLi40LVrV86dO2dQZ+fOnXTu3JmKFSvq9/eQIUO4detWnuWtWbOGatWqYW1tTbVq1Vi9enWh4m3bti2VK1fO9726desSEhKin968eTMNGjTA2dkZe3t7/Pz8+OCDDwq1noKU9vPhTt999x1t2rTB09MTKysrqlSpwoQJE8jOztbXGTt2LBYWFvz777955u/Xrx/Ozs7cvn37odYviokSpdbAgQNVQYc4IiJCeXt766fj4uIUoGrUqKECAgJUVFSUGjVqlLK0tFR16tRRH3zwgapXr56aOXOmGjx4sNJoNOrVV181WObChQuVRqNRrVu3VtHR0erjjz9WPj4+ytnZWcXFxd0z1rFjx+Yb67x58xSgn//SpUuqTJky6plnnlGffPKJmjt3rho5cqTy9/c3mK9ChQrqjTfeUJ999pmKiopStWrVUoBau3atQb0uXbooQL3yyivq888/V126dFHVq1dXgBo7dqy+3sWLF1WFChWUl5eXGj9+vPriiy9Uu3btFKCmTZtW4HalpKSo2NhYVbVqVVWhQgUVGxurYmNj1cWLF5VOp1NNmzZVGo1G9e3bV3322WcqLCxMAertt982WA6g/P39lZubm4qMjFSff/652r9/f4HrbdSokWrUqJF+etu2bQpQQUFBKjg4WE2bNk2NGzdO2draqlq1aunrrV69Wr300ksKUF988YWKjY1VBw8eVEoV/vhGREQoKysrVaVKFRUREaFmzZqlFi5cqJRSqm/fvsrc3Fy99tpratasWer9999XdnZ2qmbNmiojI0O/DG9vb+Xn56fKli2rPvjgA/XZZ5+p559/Xmk0GnX48GF9vZs3b6pq1aoprVarXnvtNfXFF1+oCRMmqJo1a+r3T3Z2tmrZsqWytbVVb7/9tpo9e7YaNGiQMjc3V+3bty9wH965Lz09PZW7u7saNGiQmjlzpmrQoIEC1FdffaWv9++//yoPDw/1zjvvqC+++EJNmTJF+fn5KQsLC4Njlfu3FhAQoCpXrqwmT56spk2bpuLj49X48eMVoPr166c/V06fPn3P+MLDw5VWq1UzZ85UcXFx6ujRo+qNN95QNjY26rfffrvv9uX+7QUFBamXX35ZxcTEqL59+ypADRs2zKDuhx9+qDQajQoPD1cxMTEqMjJSubq6Kh8fH3X9+nV9vTfffFO9+OKLauLEiWr27NmqT58+SqvVqk6dOhksb+PGjcrMzExVq1ZNRUVFqZEjRyonJycVGBho8PmUn4ULFypA7d6926A8Pj5eAeqTTz5RSil1+PBhZWlpqUJCQtSMGTPUrFmz1NChQ9ULL7xw332Tn9J+Ptz9mauUUh06dFBdunRRn3zyifriiy9U586dFaCGDh2qr3Py5EkFqOjoaIPlpaenqzJlyqjevXvfd93i8ZIEsBR7mATQzc1N3bhxQ18+YsQIBajq1aurzMxMfXm3b
t2UpaWlun37tlIq54vY2dlZvfbaawbruXjxonJycspTfrfCJoCrV69WgPrzzz/vuby0tDSD6YyMDFWtWjXVtGlTfdnevXvzTbZ69eqVJwHs06eP8vDwUFeuXDGo27VrV+Xk5JRnfXdr1KiRCgwMNChbs2aNAtSHH35oUN6pUyel0WjUqVOn9GWAMjMzU3///fc913Pn+vJLAP39/VV6erq+fMaMGQpQhw4d0pflHot///1XX/YgxzciIkIBavjw4QZ1d+7cqQC1aNEig/INGzbkKff29laA2rFjh77s8uXLysrKSr377rv6sjFjxihArVq1Ks8+0Ol0SimlYmNjlZmZmdq5c6fB+7NmzVKA+vXXX/PMe6dGjRopQH366af6svT0dFWjRg3l7u6uT1yzsrIM9q1SSl2/fl2VLVvW4Msv92/N0dFRXb582aD+n3/+qQA1b968e8Z0p0uXLqlmzZopQP9ydXUt1Je9Uv8d77u/oF966SX11FNP6afj4+OVVqtVH330kUG9Q4cOK
"text/plain": [
"<IPython.core.display.Image object>"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "b6T7I4lnVCpk",
"colab_type": "text"
},
"source": [
"Unsurprisingly, \"bart-8-lay\" is faster than \"bart-8-head\" by a factor of ca. 1.3. It might very well be that reducing the layers by a factor of 2 leads to much more performance degradation than reducing the number of heads by a factor of 2.\n",
"For more information on computational efficient Bart models, check out the new *distilbart* model [here](https://huggingface.co/models?search=distilbart)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "S4cG0NwfNugm",
"colab_type": "text"
},
"source": [
"Alright, that's it! Now you should be able to benchmark your favorite models on your favorite configurations. \n",
"\n",
"Transparency for the computational cost of a model is becoming more and more important. Feel free to share your results with the community on a shared spreadsheet or by tweeting us @huggingface 🤗."
]
}
]
}