# Source: optimum-neuron/tests/inference/inference_utils.py at v0.0.18 (huggingface/optimum-neuron, GitHub)
# coding=utf-8
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import shutil
import tempfile
import unittest
from io import BytesIO
from typing import Dict

import huggingface_hub
import requests
from PIL import Image
from transformers import set_seed


# Seed handed to `transformers.set_seed` right before a model is exported in
# `NeuronModelTestMixin._setup`.
SEED = 42


# Default model id for each architecture name. `NeuronModelTestMixin._setup`
# falls back to this table when the test class does not list the architecture
# in its own `ARCH_MODEL_MAP`; the value is passed to `from_pretrained`.
MODEL_NAMES = {
    "albert": "hf-internal-testing/tiny-random-AlbertModel",
    "bert": "hf-internal-testing/tiny-random-BertModel",
    "camembert": "hf-internal-testing/tiny-random-camembert",
    "convbert": "hf-internal-testing/tiny-random-ConvBertModel",
    "deberta": "hf-internal-testing/tiny-random-DebertaModel",  # Failed for INF1: 'XSoftmax'
    "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model",  # Failed for INF1: 'XSoftmax'
    "distilbert": "hf-internal-testing/tiny-random-DistilBertModel",
    "electra": "hf-internal-testing/tiny-random-ElectraModel",
    "flaubert": "flaubert/flaubert_small_cased",
    "gpt2": "hf-internal-testing/tiny-random-gpt2",
    "latent-consistency": "echarlaix/tiny-random-latent-consistency",
    "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel",
    "mpnet": "hf-internal-testing/tiny-random-MPNetModel",
    "roberta": "hf-internal-testing/tiny-random-RobertaModel",
    "roformer": "hf-internal-testing/tiny-random-RoFormerModel",
    "sentence-transformers-transformer": "BAAI/bge-small-en-v1.5",
    "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch",
    "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl",
    "xlm": "hf-internal-testing/tiny-random-XLMModel",
    "xlm-roberta": "hf-internal-testing/tiny-xlm-roberta",
}


class NeuronModelIntegrationTestMixin(unittest.TestCase):
    """
    Base class for tests that need an exported Neuron model both on disk and on the Hub.

    `setUpClass` exports `MODEL_ID` with `NEURON_MODEL_CLASS`, saves the result into a temporary directory
    (exposed as `local_model_path`) and pushes it to `{USER}/{NEURON_MODEL_REPO}` (exposed as
    `neuron_model_id`). `tearDownClass` removes the temporary directory.

    Subclasses are expected to set `MODEL_ID`, `NEURON_MODEL_REPO` and `NEURON_MODEL_CLASS`.
    """

    USER = "optimum"
    MODEL_ID = None
    NEURON_MODEL_REPO = None
    NEURON_MODEL_CLASS = None
    STATIC_INPUTS_SHAPES = {}
    # Directory holding the locally exported model. Declared here so that `tearDownClass` can be called
    # safely even when `setUpClass` never ran.
    local_model_path = None

    @classmethod
    def setUpClass(cls):
        """Export the model, save it to a temporary directory and push it to the Hub."""
        cls._token = huggingface_hub.get_token()

        model_name = cls.MODEL_ID.split("/")[-1]
        model_dir = tempfile.mkdtemp(prefix=f"{model_name}_")

        try:
            # Export model to local path
            neuron_model = cls.NEURON_MODEL_CLASS.from_pretrained(
                cls.MODEL_ID, export=True, **cls.STATIC_INPUTS_SHAPES
            )
            neuron_model.save_pretrained(model_dir)
            cls.local_model_path = model_dir

            # Upload to the hub
            cls.neuron_model_id = f"{cls.USER}/{cls.NEURON_MODEL_REPO}"
            neuron_model.push_to_hub(model_dir, repository_id=cls.neuron_model_id, use_auth_token=cls._token)
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises, so the temporary directory
            # has to be removed here or it would be leaked.
            shutil.rmtree(model_dir, ignore_errors=True)
            cls.local_model_path = None
            raise

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory created by `setUpClass`, if any."""
        if cls.local_model_path is not None:
            shutil.rmtree(cls.local_model_path)
            # Reset so that a second call does not try to delete a directory that is already gone.
            cls.local_model_path = None


class NeuronModelTestMixin(unittest.TestCase):
    """
    Base class for tests that run against Neuron models exported on demand.

    Exported models are cached per test class in `neuron_model_dirs`, a dict mapping the `test_name` given
    to `_setup` to the temporary directory the model was saved in. `tearDownClass` deletes these directories.

    Subclasses are expected to provide `NEURON_MODEL_CLASS` and `TASK`, and may override `ARCH_MODEL_MAP`
    to use a model id other than the `MODEL_NAMES` default for an architecture.
    """

    ARCH_MODEL_MAP = {}
    STATIC_INPUTS_SHAPES = {"batch_size": 1, "sequence_length": 32}

    @classmethod
    def setUpClass(cls):
        """Start every test class with its own empty cache of exported model directories."""
        cls.neuron_model_dirs = {}

    def _setup(self, model_args: Dict):
        """
        Exports the PyTorch models to Neuron models ahead of time to avoid multiple exports during the tests.
        We don't use unittest setUpClass, in order to still be able to run individual tests.

        Args:
            model_args (`Dict`):
                Must contain `"model_arch"` (architecture name) and `"test_name"` (cache key). May contain
                `"dynamic_batch_size"` (defaults to `False`). Every other entry is forwarded as a keyword
                argument to `NEURON_MODEL_CLASS.from_pretrained()`. The dictionary is not modified.
        """
        model_arch = model_args["model_arch"]
        model_arch_and_params = model_args["test_name"]

        if model_arch_and_params in self.neuron_model_dirs:
            # Already exported for this test class.
            return

        dynamic_batch_size = model_args.get("dynamic_batch_size", False)
        # Build the from_pretrained() kwargs from a filtered copy rather than popping keys from the
        # caller's dictionary, so the argument is left untouched whether or not the cache is hit.
        reserved_keys = ("test_name", "model_arch", "dynamic_batch_size")
        export_kwargs = {key: value for key, value in model_args.items() if key not in reserved_keys}

        model_id = self.ARCH_MODEL_MAP[model_arch] if model_arch in self.ARCH_MODEL_MAP else MODEL_NAMES[model_arch]
        set_seed(SEED)
        neuron_model = self.NEURON_MODEL_CLASS.from_pretrained(
            model_id, **export_kwargs, export=True, dynamic_batch_size=dynamic_batch_size, **self.STATIC_INPUTS_SHAPES
        )

        model_dir = tempfile.mkdtemp(prefix=f"{model_arch_and_params}_{self.TASK}_")
        try:
            neuron_model.save_pretrained(model_dir)
        except BaseException:
            # The directory is not registered in the cache yet, so tearDownClass would never remove it.
            shutil.rmtree(model_dir, ignore_errors=True)
            raise
        self.neuron_model_dirs[model_arch_and_params] = model_dir

    @classmethod
    def tearDownClass(cls):
        """Delete every temporary directory created by `_setup` for this test class."""
        for dir_path in cls.neuron_model_dirs.values():
            shutil.rmtree(dir_path)


def download_image(url, timeout=30):
    """
    Downloads an image over HTTP and returns it as an RGB `PIL.Image.Image`.

    Args:
        url (`str`):
            Address of the image to fetch.
        timeout (`float` or `tuple`, defaults to 30):
            Timeout in seconds forwarded to `requests.get`, so that an unresponsive server cannot block
            the test run indefinitely.

    Raises:
        `requests.HTTPError`: If the server answers with a 4xx/5xx status code.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on the HTTP error itself instead of handing an error page to PIL.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")