Add ONNX export support for GIT #2132

Draft · wants to merge 2 commits into base: main · Changes from 1 commit
1 change: 1 addition & 0 deletions docs/source/exporters/onnx/overview.mdx
@@ -47,6 +47,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
- ESM
- Falcon
- Flaubert
- GIT
- GPT-2
- GPT-BigCode
- GPT-J
20 changes: 20 additions & 0 deletions optimum/exporters/onnx/model_configs.py
@@ -2621,3 +2621,23 @@ class EncoderDecoderOnnxConfig(EncoderDecoderBaseOnnxConfig):
NORMALIZED_CONFIG_CLASS = NormalizedEncoderDecoderConfig

DEFAULT_ONNX_OPSET = 14 # uses SDPA in Transformers, hence opset>=14.


class GITOnnxConfig(VisionOnnxConfig):
NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
Collaborator:

The issue you're reporting, `ValueError: Input image size (64*64) doesn't match model (32*32).`, should be fixed if you replace the config with:

Suggested change:
-    NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
+    NORMALIZED_CONFIG_CLASS = NormalizedTextAndVisionConfig.with_args(vision_config="vision_config")

as it seems that for `GitConfig` the `image_size` attribute needs to be taken from the `vision_config` directly: https://github.com/huggingface/transformers/blob/504c4d36929b6bb8a8c2ecfad0f2625f4075f22a/src/transformers/models/git/configuration_git.py#L98

What is currently happening is that before export this value is not found in the config and defaults to 64:

if normalized_config.has_attribute("image_size"):

when it should be set to 32 in your case: https://huggingface.co/hf-internal-testing/tiny-random-GitModel/blob/main/config.json#L52
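
A minimal sketch (my illustration, not part of the PR) of why the suggested normalized config fixes the lookup; it assumes `NormalizedTextAndVisionConfig` attribute access falls through to the nested `vision_config`:

from transformers import AutoConfig
from optimum.utils import NormalizedTextAndVisionConfig

config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-GitModel")
NormalizedGitConfig = NormalizedTextAndVisionConfig.with_args(vision_config="vision_config")
normalized = NormalizedGitConfig(config)
# image_size is absent on the top-level GitConfig, so the lookup falls through to
# config.vision_config.image_size (32) instead of the dummy generator's default of 64
print(normalized.image_size)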

DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, DummyVisionInputGenerator)

@property
def inputs(self) -> Dict[str, Dict[int, str]]:
return {
"input_ids": {0: "text_batch_size", 1: "sequence_length"},
"pixel_values": {0: "image_batch_size", 1: "num_channels", 2: "height", 3: "width"}
}


class GITVisionModelOnnxConfig(VisionOnnxConfig):
NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig

@property
def inputs(self) -> Dict[str, Dict[int, str]]:
return {"pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"}}
Collaborator:

I think this case should be handled in `GITOnnxConfig` directly, depending on `self.task`, no? If not, in which case should the model be exported with `input_ids` as input?

Suggested change:
-class GITVisionModelOnnxConfig(VisionOnnxConfig):
-    NORMALIZED_CONFIG_CLASS = NormalizedVisionConfig
-
-    @property
-    def inputs(self) -> Dict[str, Dict[int, str]]:
-        return {"pixel_values": {0: "batch_size", 1: "num_channels", 2: "height", 3: "width"}}

Author:

Attempted to use self.task in marcindulak@ea2321c

Problems:

1. ValueError: You have to specify either input_ids or inputs_embeds
optimum-cli export onnx --model microsoft/git-base /tmp/git-base
image-to-text <class 'optimum.utils.input_generators.DummyVisionInputGenerator'>
Traceback (most recent call last):
  File "/root/venv/bin/optimum-cli", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/commands/optimum_cli.py", line 208, in main
    service.run()
  File "/root/venv/lib/python3.11/site-packages/optimum/commands/export/onnx.py", line 265, in run
    main_export(
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/__main__.py", line 373, in main_export
    onnx_export_from_model(
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 1176, in onnx_export_from_model
    _, onnx_outputs = export_models(
                      ^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 762, in export_models
    export(
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 867, in export
    export_output = export_pytorch(
                    ^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 563, in export_pytorch
    onnx_export(
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/__init__.py", line 375, in export
    export(
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 502, in export
    _export(
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 1564, in _export
    graph, params_dict, torch_out = _model_to_graph(
                                    ^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 1113, in _model_to_graph
    graph, params, torch_out, module = _create_jit_graph(model, args)
                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 997, in _create_jit_graph
    graph, torch_out = _trace_and_get_graph_from_model(model, args)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 904, in _trace_and_get_graph_from_model
    trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
                                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/jit/_trace.py", line 1500, in _get_trace_graph
    outs = ONNXTracedModule(
           ^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/jit/_trace.py", line 139, in forward
    graph, out = torch._C._create_graph_by_tracing(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/jit/_trace.py", line 130, in wrapper
    outs.append(self.inner(*trace_inputs))
                ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1726, in _slow_forward
    result = self.forward(*input, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/model_patcher.py", line 151, in patched_forward
    outputs = self.orig_forward(*args, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/transformers/models/git/modeling_git.py", line 1570, in forward
    outputs = self.git(
              ^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1726, in _slow_forward
    result = self.forward(*input, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/transformers/models/git/modeling_git.py", line 1276, in forward
    raise ValueError("You have to specify either input_ids or inputs_embeds")
ValueError: You have to specify either input_ids or inputs_embeds

It looks like all three tasks ("feature-extraction", "image-text-to-text", "image-to-text") want `input_ids` as input. Could this be due to the use of `TextAndVisionOnnxConfig` as the base class?

class GITOnnxConfig(TextAndVisionOnnxConfig):
    NORMALIZED_CONFIG_CLASS = NormalizedTextAndVisionConfig.with_args(vision_config="vision_config")
2. We don't have an op for aten::full but it isn't a special case. Argument types: int[], bool, NoneType, NoneType, Device, bool
optimum-cli export onnx --model hf-internal-testing/tiny-random-GitForCausalLM /tmp/tiny-random-GitForCausalLM
image-text-to-text <class 'optimum.utils.input_generators.DummyTextInputGenerator'>
image-text-to-text <class 'optimum.utils.input_generators.DummyVisionInputGenerator'>
/root/venv/lib/python3.11/site-packages/transformers/models/git/modeling_git.py:685: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if not interpolate_pos_encoding and (height != self.image_size or width != self.image_size):
/root/venv/lib/python3.11/site-packages/transformers/models/git/modeling_git.py:695: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if interpolate_pos_encoding:
/root/venv/lib/python3.11/site-packages/transformers/models/git/modeling_git.py:768: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
/root/venv/lib/python3.11/site-packages/transformers/models/git/modeling_git.py:808: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
Traceback (most recent call last):
  File "/root/venv/bin/optimum-cli", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/commands/optimum_cli.py", line 208, in main
    service.run()
  File "/root/venv/lib/python3.11/site-packages/optimum/commands/export/onnx.py", line 265, in run
    main_export(
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/__main__.py", line 373, in main_export
    onnx_export_from_model(
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 1176, in onnx_export_from_model
    _, onnx_outputs = export_models(
                      ^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 762, in export_models
    export(
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 867, in export
    export_output = export_pytorch(
                    ^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/optimum/exporters/onnx/convert.py", line 563, in export_pytorch
    onnx_export(
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/__init__.py", line 375, in export
    export(
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 502, in export
    _export(
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 1564, in _export
    graph, params_dict, torch_out = _model_to_graph(
                                    ^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 1113, in _model_to_graph
    graph, params, torch_out, module = _create_jit_graph(model, args)
                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 997, in _create_jit_graph
    graph, torch_out = _trace_and_get_graph_from_model(model, args)
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/onnx/utils.py", line 904, in _trace_and_get_graph_from_model
    trace_graph, torch_out, inputs_states = torch.jit._get_trace_graph(
                                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/jit/_trace.py", line 1500, in _get_trace_graph
    outs = ONNXTracedModule(
           ^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/venv/lib/python3.11/site-packages/torch/jit/_trace.py", line 139, in forward
    graph, out = torch._C._create_graph_by_tracing(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: 0 INTERNAL ASSERT FAILED at "/pytorch/torch/csrc/jit/ir/alias_analysis.cpp":617, please report a bug to PyTorch. We don't have an op for aten::full but it isn't a special case.  Argument types: int[], bool, NoneType, NoneType, Device, bool, 

Candidates:
	aten::full.names(int[] size, Scalar fill_value, *, str[]? names, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
	aten::full(SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
	aten::full.names_out(int[] size, Scalar fill_value, *, str[]? names, Tensor(a!) out) -> Tensor(a!)
	aten::full.out(SymInt[] size, Scalar fill_value, *, Tensor(a!) out) -> Tensor(a!)

Is this a case of pytorch/pytorch#137202 or pytorch/pytorch#130229, or some misconfiguration?
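
As a side note, here is a hypothetical minimal repro (my sketch, not from the PR) of the aten::full failure; it assumes the trace hits torch.full with a bool fill value and an explicit device, which matches the "int[], bool, NoneType, NoneType, Device, bool" argument types in the assert:

import torch

class FullRepro(torch.nn.Module):
    def forward(self, x):
        # size=int[], fill_value=bool, dtype=None, layout=None, device=Device, pin_memory=bool
        return torch.full((x.shape[0], x.shape[0]), True, device=x.device)

# On affected torch versions this may trip the same alias-analysis assert during tracing
torch.onnx.export(FullRepro(), (torch.zeros(2),), "full_repro.onnx")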

11 changes: 11 additions & 0 deletions optimum/exporters/tasks.py
@@ -692,6 +692,17 @@ class TasksManager:
"text-classification",
onnx="GemmaOnnxConfig",
),
"git": supported_tasks_mapping(
"feature-extraction",
"image-text-to-text",
Collaborator:

The issue
KeyError: "Unknown task: image-text-to-text. Possible values are: `audio-classification` for AutoModelForAudioClassification, `audio-frame-classification` for AutoModelForAudioFrameClassification, `audio-xvector` for AutoModelForAudioXVector, `automatic-speech-recognition` for ('AutoModelForSpeechSeq2Seq', 'AutoModelForCTC'), `depth-estimation` for AutoModelForDepthEstimation, `feature-extraction` for AutoModel, `fill-mask` for AutoModelForMaskedLM, `image-classification` for AutoModelForImageClassification, `image-segmentation` for ('AutoModelForImageSegmentation', 'AutoModelForSemanticSegmentation'), `image-to-image` for AutoModelForImageToImage, `image-to-text` for ('AutoModelForVision2Seq', 'AutoModel'), `mask-generation` for AutoModel, `masked-im` for AutoModelForMaskedImageModeling, `multiple-choice` for AutoModelForMultipleChoice, `object-detection` for AutoModelForObjectDetection, `question-answering` for AutoModelForQuestionAnswering, `reinforcement-learning` for AutoModel, `semantic-segmentation` for AutoModelForSemanticSegmentation, `text-to-audio` for ('AutoModelForTextToSpectrogram', 'AutoModelForTextToWaveform'), `text-generation` for AutoModelForCausalLM, `text2text-generation` for AutoModelForSeq2SeqLM, `text-classification` for AutoModelForSequenceClassification, `token-classification` for AutoModelForTokenClassification, `zero-shot-image-classification` for AutoModelForZeroShotImageClassification, `zero-shot-object-detection` for AutoModelForZeroShotObjectDetection"

comes from the fact that we don't yet support the "image-text-to-text" task, but it can be added here:

_TRANSFORMERS_TASKS_TO_MODEL_LOADERS = {
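
Roughly, the new entry would map the task name to an auto-model loader class alongside the existing ones (my sketch; the loader shown is an assumption, not confirmed by the review):

_TRANSFORMERS_TASKS_TO_MODEL_LOADERS = {
    # ...existing entries; the KeyError above lists e.g. "image-to-text" for
    # ('AutoModelForVision2Seq', 'AutoModel')
    "image-text-to-text": "AutoModelForVision2Seq",  # assumed loader for this task
}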

"image-to-text",
onnx="GITOnnxConfig",
),
"git-vision-model": supported_tasks_mapping(
"feature-extraction",
"image-to-text",
onnx="GITVisionModelOnnxConfig",
),
Collaborator:

Looks like this model type doesn't exist, so it can be removed:

Suggested change:
-    "git-vision-model": supported_tasks_mapping(
-        "feature-extraction",
-        "image-to-text",
-        onnx="GITVisionModelOnnxConfig",
-    ),

Author:

Removed in marcindulak@ea2321c

I imagined that git-vision-model was expected, since there is a separate clip-vision-model.
The docs show CLIPVisionModel
https://huggingface.co/docs/transformers/main/model_doc/clip#transformers.CLIPVisionModel
and GitVisionModel
https://huggingface.co/docs/transformers/main/model_doc/git#transformers.GitVisionModel
so I thought the setup would be similar.

I see git_vision_model in https://huggingface.co/microsoft/git-large/blob/main/config.json, but it's nested under vision_config. Is this the reason no separate OnnxConfig is needed?
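
For what it's worth, a quick check (my sketch) of how the nested vision config surfaces; it assumes `GitVisionConfig` reports the "git_vision_model" model type:

from transformers import AutoConfig

config = AutoConfig.from_pretrained("microsoft/git-base")
print(config.model_type)                # "git", the only top-level model type
print(config.vision_config.model_type)  # nested vision config, "git_vision_model"
print(config.vision_config.image_size)  # the image size the export needs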

"glpn": supported_tasks_mapping(
"feature-extraction",
"depth-estimation",
8 changes: 8 additions & 0 deletions tests/exporters/exporters_utils.py
@@ -96,6 +96,14 @@
},
"flaubert": "hf-internal-testing/tiny-random-flaubert",
"gemma": "fxmarty/tiny-random-GemmaForCausalLM",
"git": {
"hf-internal-testing/tiny-random-GitModel": [
"feature-extraction",
],
"hf-internal-testing/tiny-random-GitForCausalLM": [
"image-text-to-text",
],
},
"glpn": "hf-internal-testing/tiny-random-GLPNModel",
"gpt2": "hf-internal-testing/tiny-random-gpt2",
"gpt-bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
8 changes: 8 additions & 0 deletions tests/onnxruntime/utils_onnxruntime_tests.py
@@ -101,6 +101,14 @@
"flaubert": "hf-internal-testing/tiny-random-flaubert",
"flux": "optimum-internal-testing/tiny-random-flux",
"gemma": "fxmarty/tiny-random-GemmaForCausalLM",
"git": {
"hf-internal-testing/tiny-random-GitModel": [
"feature-extraction",
],
"hf-internal-testing/tiny-random-GitForCausalLM": [
"image-text-to-text",
],
},
"gpt2": "hf-internal-testing/tiny-random-gpt2",
"gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel",
"gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",