make quantized model tests better (#3495)

pytorch · May 3, 2024 · 25b6352 · 25b6352
1 parent 4bd45c3
commit 25b6352
Show file tree

Hide file tree

Showing 11 changed files with 114 additions and 13 deletions.
diff --git a/backends/xnnpack/test/models/deeplab_v3.py b/backends/xnnpack/test/models/deeplab_v3.py
@@ -25,7 +25,7 @@ def forward(self, *args):
 class TestDeepLabV3(unittest.TestCase):
     dl3 = DL3Wrapper()
     dl3 = dl3.eval()
-    model_inputs = (torch.ones(1, 3, 224, 224),)
+    model_inputs = (torch.randn(1, 3, 224, 224),)
 
     def test_fp32_dl3(self):
 

diff --git a/backends/xnnpack/test/models/edsr.py b/backends/xnnpack/test/models/edsr.py
@@ -15,7 +15,7 @@
 
 class TestEDSR(unittest.TestCase):
     edsr = edsr_r16f64(2, False).eval()  # noqa
-    model_inputs = (torch.ones(1, 3, 224, 224),)
+    model_inputs = (torch.randn(1, 3, 224, 224),)
 
     def test_fp32_edsr(self):
         (
@@ -28,7 +28,21 @@ def test_fp32_edsr(self):
             .run_method_and_compare_outputs()
         )
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_qs8_edsr(self):
+        (
+            Tester(self.edsr, self.model_inputs)
+            .quantize()
+            .export()
+            .to_edge()
+            .partition()
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs()
+        )
+
+    # TODO: Delete this test and keep only the calibrated one after T187799178
+    def test_qs8_edsr_no_calibrate(self):
         (
             Tester(self.edsr, self.model_inputs)
             .quantize(Quantize(calibrate=False))

diff --git a/backends/xnnpack/test/models/emformer_rnnt.py b/backends/xnnpack/test/models/emformer_rnnt.py
@@ -57,7 +57,9 @@ def get_example_inputs(self):
             )
             return predict_inputs
 
-    @unittest.skip("T183426271")
+    @unittest.skip(
+        "T183426271: Emformer Predictor Takes too long to export + partition"
+    )
     def test_fp32_emformer_predictor(self):
         predictor = self.Predictor()
         (

diff --git a/backends/xnnpack/test/models/inception_v3.py b/backends/xnnpack/test/models/inception_v3.py
@@ -15,7 +15,7 @@
 class TestInceptionV3(unittest.TestCase):
     # pyre-ignore
     ic3 = models.inception_v3(weights="IMAGENET1K_V1").eval()  # noqa
-    model_inputs = (torch.ones(1, 3, 224, 224),)
+    model_inputs = (torch.randn(1, 3, 224, 224),)
 
     all_operators = {
         "executorch_exir_dialects_edge__ops_aten_addmm_default",
@@ -45,12 +45,34 @@ def test_fp32_ic3(self):
             .run_method_and_compare_outputs()
         )
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_qs8_ic3(self):
         # Quantization fuses away batchnorm, so it is no longer in the graph
         ops_after_quantization = self.all_operators - {
             "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
         }
 
+        (
+            Tester(self.ic3, self.model_inputs)
+            .quantize()
+            .export()
+            .to_edge()
+            .check(list(ops_after_quantization))
+            .partition()
+            .check(["torch.ops.higher_order.executorch_call_delegate"])
+            .check_not(list(ops_after_quantization))
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs()
+        )
+
+    # TODO: Delete this test and keep only the calibrated one after T187799178
+    def test_qs8_ic3_no_calibration(self):
+        # Quantization fuses away batchnorm, so it is no longer in the graph
+        ops_after_quantization = self.all_operators - {
+            "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
+        }
+
         (
             Tester(self.ic3, self.model_inputs)
             .quantize(Quantize(calibrate=False))

diff --git a/backends/xnnpack/test/models/inception_v4.py b/backends/xnnpack/test/models/inception_v4.py
@@ -13,7 +13,7 @@
 
 class TestInceptionV4(unittest.TestCase):
     ic4 = inception_v4(pretrained=False).eval()
-    model_inputs = (torch.ones(3, 299, 299).unsqueeze(0),)
+    model_inputs = (torch.randn(3, 299, 299).unsqueeze(0),)
 
     all_operators = {
         "executorch_exir_dialects_edge__ops_aten_addmm_default",

diff --git a/backends/xnnpack/test/models/llama2_et_example.py b/backends/xnnpack/test/models/llama2_et_example.py
@@ -45,5 +45,5 @@ def _test(self, dtype: torch.dtype = torch.float):
             .dump_artifact()
             .to_executorch()
             .serialize()
-            .run_method_and_compare_outputs(atol=5e-2)
+            .run_method_and_compare_outputs(atol=5e-2, inputs=example_inputs)
         )
diff --git a/backends/xnnpack/test/models/mobilebert.py b/backends/xnnpack/test/models/mobilebert.py
@@ -38,5 +38,5 @@ def test_fp32_mobilebert(self):
             .check_not(list(self.supported_ops))
             .to_executorch()
             .serialize()
-            .run_method_and_compare_outputs()
+            .run_method_and_compare_outputs(inputs=self.example_inputs)
         )
diff --git a/backends/xnnpack/test/models/mobilenet_v2.py b/backends/xnnpack/test/models/mobilenet_v2.py
@@ -16,7 +16,7 @@
 class TestMobileNetV2(unittest.TestCase):
     mv2 = models.mobilenetv2.mobilenet_v2(weights=MobileNet_V2_Weights)
     mv2 = mv2.eval()
-    model_inputs = (torch.ones(1, 3, 224, 224),)
+    model_inputs = (torch.randn(1, 3, 224, 224),)
 
     all_operators = {
         "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
@@ -49,6 +49,7 @@ def test_fp32_mv2(self):
             .run_method_and_compare_outputs(num_runs=10)
         )
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_qs8_mv2(self):
         # Quantization fuses away batchnorm, so it is no longer in the graph
         ops_after_quantization = self.all_operators - {
@@ -62,6 +63,34 @@ def test_qs8_mv2(self):
             },
         )
 
+        (
+            Tester(self.mv2, self.model_inputs, dynamic_shapes=dynamic_shapes)
+            .quantize()
+            .export()
+            .to_edge()
+            .check(list(ops_after_quantization))
+            .partition()
+            .check(["torch.ops.higher_order.executorch_call_delegate"])
+            .check_not(list(ops_after_quantization))
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs(num_runs=10)
+        )
+
+    # TODO: Delete this test and keep only the calibrated one after T187799178
+    def test_qs8_mv2_no_calibration(self):
+        # Quantization fuses away batchnorm, so it is no longer in the graph
+        ops_after_quantization = self.all_operators - {
+            "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
+        }
+
+        dynamic_shapes = (
+            {
+                2: torch.export.Dim("height", min=224, max=455),
+                3: torch.export.Dim("width", min=224, max=455),
+            },
+        )
+
         (
             Tester(self.mv2, self.model_inputs, dynamic_shapes=dynamic_shapes)
             .quantize(Quantize(calibrate=False))

diff --git a/backends/xnnpack/test/models/mobilenet_v3.py b/backends/xnnpack/test/models/mobilenet_v3.py
@@ -15,7 +15,7 @@
 class TestMobileNetV3(unittest.TestCase):
     mv3 = models.mobilenetv3.mobilenet_v3_small(pretrained=True)
     mv3 = mv3.eval()
-    model_inputs = (torch.ones(1, 3, 224, 224),)
+    model_inputs = (torch.randn(1, 3, 224, 224),)
     dynamic_shapes = (
         {
             2: torch.export.Dim("height", min=224, max=455),
@@ -51,12 +51,34 @@ def test_fp32_mv3(self):
             .run_method_and_compare_outputs(num_runs=5)
         )
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_qs8_mv3(self):
         ops_after_quantization = self.all_operators - {
             "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
         }
         ops_after_lowering = self.all_operators
 
+        (
+            Tester(self.mv3, self.model_inputs, dynamic_shapes=self.dynamic_shapes)
+            .quantize()
+            .export()
+            .to_edge()
+            .check(list(ops_after_quantization))
+            .partition()
+            .check(["torch.ops.higher_order.executorch_call_delegate"])
+            .check_not(list(ops_after_lowering))
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs(num_runs=5)
+        )
+
+    # TODO: Delete this test and keep only the calibrated one after T187799178
+    def test_qs8_mv3_no_calibration(self):
+        ops_after_quantization = self.all_operators - {
+            "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default",
+        }
+        ops_after_lowering = self.all_operators
+
         (
             Tester(self.mv3, self.model_inputs, dynamic_shapes=self.dynamic_shapes)
             .quantize(Quantize(calibrate=False))

diff --git a/backends/xnnpack/test/models/resnet.py b/backends/xnnpack/test/models/resnet.py
@@ -9,12 +9,11 @@
 import torch
 import torchvision
 
-from executorch.backends.xnnpack.test.tester import Tester
-from executorch.backends.xnnpack.test.tester.tester import Quantize
+from executorch.backends.xnnpack.test.tester import Quantize, Tester
 
 
 class TestResNet18(unittest.TestCase):
-    inputs = (torch.ones(1, 3, 224, 224),)
+    inputs = (torch.randn(1, 3, 224, 224),)
     dynamic_shapes = (
         {
             2: torch.export.Dim("height", min=224, max=455),
@@ -57,7 +56,13 @@ def _test_exported_resnet(self, tester):
     def test_fp32_resnet18(self):
         self._test_exported_resnet(Tester(torchvision.models.resnet18(), self.inputs))
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_qs8_resnet18(self):
+        quantized_tester = Tester(torchvision.models.resnet18(), self.inputs).quantize()
+        self._test_exported_resnet(quantized_tester)
+
+    # TODO: Delete this test and keep only the calibrated one after T187799178
+    def test_qs8_resnet18_no_calibration(self):
         quantized_tester = Tester(torchvision.models.resnet18(), self.inputs).quantize(
             Quantize(calibrate=False)
         )
@@ -68,7 +73,14 @@ def test_fp32_resnet18_dynamic(self):
             Tester(self.DynamicResNet(), self.inputs, self.dynamic_shapes)
         )
 
+    @unittest.skip("T187799178: Debugging Numerical Issues with Calibration")
     def test_qs8_resnet18_dynamic(self):
+        self._test_exported_resnet(
+            Tester(self.DynamicResNet(), self.inputs, self.dynamic_shapes).quantize()
+        )
+
+    # TODO: Delete this test and keep only the calibrated one after T187799178
+    def test_qs8_resnet18_dynamic_no_calibration(self):
         self._test_exported_resnet(
             Tester(self.DynamicResNet(), self.inputs, self.dynamic_shapes).quantize(
                 Quantize(calibrate=False)

diff --git a/backends/xnnpack/test/models/torchvision_vit.py b/backends/xnnpack/test/models/torchvision_vit.py
@@ -14,7 +14,7 @@
 class TestViT(unittest.TestCase):
     vit = models.vision_transformer.vit_b_16(weights="IMAGENET1K_V1")
     vit = vit.eval()
-    model_inputs = (torch.ones(1, 3, 224, 224),)
+    model_inputs = (torch.randn(1, 3, 224, 224),)
     dynamic_shapes = (
         {
             2: torch.export.Dim("height", min=224, max=455),