From f362c108920dfa15ceeb8a9c5ad46abe31fba034 Mon Sep 17 00:00:00 2001
From: yayoimizuha <poko.peko.tomo@icloud.com>
Date: Tue, 17 Oct 2023 10:02:04 +0900
Subject: [PATCH] Use dynamic-shape TensorRT inputs; add RetinaFace and ONNX export test scripts

---
 inference_all.py                    |  17 +-
 test_script/retinaface_pure_impl.py | 301 ++++++++++++++++++++++++++++
 test_script/to_onnx.py              |  28 +++
 test_script/trt_test.py             |  24 ++-
 4 files changed, 357 insertions(+), 13 deletions(-)
 create mode 100644 test_script/retinaface_pure_impl.py
 create mode 100644 test_script/to_onnx.py

diff --git a/inference_all.py b/inference_all.py
index 9d93c1f..a77df06 100644
--- a/inference_all.py
+++ b/inference_all.py
@@ -16,12 +16,12 @@ from pandas import DataFrame
 from seaborn import heatmap, color_palette, set_palette
 from matplotlib import pyplot
 from japanize_matplotlib import japanize
-from torch_tensorrt import compile
+from torch_tensorrt import compile, Input
 
 device = device('cuda' if is_available() else 'cpu')
 # device = 'cpu'
 print(f'device: {device}')
-model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-10-15 07:08:44.537055', 'model.pth')
+model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-10-15 14:46:51.187699', 'model.pth')
 print(f'model path: {model_path}')
 input_shape: int = 256
 batch_size = 64
@@ -52,9 +52,16 @@ else:
 
     example_input = randn(size=[batch_size, 3, 256, 256]).float().cuda()
     traced_script_module = jit.trace(model, example_inputs=[example_input])
-    trt_model = compile(module=traced_script_module, inputs=[example_input],
-                        enabled_precisions={float32, float16},
-                        truncate_long_and_double=True)
+    trt_model = compile(module=traced_script_module, inputs=[
+        Input(
+            min_shape=[1, 3, 256, 256],
+            opt_shape=[batch_size, 3, 256, 256],
+            max_shape=[batch_size, 3, 256, 256]
+        )
+    ],
+                        enabled_precisions={float32},
+                        truncate_long_and_double=True,
+                        allow_shape_tensors=True)
     jit.save(trt_model, join(datadir(), 'infer_all_torch_trt.ts'))
 
 # heatmap_df = DataFrame(index=image_class, columns=image_folder.classes).fillna(0)
diff --git a/test_script/retinaface_pure_impl.py b/test_script/retinaface_pure_impl.py
new file mode 100644
index 0000000..71b533d
--- /dev/null
+++ b/test_script/retinaface_pure_impl.py
@@ -0,0 +1,301 @@
+from itertools import product
+from math import ceil
+
+import torch
+from PIL import Image
+from numpy import array
+from retinaface.pre_trained_models import get_model
+from retinaface.predict_single import Model
+from retinaface.network import RetinaFace
+from torch import jit, randn, no_grad, Tensor, int64, tensor, onnx
+import albumentations as A
+from torchinfo import summary
+import numpy as np
+from typing import Dict, List, Optional, Tuple, Union
+import cv2
+from torch.nn import functional as F
+from torchvision.extension import _assert_has_ops
+from torchvision.utils import _log_api_usage_once
+
+# model: Model = get_model(model_name='resnet50_2020-07-20', max_size=512, device='cuda')
+# model.eval()
+
+
+image = Image.open(
+    fp="/home/tomokazu/PycharmProjects/helloproject-ai/data/blog_images"
+       "/稲場愛香/稲場愛香=juicejuice-official=12737097989-2.jpg").convert(mode="RGB")
+image_arr = array(image)
+max_size = 512  # side of the square canvas the image is resized and padded to further down
+# NOTE(review): example_input is only referenced by the commented-out ONNX export below.
+example_input = randn(size=[1, 3, 256, 256]).float().cuda()
+# NOTE(review): no checkpoint is loaded and .eval() is never called on this model — confirm intended.
+retina_model = RetinaFace(
+    name="Resnet50",
+    pretrained=False,
+    return_layers={"layer2": 1, "layer3": 2, "layer4": 3},
+    in_channels=256,
+    out_channels=256,
+).cuda()
+
+
+# onnx.export(
+#     model=retina_model, args=example_input, export_params=True, verbose=False, input_names=["input"],
+#     output_names=["bbox", "confidence", "landmark"],
+#     dynamic_axes={"input": {
+#         0: "batch_size",
+#         2: "height",
+#         3: "width"
+#     }, "bbox": {1: "bbox"}, "confidence": {1: "confidence"}, "landmark": {1: "landmark"}}, opset_version=16,
+#     f="retinaface.onnx"
+# )
+
+def pad_to_size(
+        target_size: Tuple[int, int],
+        image: np.array,
+        bboxes: Optional[np.ndarray] = None,
+        keypoints: Optional[np.ndarray] = None,
+) -> Dict[str, Union[np.ndarray, Tuple[int, int, int, int]]]:
+    """Pad ``image`` with a constant border up to ``target_size``, split between both sides.
+
+    Args:
+        target_size: ``(height, width)`` of the padded result.
+        image: array whose first two axes are height and width.
+        bboxes: optional rows; columns 0/2 are shifted by the left pad, 1/3 by the top pad.
+        keypoints: optional rows; column 0 is shifted by the left pad, column 1 by the top pad.
+
+    Returns:
+        Dict with ``"pads"`` as ``(x_min_pad, y_min_pad, x_max_pad, y_max_pad)``, the padded
+        ``"image"``, and ``"bboxes"`` / ``"keypoints"`` when those were passed in. Both are
+        shifted in place, so the caller's arrays are modified as well.
+
+    Raises:
+        ValueError: if ``image`` is larger than ``target_size`` along either axis.
+    """
+    target_height, target_width = target_size
+    image_height, image_width = image.shape[:2]
+
+    if target_width < image_width:
+        raise ValueError(f"Target width should be at least image_width. We got {target_width} {image_width}")
+    if target_height < image_height:
+        raise ValueError(f"Target height should be at least image_height. We got {target_height} {image_height}")
+
+    # Any odd pixel goes to the max side; equal sizes simply give zero padding.
+    y_min_pad = (target_height - image_height) // 2
+    y_max_pad = target_height - image_height - y_min_pad
+    x_min_pad = (target_width - image_width) // 2
+    x_max_pad = target_width - image_width - x_min_pad
+
+    result = {
+        "pads": (x_min_pad, y_min_pad, x_max_pad, y_max_pad),
+        "image": cv2.copyMakeBorder(image, y_min_pad, y_max_pad, x_min_pad, x_max_pad, cv2.BORDER_CONSTANT),
+    }
+    if bboxes is not None:
+        bboxes[:, 0] += x_min_pad
+        bboxes[:, 1] += y_min_pad
+        bboxes[:, 2] += x_min_pad
+        bboxes[:, 3] += y_min_pad
+        result["bboxes"] = bboxes
+
+    if keypoints is not None:
+        keypoints[:, 0] += x_min_pad
+        keypoints[:, 1] += y_min_pad
+        result["keypoints"] = keypoints
+    return result
+
+
+def tensor_from_rgb_image(image: np.ndarray) -> torch.Tensor:
+    # Reorder the axes to (2, 0, 1) and wrap the result as a tensor in a single expression.
+    return torch.from_numpy(np.transpose(image, (2, 0, 1)))
+
+
+def priorbox(min_sizes, steps, clip, image_size):
+    """Build the anchor table: one ``[cx, cy, w, h]`` row per prior, divided by ``image_size``.
+
+    ``image_size`` is indexed as ``(height, width)``; ``steps[k]`` is the cell pitch of level ``k``
+    and ``min_sizes[k]`` lists the anchor sizes placed at every cell of that level.
+    """
+    height, width = image_size[0], image_size[1]
+    grids = [(ceil(height / step), ceil(width / step)) for step in steps]
+
+    anchors = []
+    for level, (rows, cols) in enumerate(grids):
+        level_sizes = min_sizes[level]
+        for row, col in product(range(rows), range(cols)):
+            # Centre of the cell, normalised by the image extent along its own axis.
+            cx, cy = (col + 0.5) * steps[level] / width, (row + 0.5) * steps[level] / height
+            for size in level_sizes:
+                anchors += [cx, cy, size / width, size / height]
+
+    output = torch.Tensor(anchors).view(-1, 4)
+    return output.clamp_(max=1, min=0) if clip else output
+
+
+def decode(
+        loc: torch.Tensor, priors: torch.Tensor, variances: Union[List[float], Tuple[float, float]]
+) -> torch.Tensor:
+    """Apply offsets ``loc`` to ``priors`` (centre, size) and return corner-form boxes.
+
+    Output columns are the centre minus half the size, followed by that corner plus the size.
+    """
+    centres = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
+    sizes = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
+
+    min_corner = centres - sizes / 2
+    max_corner = sizes + min_corner
+    return torch.cat((min_corner, max_corner), 1)
+
+
+def decode_landm(
+        pre: torch.Tensor, priors: torch.Tensor, variances: Union[List[float], Tuple[float, float]]
+) -> torch.Tensor:
+    """Decode five landmark offset pairs from ``pre`` against ``priors``.
+
+    Each column pair of ``pre`` (0:2 up to 8:10) is scaled by ``variances[0]`` and by
+    ``priors[:, 2:]``, then added to ``priors[:, :2]``; the pairs are joined along dim 1.
+    """
+    anchor_xy, anchor_wh = priors[:, :2], priors[:, 2:]
+    points = [
+        anchor_xy + pre[:, start:start + 2] * variances[0] * anchor_wh for start in range(0, 10, 2)
+    ]
+    return torch.cat(points, dim=1)
+
+
+def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
+    if not torch.jit.is_scripting() and not torch.jit.is_tracing():  # eager mode only
+        _log_api_usage_once(nms)  # private helper imported from torchvision.utils
+    _assert_has_ops()  # private torchvision.extension helper; presumably raises if the compiled ops are missing
+    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)  # call the nms op under torch.ops.torchvision
+
+
+def unpad_from_size(
+        pads: Tuple[int, int, int, int],
+        image: Optional[np.array] = None,
+        bboxes: Optional[np.ndarray] = None,
+        keypoints: Optional[np.ndarray] = None,
+) -> Dict[str, np.ndarray]:
+    """Strip the padding described by ``pads`` from an image and/or shift coordinates back.
+
+    ``pads`` is ``(x_min_pad, y_min_pad, x_max_pad, y_max_pad)``. Only arguments that are not
+    ``None`` show up in the returned dict, under ``"image"``, ``"bboxes"`` and ``"keypoints"``.
+    ``bboxes`` and ``keypoints`` are shifted in place, so the caller's arrays change too.
+    """
+    left, top, right, bottom = pads
+    result = {}
+    if image is not None:
+        height, width = image.shape[:2]
+        result["image"] = image[top: height - bottom, left: width - right]
+
+    if bboxes is not None:
+        for column, offset in enumerate((left, top, left, top)):
+            bboxes[:, column] -= offset
+        result["bboxes"] = bboxes
+
+    if keypoints is not None:
+        for column, offset in enumerate((left, top)):
+            keypoints[:, column] -= offset
+        result["keypoints"] = keypoints
+
+    return result
+
+
+device = "cuda"
+transform = A.Compose([A.LongestMaxSize(max_size=max_size, p=1), A.Normalize(p=1)])
+variance = [0.1, 0.2]  # passed as ``variances`` to decode() and decode_landm()
+nms_threshold = .4  # IoU threshold handed to nms() in infer()
+confidence_threshold = .7  # infer() drops detections scoring at or below this
+_priorbox = priorbox(
+    min_sizes=[[16, 32], [64, 128], [256, 512]],
+    steps=[8, 16, 32],
+    clip=False,
+    image_size=(max_size, max_size),
+).to(device)
+original_height, original_width = image_arr.shape[:2]
+# Multipliers infer() applies to the decoded landmarks (10 columns) and boxes (4 columns).
+scale_landmarks = torch.from_numpy(np.tile([max_size, max_size], 5)).to(device)
+scale_bboxes = torch.from_numpy(np.tile([max_size, max_size], 2)).to(device)
+
+transformed_image = transform(image=image_arr)["image"]
+# Pad to a max_size x max_size canvas; the pads are kept so infer() can undo the offset.
+paded = pad_to_size(target_size=(max_size, max_size), image=transformed_image)
+
+pads = paded["pads"]
+# Reorder the axes to (2, 0, 1) and move to the GPU; the batch axis is added at the call site.
+torched_image = tensor_from_rgb_image(paded["image"]).to(device)
+
+
+# loc, conf, land = retina_model(torched_image.unsqueeze(0))
+
+
+def infer(loc, conf, land):
+    """Decode raw RetinaFace outputs (batch element 0 only) into face annotations.
+
+    Uses the module-level ``_priorbox``, ``variance``, ``scale_*``, ``pads`` and original image size.
+    Returns a list of dicts with ``"bbox"`` (``[x_min, y_min, x_max, y_max]``, clipped to the
+    original image), ``"score"`` and ``"landmarks"``; if no detection survives, a single
+    placeholder ``{"bbox": [], "score": -1, "landmarks": []}`` is returned instead.
+    """
+    conf = F.softmax(conf, dim=-1)
+    annotations = []
+
+    boxes = decode(loc.data[0], _priorbox, variance)
+    boxes *= scale_bboxes
+    scores = conf[0][:, 1]
+    landmarks = decode_landm(land.data[0], _priorbox, variance)
+    landmarks *= scale_landmarks
+
+    # ignore low scores
+    valid_index = torch.where(scores > confidence_threshold)[0]
+    boxes = boxes[valid_index]
+    landmarks = landmarks[valid_index]
+    scores = scores[valid_index]
+
+    # Sort from high to low
+    order = scores.argsort(descending=True)
+    boxes = boxes[order]
+    landmarks = landmarks[order]
+    scores = scores[order]
+
+    # do NMS
+    keep = nms(boxes, scores, nms_threshold)
+    boxes = boxes[keep, :].int()
+
+    if boxes.shape[0] == 0:
+        return [{"bbox": [], "score": -1, "landmarks": []}]
+
+    landmarks = landmarks[keep]
+    scores = scores[keep].cpu().detach().numpy().astype(np.float64)
+    boxes = boxes.cpu().numpy()
+    landmarks = landmarks.cpu().numpy()
+    landmarks = landmarks.reshape([-1, 2])
+
+    # Remove the padding offset, then scale from the max_size canvas back to the original image.
+    unpadded = unpad_from_size(pads, bboxes=boxes, keypoints=landmarks)
+    resize_coeff = max(original_height, original_width) / max_size
+    boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
+    landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)
+
+    for box_id, bbox in enumerate(boxes):
+        x_min, y_min, x_max, y_max = bbox
+        x_min = np.clip(x_min, 0, original_width - 1)
+        x_max = np.clip(x_max, x_min + 1, original_width - 1)
+        if x_min >= x_max:
+            continue
+
+        y_min = np.clip(y_min, 0, original_height - 1)
+        y_max = np.clip(y_max, y_min + 1, original_height - 1)
+        if y_min >= y_max:
+            continue
+
+        annotations += [
+            {
+                # Report the clipped corners; the raw row can reach outside the image.
+                "bbox": [int(x_min), int(y_min), int(x_max), int(y_max)],
+                "score": scores[box_id],
+                "landmarks": landmarks[box_id].reshape(-1, 2).tolist(),
+            }
+        ]
+    return annotations
+
+
+ans = infer(*retina_model(torched_image.unsqueeze(0)))  # unsqueeze adds the batch axis; model outputs are unpacked into (loc, conf, land)
+print(ans)
diff --git a/test_script/to_onnx.py b/test_script/to_onnx.py
new file mode 100644
index 0000000..9288251
--- /dev/null
+++ b/test_script/to_onnx.py
@@ -0,0 +1,28 @@
+from torch import load, randn, float, half, jit, ones, no_grad
+import torch_tensorrt
+from torch.nn import Module
+from torch.onnx import export
+
+model: Module = load(  # NOTE(review): torch.load unpickles the file — only use with trusted checkpoints
+    f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-10-15 14:46:51.187699/checkpoints/80.pth')
+model.cuda()
+model.eval()
+model = model.half()  # export in fp16; the example input below is cast to half to match
+with no_grad():
+    example_input = randn(1, 3, 224, 224).cuda().half()
+
+    export(
+        model=model,
+        args=example_input,
+        f="onnx_test.onnx",
+        input_names=["input"],
+        output_names=["output"],
+        dynamic_axes={  # axes of "input" that are given symbolic names for the export
+            "input": {
+                0: "batch_size",
+                2: "height",
+                3: "width"
+            }
+        },  # NOTE(review): "output" declares no dynamic axis — confirm its batch dim need not be dynamic
+        verbose=False
+    )
diff --git a/test_script/trt_test.py b/test_script/trt_test.py
index ca5c126..8cfacd2 100644
--- a/test_script/trt_test.py
+++ b/test_script/trt_test.py
@@ -1,17 +1,25 @@
-from torch import load, randn, float, half, jit
+from torch import load, randn, float, half, jit, ones, no_grad
 import torch_tensorrt
 from torch.nn import Module
+from torch_tensorrt import Input
 
 model: Module = load(
     f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-05-28 23:05:09.874085/model.pth')
 model.cuda()
 model.eval()
+with no_grad():
+    example_input = ones(1, 3, 224, 224).cuda()
 
-example_input = randn(size=[1, 3, 224, 224]).float().cuda()
+    traced_script_module = jit.trace(model, example_inputs=[example_input])
+    tensorrt_module = torch_tensorrt.compile(module=traced_script_module, inputs=[
+        Input(
+            min_shape=[1, 3, 224, 224],
+            opt_shape=[32, 3, 224, 224],
+            max_shape=[32, 3, 224, 224]
+        )
+    ],
+                                             enabled_precisions={float},
+                                             truncate_long_and_double=True,
+                                             allow_shape_tensors=True)
 
-traced_script_module = jit.trace(model, example_inputs=[example_input])
-tensorrt_module = torch_tensorrt.compile(module=traced_script_module, inputs=[example_input],
-                                         enabled_precisions={float, half},
-                                         truncate_long_and_double=True)
-
-jit.save(tensorrt_module, "trt_test.ts")
+    jit.save(tensorrt_module, "trt_test.ts")