From f362c108920dfa15ceeb8a9c5ad46abe31fba034 Mon Sep 17 00:00:00 2001
From: yayoimizuha
Date: Tue, 17 Oct 2023 10:02:04 +0900
Subject: [PATCH] update

---
Reviewer notes (this region sits after the three-dash separator and is not
part of the commit message or of the diff):

What the patch changes
* inference_all.py: the TensorRT compile call now takes a torch_tensorrt
  Input with min_shape [1, 3, 256, 256] and opt/max shape
  [batch_size, 3, 256, 256] instead of the concrete example tensor; float16
  is removed from enabled_precisions; allow_shape_tensors=True is passed;
  model_path points at a different artifact directory.
* test_script/retinaface_pure_impl.py (new): a script that instantiates
  RetinaFace, defines padding, prior-box, decoding, NMS and un-padding
  helpers at module level, and runs infer() once on a single image.
* test_script/to_onnx.py (new): loads a checkpoint, casts the model to half
  precision and exports it to "onnx_test.onnx" with dynamic batch, height
  and width axes on the input.
* test_script/trt_test.py: same switch to a dynamic-shape Input (batch 1 to
  32), now inside no_grad(), tracing with ones() instead of randn(), and
  with half precision removed from enabled_precisions.

Points to confirm before merging
* pad_to_size: the two adjacent f-strings in each ValueError are joined
  with no separator, so the message reads "...image_widthWe got ...".
* infer: the clipped x_min/x_max/y_min/y_max are only used to decide whether
  a box is skipped; the emitted "bbox" is the unclipped bbox.tolist().
* retinaface_pure_impl.py: no weights are loaded in this script (the
  get_model call is commented out and RetinaFace is built with
  pretrained=False) - presumably intentional for a shape test; confirm.
* pad_to_size / unpad_from_size annotate with np.array (a function) where
  np.ndarray is used elsewhere in the same signatures.
* to_onnx.py and trt_test.py import `float` from torch, which shadows the
  builtin for the rest of the module; several imported names are unused.
* to_onnx.py declares dynamic_axes for "input" only, not for "output".
* Image and checkpoint paths are absolute paths under /home/tomokazu.

Reconstruction caveat
* This patch was recovered from a copy whose newlines and indentation had
  been collapsed. Line boundaries were validated against every hunk header
  and the diffstat. Indentation widths, continuation-line alignment and
  the position of blank context lines were rebuilt from syntax and may not
  be byte-identical to the repository; run `git apply --check` (and, if
  needed, `--ignore-whitespace`) before applying.

 inference_all.py                    |  17 +-
 test_script/retinaface_pure_impl.py | 301 ++++++++++++++++++++++++++++
 test_script/to_onnx.py              |  28 +++
 test_script/trt_test.py             |  24 ++-
 4 files changed, 357 insertions(+), 13 deletions(-)
 create mode 100644 test_script/retinaface_pure_impl.py
 create mode 100644 test_script/to_onnx.py

diff --git a/inference_all.py b/inference_all.py
index 9d93c1f..a77df06 100644
--- a/inference_all.py
+++ b/inference_all.py
@@ -16,12 +16,12 @@ from pandas import DataFrame
 from seaborn import heatmap, color_palette, set_palette
 from matplotlib import pyplot
 from japanize_matplotlib import japanize
-from torch_tensorrt import compile
+from torch_tensorrt import compile, Input
 
 device = device('cuda' if is_available() else 'cpu')
 # device = 'cpu'
 print(f'device: {device}')
-model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-10-15 07:08:44.537055', 'model.pth')
+model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-10-15 14:46:51.187699', 'model.pth')
 print(f'model path: {model_path}')
 input_shape: int = 256
 batch_size = 64
@@ -52,9 +52,16 @@ else:
     example_input = randn(size=[batch_size, 3, 256, 256]).float().cuda()
 
     traced_script_module = jit.trace(model, example_inputs=[example_input])
-    trt_model = compile(module=traced_script_module, inputs=[example_input],
-                        enabled_precisions={float32, float16},
-                        truncate_long_and_double=True)
+    trt_model = compile(module=traced_script_module, inputs=[
+        Input(
+            min_shape=[1, 3, 256, 256],
+            opt_shape=[batch_size, 3, 256, 256],
+            max_shape=[batch_size, 3, 256, 256]
+        )
+    ],
+                        enabled_precisions={float32},
+                        truncate_long_and_double=True,
+                        allow_shape_tensors=True)
     jit.save(trt_model, join(datadir(), 'infer_all_torch_trt.ts'))
 
 # heatmap_df = DataFrame(index=image_class, columns=image_folder.classes).fillna(0)
diff --git a/test_script/retinaface_pure_impl.py b/test_script/retinaface_pure_impl.py
new file mode 100644
index 0000000..71b533d
--- /dev/null
+++ b/test_script/retinaface_pure_impl.py
@@ -0,0 +1,301 @@
+from itertools import product
+from math import ceil
+
+import torch
+from PIL import Image
+from numpy import array
+from retinaface.pre_trained_models import get_model
+from retinaface.predict_single import Model
+from retinaface.network import RetinaFace
+from torch import jit, randn, no_grad, Tensor, int64, tensor, onnx
+import albumentations as A
+from torchinfo import summary
+import numpy as np
+from typing import Dict, List, Optional, Tuple, Union
+import cv2
+from torch.nn import functional as F
+from torchvision.extension import _assert_has_ops
+from torchvision.utils import _log_api_usage_once
+
+# model: Model = get_model(model_name='resnet50_2020-07-20', max_size=512, device='cuda')
+# model.eval()
+
+
+image = Image.open(
+    fp="/home/tomokazu/PycharmProjects/helloproject-ai/data/blog_images"
+       "/稲場愛香/稲場愛香=juicejuice-official=12737097989-2.jpg").convert(mode="RGB")
+image_arr = array(image)
+max_size = 512
+
+example_input = randn(size=[1, 3, 256, 256]).float().cuda()
+
+retina_model = RetinaFace(
+    name="Resnet50",
+    pretrained=False,
+    return_layers={"layer2": 1, "layer3": 2, "layer4": 3},
+    in_channels=256,
+    out_channels=256,
+).cuda()
+
+
+# onnx.export(
+#     model=retina_model, args=example_input, export_params=True, verbose=False, input_names=["input"],
+#     output_names=["bbox", "confidence", "landmark"],
+#     dynamic_axes={"input": {
+#         0: "batch_size",
+#         2: "height",
+#         3: "width"
+#     }, "bbox": {1: "bbox"}, "confidence": {1: "confidence"}, "landmark": {1: "landmark"}}, opset_version=16,
+#     f="retinaface.onnx"
+# )
+
+def pad_to_size(
+    target_size: Tuple[int, int],
+    image: np.array,
+    bboxes: Optional[np.ndarray] = None,
+    keypoints: Optional[np.ndarray] = None,
+) -> Dict[str, Union[np.ndarray, Tuple[int, int, int, int]]]:
+    target_height, target_width = target_size
+
+    image_height, image_width = image.shape[:2]
+
+    if target_width < image_width:
+        raise ValueError(f"Target width should bigger than image_width" f"We got {target_width} {image_width}")
+
+    if target_height < image_height:
+        raise ValueError(f"Target height should bigger than image_height" f"We got {target_height} {image_height}")
+
+    if image_height == target_height:
+        y_min_pad = 0
+        y_max_pad = 0
+    else:
+        y_pad = target_height - image_height
+        y_min_pad = y_pad // 2
+        y_max_pad = y_pad - y_min_pad
+
+    if image_width == target_width:
+        x_min_pad = 0
+        x_max_pad = 0
+    else:
+        x_pad = target_width - image_width
+        x_min_pad = x_pad // 2
+        x_max_pad = x_pad - x_min_pad
+
+    result = {
+        "pads": (x_min_pad, y_min_pad, x_max_pad, y_max_pad),
+        "image": cv2.copyMakeBorder(image, y_min_pad, y_max_pad, x_min_pad, x_max_pad, cv2.BORDER_CONSTANT),
+    }
+
+    if bboxes is not None:
+        bboxes[:, 0] += x_min_pad
+        bboxes[:, 1] += y_min_pad
+        bboxes[:, 2] += x_min_pad
+        bboxes[:, 3] += y_min_pad
+
+        result["bboxes"] = bboxes
+
+    if keypoints is not None:
+        keypoints[:, 0] += x_min_pad
+        keypoints[:, 1] += y_min_pad
+
+        result["keypoints"] = keypoints
+
+    return result
+
+
+def tensor_from_rgb_image(image: np.ndarray) -> torch.Tensor:
+    image = np.transpose(image, (2, 0, 1))
+    return torch.from_numpy(image)
+
+
+def priorbox(min_sizes, steps, clip, image_size):
+    feature_maps = [[ceil(image_size[0] / step), ceil(image_size[1] / step)] for step in steps]
+
+    anchors = []
+    for k, f in enumerate(feature_maps):
+        t_min_sizes = min_sizes[k]
+        for i, j in product(range(f[0]), range(f[1])):
+            for min_size in t_min_sizes:
+                s_kx = min_size / image_size[1]
+                s_ky = min_size / image_size[0]
+                dense_cx = [x * steps[k] / image_size[1] for x in [j + 0.5]]
+                dense_cy = [y * steps[k] / image_size[0] for y in [i + 0.5]]
+                for cy, cx in product(dense_cy, dense_cx):
+                    anchors += [cx, cy, s_kx, s_ky]
+
+    # back to torch land
+    output = torch.Tensor(anchors).view(-1, 4)
+    if clip:
+        output.clamp_(max=1, min=0)
+    return output
+
+
+def decode(
+    loc: torch.Tensor, priors: torch.Tensor, variances: Union[List[float], Tuple[float, float]]
+) -> torch.Tensor:
+    boxes = torch.cat(
+        (
+            priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
+            priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1]),
+        ),
+        1,
+    )
+    boxes[:, :2] -= boxes[:, 2:] / 2
+    boxes[:, 2:] += boxes[:, :2]
+    return boxes
+
+
+def decode_landm(
+    pre: torch.Tensor, priors: torch.Tensor, variances: Union[List[float], Tuple[float, float]]
+) -> torch.Tensor:
+    return torch.cat(
+        (
+            priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
+            priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
+            priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
+            priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
+            priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:],
+        ),
+        dim=1,
+    )
+
+
+def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
+    if not torch.jit.is_scripting() and not torch.jit.is_tracing():
+        _log_api_usage_once(nms)
+    _assert_has_ops()
+    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
+
+
+def unpad_from_size(
+    pads: Tuple[int, int, int, int],
+    image: Optional[np.array] = None,
+    bboxes: Optional[np.ndarray] = None,
+    keypoints: Optional[np.ndarray] = None,
+) -> Dict[str, np.ndarray]:
+    x_min_pad, y_min_pad, x_max_pad, y_max_pad = pads
+
+    result = {}
+
+    if image is not None:
+        height, width = image.shape[:2]
+        result["image"] = image[y_min_pad: height - y_max_pad, x_min_pad: width - x_max_pad]
+
+    if bboxes is not None:
+        bboxes[:, 0] -= x_min_pad
+        bboxes[:, 1] -= y_min_pad
+        bboxes[:, 2] -= x_min_pad
+        bboxes[:, 3] -= y_min_pad
+
+        result["bboxes"] = bboxes
+
+    if keypoints is not None:
+        keypoints[:, 0] -= x_min_pad
+        keypoints[:, 1] -= y_min_pad
+
+        result["keypoints"] = keypoints
+
+    return result
+
+
+device = "cuda"
+transform = A.Compose([A.LongestMaxSize(max_size=max_size, p=1), A.Normalize(p=1)])
+variance = [0.1, 0.2]
+nms_threshold = .4
+confidence_threshold = .7
+_priorbox = priorbox(
+    min_sizes=[[16, 32], [64, 128], [256, 512]],
+    steps=[8, 16, 32],
+    clip=False,
+    image_size=(max_size, max_size),
+).to(device)
+original_height, original_width = image_arr.shape[:2]
+
+scale_landmarks = torch.from_numpy(np.tile([max_size, max_size], 5)).to(device)
+scale_bboxes = torch.from_numpy(np.tile([max_size, max_size], 2)).to(device)
+
+transformed_image = transform(image=image_arr)["image"]
+
+paded = pad_to_size(target_size=(max_size, max_size), image=transformed_image)
+
+pads = paded["pads"]
+
+torched_image = tensor_from_rgb_image(paded["image"]).to(device)
+
+
+# loc, conf, land = retina_model(torched_image.unsqueeze(0))
+
+
+def infer(loc, conf, land):
+    conf = F.softmax(conf, dim=-1)
+
+    annotations = []
+
+    boxes = decode(loc.data[0], _priorbox, variance)
+
+    boxes *= scale_bboxes
+    scores = conf[0][:, 1]
+
+    landmarks = decode_landm(land.data[0], _priorbox, variance)
+    landmarks *= scale_landmarks
+
+    # ignore low scores
+    valid_index = torch.where(scores > confidence_threshold)[0]
+    boxes = boxes[valid_index]
+    landmarks = landmarks[valid_index]
+    scores = scores[valid_index]
+
+    # Sort from high to low
+    order = scores.argsort(descending=True)
+    boxes = boxes[order]
+    landmarks = landmarks[order]
+    scores = scores[order]
+
+    # do NMS
+    keep = nms(boxes, scores, nms_threshold)
+    boxes = boxes[keep, :].int()
+
+    if boxes.shape[0] == 0:
+        return [{"bbox": [], "score": -1, "landmarks": []}]
+
+    landmarks = landmarks[keep]
+
+    scores = scores[keep].cpu().detach().numpy().astype(np.float64)
+    boxes = boxes.cpu().numpy()
+    landmarks = landmarks.cpu().numpy()
+    landmarks = landmarks.reshape([-1, 2])
+
+    unpadded = unpad_from_size(pads, bboxes=boxes, keypoints=landmarks)
+
+    resize_coeff = max(original_height, original_width) / max_size
+
+    boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
+    landmarks = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)
+
+    for box_id, bbox in enumerate(boxes):
+        x_min, y_min, x_max, y_max = bbox
+
+        x_min = np.clip(x_min, 0, original_width - 1)
+        x_max = np.clip(x_max, x_min + 1, original_width - 1)
+
+        if x_min >= x_max:
+            continue
+
+        y_min = np.clip(y_min, 0, original_height - 1)
+        y_max = np.clip(y_max, y_min + 1, original_height - 1)
+
+        if y_min >= y_max:
+            continue
+
+        annotations += [
+            {
+                "bbox": bbox.tolist(),
+                "score": scores[box_id],
+                "landmarks": landmarks[box_id].reshape(-1, 2).tolist(),
+            }
+        ]
+    return annotations
+
+
+ans = infer(*retina_model(torched_image.unsqueeze(0)))
+print(ans)
diff --git a/test_script/to_onnx.py b/test_script/to_onnx.py
new file mode 100644
index 0000000..9288251
--- /dev/null
+++ b/test_script/to_onnx.py
@@ -0,0 +1,28 @@
+from torch import load, randn, float, half, jit, ones, no_grad
+import torch_tensorrt
+from torch.nn import Module
+from torch.onnx import export
+
+model: Module = load(
+    f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-10-15 14:46:51.187699/checkpoints/80.pth')
+model.cuda()
+model.eval()
+model = model.half()
+with no_grad():
+    example_input = randn(1, 3, 224, 224).cuda().half()
+
+    export(
+        model=model,
+        args=example_input,
+        f="onnx_test.onnx",
+        input_names=["input"],
+        output_names=["output"],
+        dynamic_axes={
+            "input": {
+                0: "batch_size",
+                2: "height",
+                3: "width"
+            }
+        },
+        verbose=False
+    )
diff --git a/test_script/trt_test.py b/test_script/trt_test.py
index ca5c126..8cfacd2 100644
--- a/test_script/trt_test.py
+++ b/test_script/trt_test.py
@@ -1,17 +1,25 @@
-from torch import load, randn, float, half, jit
+from torch import load, randn, float, half, jit, ones, no_grad
 import torch_tensorrt
 from torch.nn import Module
+from torch_tensorrt import Input
 
 model: Module = load(
     f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-05-28 23:05:09.874085/model.pth')
 model.cuda()
 model.eval()
+with no_grad():
+    example_input = ones(1, 3, 224, 224).cuda()
 
-example_input = randn(size=[1, 3, 224, 224]).float().cuda()
+    traced_script_module = jit.trace(model, example_inputs=[example_input])
+    tensorrt_module = torch_tensorrt.compile(module=traced_script_module, inputs=[
+        Input(
+            min_shape=[1, 3, 224, 224],
+            opt_shape=[32, 3, 224, 224],
+            max_shape=[32, 3, 224, 224]
+        )
+    ],
+                                             enabled_precisions={float},
+                                             truncate_long_and_double=True,
+                                             allow_shape_tensors=True)
 
-traced_script_module = jit.trace(model, example_inputs=[example_input])
-tensorrt_module = torch_tensorrt.compile(module=traced_script_module, inputs=[example_input],
-                                         enabled_precisions={float, half},
-                                         truncate_long_and_double=True)
-
-jit.save(tensorrt_module, "trt_test.ts")
+    jit.save(tensorrt_module, "trt_test.ts")