update
continuous-integration/drone/push Build was killed
Details
continuous-integration/drone/push Build was killed
Details
This commit is contained in:
parent
1e7cf01e0d
commit
f362c10892
|
|
@ -16,12 +16,12 @@ from pandas import DataFrame
|
||||||
from seaborn import heatmap, color_palette, set_palette
|
from seaborn import heatmap, color_palette, set_palette
|
||||||
from matplotlib import pyplot
|
from matplotlib import pyplot
|
||||||
from japanize_matplotlib import japanize
|
from japanize_matplotlib import japanize
|
||||||
from torch_tensorrt import compile
|
from torch_tensorrt import compile, Input
|
||||||
|
|
||||||
device = device('cuda' if is_available() else 'cpu')
|
device = device('cuda' if is_available() else 'cpu')
|
||||||
# device = 'cpu'
|
# device = 'cpu'
|
||||||
print(f'device: {device}')
|
print(f'device: {device}')
|
||||||
model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-10-15 07:08:44.537055', 'model.pth')
|
model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-10-15 14:46:51.187699', 'model.pth')
|
||||||
print(f'model path: {model_path}')
|
print(f'model path: {model_path}')
|
||||||
input_shape: int = 256
|
input_shape: int = 256
|
||||||
batch_size = 64
|
batch_size = 64
|
||||||
|
|
@ -52,9 +52,16 @@ else:
|
||||||
|
|
||||||
example_input = randn(size=[batch_size, 3, 256, 256]).float().cuda()
|
example_input = randn(size=[batch_size, 3, 256, 256]).float().cuda()
|
||||||
traced_script_module = jit.trace(model, example_inputs=[example_input])
|
traced_script_module = jit.trace(model, example_inputs=[example_input])
|
||||||
trt_model = compile(module=traced_script_module, inputs=[example_input],
|
trt_model = compile(module=traced_script_module, inputs=[
|
||||||
enabled_precisions={float32, float16},
|
Input(
|
||||||
truncate_long_and_double=True)
|
min_shape=[1, 3, 256, 256],
|
||||||
|
opt_shape=[batch_size, 3, 256, 256],
|
||||||
|
max_shape=[batch_size, 3, 256, 256]
|
||||||
|
)
|
||||||
|
],
|
||||||
|
enabled_precisions={float32},
|
||||||
|
truncate_long_and_double=True,
|
||||||
|
allow_shape_tensors=True)
|
||||||
jit.save(trt_model, join(datadir(), 'infer_all_torch_trt.ts'))
|
jit.save(trt_model, join(datadir(), 'infer_all_torch_trt.ts'))
|
||||||
|
|
||||||
# heatmap_df = DataFrame(index=image_class, columns=image_folder.classes).fillna(0)
|
# heatmap_df = DataFrame(index=image_class, columns=image_folder.classes).fillna(0)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,301 @@
|
||||||
|
from itertools import product
|
||||||
|
from math import ceil
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from PIL import Image
|
||||||
|
from numpy import array
|
||||||
|
from retinaface.pre_trained_models import get_model
|
||||||
|
from retinaface.predict_single import Model
|
||||||
|
from retinaface.network import RetinaFace
|
||||||
|
from torch import jit, randn, no_grad, Tensor, int64, tensor, onnx
|
||||||
|
import albumentations as A
|
||||||
|
from torchinfo import summary
|
||||||
|
import numpy as np
|
||||||
|
from typing import Dict, List, Optional, Tuple, Union
|
||||||
|
import cv2
|
||||||
|
from torch.nn import functional as F
|
||||||
|
from torchvision.extension import _assert_has_ops
|
||||||
|
from torchvision.utils import _log_api_usage_once
|
||||||
|
|
||||||
|
# model: Model = get_model(model_name='resnet50_2020-07-20', max_size=512, device='cuda')
|
||||||
|
# model.eval()
|
||||||
|
|
||||||
|
|
||||||
|
# Upper bound, in pixels, used for the longest image side and the padded canvas.
max_size = 512

# Load the sample photo and force three RGB channels before converting to an array.
image = Image.open(
    fp="/home/tomokazu/PycharmProjects/helloproject-ai/data/blog_images"
       "/稲場愛香/稲場愛香=juicejuice-official=12737097989-2.jpg"
)
image = image.convert(mode="RGB")
image_arr = array(image)

# Random CUDA tensor kept around as a sample input (only referenced by the
# commented-out ONNX export further down).
example_input = randn(size=[1, 3, 256, 256]).float().cuda()

# RetinaFace detector built without pretrained backbone weights and moved to the GPU.
retina_model = RetinaFace(
    name="Resnet50",
    pretrained=False,
    return_layers={"layer2": 1, "layer3": 2, "layer4": 3},
    in_channels=256,
    out_channels=256,
)
retina_model = retina_model.cuda()
|
||||||
|
|
||||||
|
|
||||||
|
# onnx.export(
|
||||||
|
# model=retina_model, args=example_input, export_params=True, verbose=False, input_names=["input"],
|
||||||
|
# output_names=["bbox", "confidence", "landmark"],
|
||||||
|
# dynamic_axes={"input": {
|
||||||
|
# 0: "batch_size",
|
||||||
|
# 2: "height",
|
||||||
|
# 3: "width"
|
||||||
|
# }, "bbox": {1: "bbox"}, "confidence": {1: "confidence"}, "landmark": {1: "landmark"}}, opset_version=16,
|
||||||
|
# f="retinaface.onnx"
|
||||||
|
# )
|
||||||
|
|
||||||
|
def pad_to_size(
    target_size: Tuple[int, int],
    image: np.ndarray,
    bboxes: Optional[np.ndarray] = None,
    keypoints: Optional[np.ndarray] = None,
) -> Dict[str, Union[np.ndarray, Tuple[int, int, int, int]]]:
    """Pad ``image`` up to ``target_size`` with a constant border.

    The missing rows/columns are split between both sides; when the amount is
    odd, the extra pixel goes to the bottom/right side.

    Args:
        target_size: ``(height, width)`` of the padded result.
        image: Array whose first two axes are height and width.
        bboxes: Optional array; columns 0 and 2 are shifted by the left pad and
            columns 1 and 3 by the top pad. Modified IN PLACE.
        keypoints: Optional array; column 0 is shifted by the left pad and
            column 1 by the top pad. Modified IN PLACE.

    Returns:
        Dict with ``"pads"`` as ``(x_min_pad, y_min_pad, x_max_pad, y_max_pad)``,
        ``"image"`` (the padded image) and, when supplied, the shifted
        ``"bboxes"`` / ``"keypoints"`` (the same array objects that were passed in).

    Raises:
        ValueError: If the target is smaller than the image along either axis.
    """
    target_height, target_width = target_size
    image_height, image_width = image.shape[:2]

    if target_width < image_width:
        raise ValueError(
            f"Target width should be bigger than image_width. "
            f"We got {target_width} {image_width}"
        )

    if target_height < image_height:
        raise ValueError(
            f"Target height should be bigger than image_height. "
            f"We got {target_height} {image_height}"
        )

    # A zero difference naturally yields (0, 0), so no special case is needed.
    y_pad = target_height - image_height
    y_min_pad = y_pad // 2
    y_max_pad = y_pad - y_min_pad

    x_pad = target_width - image_width
    x_min_pad = x_pad // 2
    x_max_pad = x_pad - x_min_pad

    result = {
        "pads": (x_min_pad, y_min_pad, x_max_pad, y_max_pad),
        # copyMakeBorder takes its pads in (top, bottom, left, right) order.
        "image": cv2.copyMakeBorder(image, y_min_pad, y_max_pad, x_min_pad, x_max_pad, cv2.BORDER_CONSTANT),
    }

    if bboxes is not None:
        # Only the top/left pads move coordinates; bottom/right pads do not.
        bboxes[:, 0] += x_min_pad
        bboxes[:, 1] += y_min_pad
        bboxes[:, 2] += x_min_pad
        bboxes[:, 3] += y_min_pad
        result["bboxes"] = bboxes

    if keypoints is not None:
        keypoints[:, 0] += x_min_pad
        keypoints[:, 1] += y_min_pad
        result["keypoints"] = keypoints

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def tensor_from_rgb_image(image: np.ndarray) -> torch.Tensor:
    """Return ``image`` as a torch tensor with its last axis moved to the front.

    The axes are permuted as ``(2, 0, 1)`` and the result is wrapped with
    ``torch.from_numpy`` (no dtype conversion or scaling is applied here).
    """
    return torch.from_numpy(np.transpose(image, axes=(2, 0, 1)))
|
||||||
|
|
||||||
|
|
||||||
|
def priorbox(min_sizes, steps, clip, image_size):
    """Build the table of prior (anchor) boxes for every feature-map cell.

    Args:
        min_sizes: One sequence of anchor sizes per entry of ``steps``.
        steps: Stride of each feature map; the map has
            ``ceil(image_size[i] / step)`` cells along axis ``i``.
        clip: When truthy, clamp every value of the result into ``[0, 1]``.
        image_size: ``(height, width)`` used to normalise centres and sizes.

    Returns:
        Tensor of shape ``(num_anchors, 4)`` whose rows are
        ``(cx, cy, width, height)``, all divided by the image dimensions.
        Rows are ordered by level, then row, then column, then anchor size.
    """
    grid_shapes = [
        (ceil(image_size[0] / stride), ceil(image_size[1] / stride))
        for stride in steps
    ]

    flat = []
    for level, (rows, cols) in enumerate(grid_shapes):
        level_sizes = min_sizes[level]
        stride = steps[level]
        for row, col in product(range(rows), range(cols)):
            # Anchor centre sits in the middle of the cell, hence the +0.5.
            center_x = (col + 0.5) * stride / image_size[1]
            center_y = (row + 0.5) * stride / image_size[0]
            for size in level_sizes:
                flat.extend(
                    (center_x, center_y, size / image_size[1], size / image_size[0])
                )

    # Hand the flat Python list over to torch as an (N, 4) table.
    table = torch.Tensor(flat).view(-1, 4)
    if clip:
        table.clamp_(max=1, min=0)
    return table
|
||||||
|
|
||||||
|
|
||||||
|
def decode(
    loc: torch.Tensor, priors: torch.Tensor, variances: Union[List[float], Tuple[float, float]]
) -> torch.Tensor:
    """Turn location offsets into corner-form boxes using the priors.

    Args:
        loc: Offsets, one row per prior; columns ``[:2]`` adjust the centre and
            columns ``[2:]`` adjust the size (through ``exp``).
        priors: Prior boxes, rows of ``(cx, cy, w, h)``.
        variances: Two scale factors: index 0 for the centre offsets, index 1
            for the size offsets.

    Returns:
        Tensor with rows ``(x_min, y_min, x_max, y_max)``.
    """
    prior_centers = priors[:, :2]
    prior_sizes = priors[:, 2:]

    centers = prior_centers + loc[:, :2] * variances[0] * prior_sizes
    sizes = prior_sizes * torch.exp(loc[:, 2:] * variances[1])

    # Centre/size form -> corner form.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return torch.cat((top_left, bottom_right), 1)
|
||||||
|
|
||||||
|
|
||||||
|
def decode_landm(
    pre: torch.Tensor, priors: torch.Tensor, variances: Union[List[float], Tuple[float, float]]
) -> torch.Tensor:
    """Turn landmark offsets into coordinates using the priors.

    Args:
        pre: Offsets, one row per prior, read as five consecutive ``(x, y)``
            pairs (columns 0-9).
        priors: Prior boxes, rows of ``(cx, cy, w, h)``.
        variances: Scale factors; only index 0 is used here.

    Returns:
        Tensor with ten columns per row: each pair is the prior centre plus the
        matching offset scaled by ``variances[0]`` and the prior size.
    """
    centers = priors[:, :2]
    sizes = priors[:, 2:]
    points = [
        centers + pre[:, start:start + 2] * variances[0] * sizes
        for start in range(0, 10, 2)
    ]
    return torch.cat(points, dim=1)
|
||||||
|
|
||||||
|
|
||||||
|
def nms(boxes: Tensor, scores: Tensor, iou_threshold: float) -> Tensor:
    """Run torchvision's non-maximum suppression operator.

    Thin wrapper around ``torch.ops.torchvision.nms``: it records API usage
    (skipped while scripting or tracing), checks that the compiled torchvision
    ops are available, and forwards the arguments unchanged.

    Args:
        boxes: Candidate boxes passed straight to the operator.
        scores: One score per box.
        iou_threshold: Overlap threshold passed straight to the operator.

    Returns:
        Whatever the operator returns (the caller in this file uses it to
        index into ``boxes``).
    """
    inside_jit = torch.jit.is_scripting() or torch.jit.is_tracing()
    if not inside_jit:
        _log_api_usage_once(nms)
    _assert_has_ops()
    return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
|
||||||
|
|
||||||
|
|
||||||
|
def unpad_from_size(
    pads: Tuple[int, int, int, int],
    image: Optional[np.array] = None,
    bboxes: Optional[np.ndarray] = None,
    keypoints: Optional[np.ndarray] = None,
) -> Dict[str, np.ndarray]:
    """Undo a padding described by ``pads``.

    Args:
        pads: ``(x_min_pad, y_min_pad, x_max_pad, y_max_pad)``.
        image: Optional padded image; the border is sliced off.
        bboxes: Optional array; columns 0 and 2 are reduced by ``x_min_pad`` and
            columns 1 and 3 by ``y_min_pad``. Modified IN PLACE.
        keypoints: Optional array; column 0 is reduced by ``x_min_pad`` and
            column 1 by ``y_min_pad``. Modified IN PLACE.

    Returns:
        Dict holding an ``"image"``, ``"bboxes"`` and/or ``"keypoints"`` entry
        for each argument that was supplied (empty when none were).
    """
    left, top, right, bottom = pads
    restored = {}

    if image is not None:
        height, width = image.shape[:2]
        restored["image"] = image[top: height - bottom, left: width - right]

    if bboxes is not None:
        # Each corner is an (x, y) column pair: (0, 1) and (2, 3).
        for x_col, y_col in ((0, 1), (2, 3)):
            bboxes[:, x_col] -= left
            bboxes[:, y_col] -= top
        restored["bboxes"] = bboxes

    if keypoints is not None:
        keypoints[:, 0] -= left
        keypoints[:, 1] -= top
        restored["keypoints"] = keypoints

    return restored
|
||||||
|
|
||||||
|
|
||||||
|
# --- Detection settings -----------------------------------------------------
device = "cuda"
variance = [0.1, 0.2]
nms_threshold = .4
confidence_threshold = .7

# Resize so the longest side equals max_size, then normalise pixel values.
transform = A.Compose(
    [
        A.LongestMaxSize(max_size=max_size, p=1),
        A.Normalize(p=1),
    ]
)

# Prior boxes for a square max_size x max_size input, kept on the target device.
_priorbox = priorbox(
    min_sizes=[[16, 32], [64, 128], [256, 512]],
    steps=[8, 16, 32],
    clip=False,
    image_size=(max_size, max_size),
)
_priorbox = _priorbox.to(device)

# Size of the source photo, needed to map detections back to it.
original_height, original_width = image_arr.shape[:2]

# Multipliers that turn normalised outputs into pixels:
# 10 entries for the landmark columns, 4 entries for the box columns.
scale_landmarks = torch.from_numpy(np.tile([max_size, max_size], 5)).to(device)
scale_bboxes = torch.from_numpy(np.tile([max_size, max_size], 2)).to(device)

# --- Preprocess the sample image -------------------------------------------
transformed_image = transform(image=image_arr)["image"]

# Pad the resized image to a square canvas and remember the pads so that
# detections can be shifted back afterwards.
paded = pad_to_size(target_size=(max_size, max_size), image=transformed_image)
pads = paded["pads"]

torched_image = tensor_from_rgb_image(paded["image"])
torched_image = torched_image.to(device)
|
||||||
|
|
||||||
|
|
||||||
|
# loc, conf, land = retina_model(torched_image.unsqueeze(0))
|
||||||
|
|
||||||
|
|
||||||
|
def infer(loc, conf, land):
    """Post-process raw detector outputs into a list of face annotations.

    Relies on module-level state: ``_priorbox``, ``variance``,
    ``scale_bboxes``, ``scale_landmarks``, ``confidence_threshold``,
    ``nms_threshold``, ``pads``, ``max_size``, ``original_height`` and
    ``original_width``. Only the first element along the leading axis of each
    argument is processed.

    Args:
        loc: Box regression output (first value returned by the detector).
        conf: Class scores; softmax is taken over the last axis and column 1
            is used as the detection score.
        land: Landmark regression output.

    Returns:
        A list of dicts with keys ``"bbox"``, ``"score"`` and ``"landmarks"``
        (five ``[x, y]`` pairs), expressed in original-image pixels. If no box
        survives NMS, a single placeholder
        ``{"bbox": [], "score": -1, "landmarks": []}`` is returned.
    """
    probabilities = F.softmax(conf, dim=-1)

    # Decode against the priors and scale to pixels of the padded canvas.
    candidate_boxes = decode(loc.data[0], _priorbox, variance)
    candidate_boxes *= scale_bboxes
    candidate_scores = probabilities[0][:, 1]

    candidate_points = decode_landm(land.data[0], _priorbox, variance)
    candidate_points *= scale_landmarks

    # Drop low-confidence candidates.
    confident = torch.where(candidate_scores > confidence_threshold)[0]
    candidate_boxes = candidate_boxes[confident]
    candidate_points = candidate_points[confident]
    candidate_scores = candidate_scores[confident]

    # Highest score first.
    ranking = candidate_scores.argsort(descending=True)
    candidate_boxes = candidate_boxes[ranking]
    candidate_points = candidate_points[ranking]
    candidate_scores = candidate_scores[ranking]

    # Non-maximum suppression.
    survivors = nms(candidate_boxes, candidate_scores, nms_threshold)
    kept_boxes = candidate_boxes[survivors, :].int()

    if kept_boxes.shape[0] == 0:
        return [{"bbox": [], "score": -1, "landmarks": []}]

    kept_points = candidate_points[survivors]

    # Move everything to NumPy on the CPU.
    score_values = candidate_scores[survivors].cpu().detach().numpy().astype(np.float64)
    box_array = kept_boxes.cpu().numpy()
    # One (x, y) row per landmark so the pad shift applies to columns 0 and 1.
    point_array = kept_points.cpu().numpy().reshape([-1, 2])

    unpadded = unpad_from_size(pads, bboxes=box_array, keypoints=point_array)

    # Undo the longest-side resize to get back to original-image pixels.
    resize_coeff = max(original_height, original_width) / max_size
    final_boxes = (unpadded["bboxes"] * resize_coeff).astype(int)
    final_points = (unpadded["keypoints"].reshape(-1, 10) * resize_coeff).astype(int)

    annotations = []
    for index, bbox in enumerate(final_boxes):
        x_min, y_min, x_max, y_max = bbox

        # The clipped values are only used to reject boxes that collapse once
        # limited to the image; the reported bbox below is the unclipped one.
        x_min = np.clip(x_min, 0, original_width - 1)
        x_max = np.clip(x_max, x_min + 1, original_width - 1)
        y_min = np.clip(y_min, 0, original_height - 1)
        y_max = np.clip(y_max, y_min + 1, original_height - 1)

        if x_min >= x_max or y_min >= y_max:
            continue

        annotations.append(
            {
                "bbox": bbox.tolist(),
                "score": score_values[index],
                "landmarks": final_points[index].reshape(-1, 2).tolist(),
            }
        )
    return annotations
|
||||||
|
|
||||||
|
|
||||||
|
# Add a leading batch axis, run the detector, and feed its outputs
# positionally into the post-processing step.
ans = infer(*retina_model(torched_image[None]))
print(ans)
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
from torch import load, randn, float, half, jit, ones, no_grad
|
||||||
|
import torch_tensorrt
|
||||||
|
from torch.nn import Module
|
||||||
|
from torch.onnx import export
|
||||||
|
|
||||||
|
# Load the serialized checkpoint from disk.
model: Module = load(
    f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-10-15 14:46:51.187699/checkpoints/80.pth'
)
# GPU, inference mode, then half precision.
model.cuda()
model.eval()
model = model.half()

# NOTE(review): indentation was lost in this view; the export call is assumed
# to sit inside the no_grad block together with example_input — confirm.
with no_grad():
    # Half-precision sample input on the GPU, matching the model's precision.
    example_input = randn(1, 3, 224, 224).cuda().half()

    export(
        model=model,
        args=example_input,
        f="onnx_test.onnx",
        input_names=["input"],
        output_names=["output"],
        # Axes 0, 2 and 3 of the input are declared dynamic.
        dynamic_axes={"input": {0: "batch_size", 2: "height", 3: "width"}},
        verbose=False,
    )
|
||||||
|
|
@ -1,17 +1,25 @@
|
||||||
from torch import load, randn, float, half, jit
|
from torch import load, randn, float, half, jit, ones, no_grad
|
||||||
import torch_tensorrt
|
import torch_tensorrt
|
||||||
from torch.nn import Module
|
from torch.nn import Module
|
||||||
|
from torch_tensorrt import Input
|
||||||
|
|
||||||
model: Module = load(
|
model: Module = load(
|
||||||
f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-05-28 23:05:09.874085/model.pth')
|
f='/home/tomokazu/PycharmProjects/helloproject-ai/data/artifact/facenet-tl_2023-05-28 23:05:09.874085/model.pth')
|
||||||
model.cuda()
|
model.cuda()
|
||||||
model.eval()
|
model.eval()
|
||||||
|
with no_grad():
|
||||||
example_input = randn(size=[1, 3, 224, 224]).float().cuda()
|
example_input = ones(1, 3, 224, 224).cuda()
|
||||||
|
|
||||||
traced_script_module = jit.trace(model, example_inputs=[example_input])
|
traced_script_module = jit.trace(model, example_inputs=[example_input])
|
||||||
tensorrt_module = torch_tensorrt.compile(module=traced_script_module, inputs=[example_input],
|
tensorrt_module = torch_tensorrt.compile(module=traced_script_module, inputs=[
|
||||||
enabled_precisions={float, half},
|
Input(
|
||||||
truncate_long_and_double=True)
|
min_shape=[1, 3, 224, 224],
|
||||||
|
opt_shape=[32, 3, 224, 224],
|
||||||
|
max_shape=[32, 3, 224, 224]
|
||||||
|
)
|
||||||
|
],
|
||||||
|
enabled_precisions={float},
|
||||||
|
truncate_long_and_double=True,
|
||||||
|
allow_shape_tensors=True)
|
||||||
|
|
||||||
jit.save(tensorrt_module, "trt_test.ts")
|
jit.save(tensorrt_module, "trt_test.ts")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue