Compare commits
No commits in common. "bf22cbf918e018f406aed262b9f30e0c0c45c9b7" and "70bb7fa5ba33747c0c72c93af05fe63ae03ac134" have entirely different histories.
bf22cbf918
...
70bb7fa5ba
|
|
@ -1,37 +0,0 @@
|
||||||
import cupy
|
|
||||||
import numpy
|
|
||||||
import os
|
|
||||||
import shutil
|
|
||||||
|
|
||||||
TYPE = "facenet"
|
|
||||||
|
|
||||||
DEST_ROOT = r"D:\helloproject-ai-data\similar_face"
|
|
||||||
SAMPLE_FILE = r"D:\helloproject-ai-data\face_cropped\井上春華\井上春華=morningmusume16ki=12815441476-0-0.jpg"
|
|
||||||
PICK_N = 500
|
|
||||||
PROBE = 0.6
|
|
||||||
embeddings = cupy.load(rf"C:\Users\tomokazu\PycharmProjects\helloproject-ai\embeddings_{TYPE}.npy")
|
|
||||||
embeddings_label = numpy.load(rf"C:\Users\tomokazu\PycharmProjects\helloproject-ai\embeddings_{TYPE}_label.npy")
|
|
||||||
|
|
||||||
sample_pos = numpy.argwhere(embeddings_label == os.path.basename(SAMPLE_FILE))
|
|
||||||
sample_emb = embeddings[*sample_pos, :].T
|
|
||||||
print(sample_pos)
|
|
||||||
print(embeddings_label.shape)
|
|
||||||
print(embeddings.shape)
|
|
||||||
print(sample_emb.shape)
|
|
||||||
similarity: cupy.ndarray = cupy.dot(embeddings, sample_emb).T / (
|
|
||||||
cupy.linalg.norm(embeddings, axis=1) * cupy.linalg.norm(sample_emb))
|
|
||||||
# sample_norm = cupy.linalg.norm(sample_emb)
|
|
||||||
# similarity = cupy.apply_along_axis(lambda x: cupy.dot(x, sample_emb) / (cupy.linalg.norm(x) * sample_norm), 1,
|
|
||||||
# embeddings)
|
|
||||||
print(similarity.shape)
|
|
||||||
similar_pos: cupy.ndarray = similarity.reshape((-1,)).argsort()[::-1]
|
|
||||||
print(similar_pos)
|
|
||||||
p = int(cupy.count_nonzero(cupy.argwhere(similarity > PROBE)))
|
|
||||||
# p = 300
|
|
||||||
sim_list = embeddings_label[cupy.asnumpy(similar_pos)[:p]]
|
|
||||||
print(numpy.stack([sim_list, cupy.asnumpy(similarity.reshape((-1,))[similar_pos])[:p]], axis=1))
|
|
||||||
os.makedirs(os.path.join(DEST_ROOT, os.path.splitext(os.path.basename(SAMPLE_FILE))[0] + f"_{TYPE}"), exist_ok=True)
|
|
||||||
for file in sim_list:
|
|
||||||
shutil.copyfile(os.path.join(r"D:\helloproject-ai-data\face_cropped", file.split("=")[0], file),
|
|
||||||
os.path.join(DEST_ROOT, os.path.splitext(os.path.basename(SAMPLE_FILE))[0] + f"_{TYPE}", file))
|
|
||||||
print(f"Copied {p} file(s).")
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
import gradio
|
|
||||||
import pathlib
|
|
||||||
import cupy
|
|
||||||
import numpy
|
|
||||||
import os
|
|
||||||
|
|
||||||
TYPE = "facenet"
|
|
||||||
|
|
||||||
embeddings = cupy.load(rf"C:\Users\tomokazu\PycharmProjects\helloproject-ai\embeddings_{TYPE}.npy")
|
|
||||||
embeddings_label = numpy.load(rf"C:\Users\tomokazu\PycharmProjects\helloproject-ai\embeddings_{TYPE}_label.npy")
|
|
||||||
|
|
||||||
|
|
||||||
def get_similar_pic(_input: pathlib.Path):
|
|
||||||
search_key = os.path.basename(_input)
|
|
||||||
key_pos = numpy.argwhere(embeddings_label == search_key)
|
|
||||||
search_emb = embeddings[*key_pos, :].T
|
|
||||||
similarity_array: cupy.ndarray = (cupy.dot(embeddings, search_emb).T /
|
|
||||||
(cupy.linalg.norm(embeddings, axis=1) * cupy.linalg.norm(search_emb)))
|
|
||||||
|
|
@ -1,83 +0,0 @@
|
||||||
import os.path
|
|
||||||
from io import BytesIO
|
|
||||||
from itertools import chain
|
|
||||||
from PIL import Image
|
|
||||||
from more_itertools import chunked
|
|
||||||
from os import listdir
|
|
||||||
from torchinfo import summary
|
|
||||||
from torchvision import transforms
|
|
||||||
from torchvision.io import decode_jpeg
|
|
||||||
from tqdm import tqdm
|
|
||||||
from facenet_pytorch import InceptionResnetV1
|
|
||||||
import torch
|
|
||||||
import numpy
|
|
||||||
from insightface.app import FaceAnalysis
|
|
||||||
|
|
||||||
CROPPED_DIR = r"D:\helloproject-ai-data\face_cropped"
|
|
||||||
CHUNK_SIZE = 64
|
|
||||||
DEVICE = torch.device("cuda")
|
|
||||||
INPUT_SIZE = 256
|
|
||||||
TYPE = "facenet"
|
|
||||||
face_analysis = FaceAnalysis(providers=[
|
|
||||||
('TensorrtExecutionProvider', {
|
|
||||||
'trt_engine_cache_enable': True,
|
|
||||||
'trt_engine_cache_path': 'trt_cache',
|
|
||||||
'trt_fp16_enable': True,
|
|
||||||
}),
|
|
||||||
'CUDAExecutionProvider',
|
|
||||||
'CPUExecutionProvider',
|
|
||||||
])
|
|
||||||
face_analysis.prepare(ctx_id=0, det_size=(INPUT_SIZE, INPUT_SIZE))
|
|
||||||
|
|
||||||
transform = transforms.Compose([
|
|
||||||
# transforms.ToTensor(),
|
|
||||||
transforms.Resize(size=int(INPUT_SIZE * 1.2), interpolation=transforms.InterpolationMode.BILINEAR),
|
|
||||||
transforms.CenterCrop(size=INPUT_SIZE)
|
|
||||||
# transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
|
|
||||||
])
|
|
||||||
|
|
||||||
# model: torch.nn.Module = get_model(name=MODEL_NAME)
|
|
||||||
# model.load_state_dict(
|
|
||||||
# torch.load(os.path.join(os.path.dirname(__file__), "edgeface", f"{MODEL_NAME}.pt"), weights_only=False))
|
|
||||||
# model = torch.load(r"\\192.168.250.1\share\helloproject-ai-data\artifact\vggface2_facenet.pth", weights_only=False)
|
|
||||||
# model = model.eval().cuda(device=DEVICE)
|
|
||||||
with torch.no_grad():
|
|
||||||
# print(model.eval())
|
|
||||||
# summary(model, input_size=[CHUNK_SIZE, 3, INPUT_SIZE, INPUT_SIZE])
|
|
||||||
# trt_model = torch.compile(model)
|
|
||||||
|
|
||||||
embeddings: numpy.ndarray | None = None
|
|
||||||
labels = []
|
|
||||||
if os.path.exists(f"embeddings_{TYPE}_label.npy"):
|
|
||||||
embeddings: numpy.ndarray = numpy.load(f"embeddings_{TYPE}.npy")
|
|
||||||
labels: list[str] = numpy.load(f"embeddings_{TYPE}_label.npy").tolist()
|
|
||||||
|
|
||||||
all_cropped_list = list(
|
|
||||||
chain.from_iterable([listdir(os.path.join(CROPPED_DIR, name)) for name in listdir(CROPPED_DIR)]))
|
|
||||||
# all_cropped_list = all_cropped_list[:1000]
|
|
||||||
|
|
||||||
labels_set = set(labels)
|
|
||||||
pbar = tqdm(total=all_cropped_list.__len__())
|
|
||||||
|
|
||||||
for file_name in all_cropped_list:
|
|
||||||
sub_dir_name = file_name.split("=")[0]
|
|
||||||
pbar.update(1)
|
|
||||||
if pbar.desc != sub_dir_name:
|
|
||||||
pbar.set_description(sub_dir_name)
|
|
||||||
if file_name in labels_set:
|
|
||||||
continue
|
|
||||||
|
|
||||||
image = numpy.array(Image.open(os.path.join(CROPPED_DIR, sub_dir_name, file_name)))[:, :, [2, 1, 0]]
|
|
||||||
emb = face_analysis.get(image)
|
|
||||||
if not emb:
|
|
||||||
continue
|
|
||||||
if embeddings is not None:
|
|
||||||
embeddings = numpy.concatenate([embeddings, numpy.expand_dims(emb[0].embedding, 0)], axis=0)
|
|
||||||
# print(embeddings.shape)
|
|
||||||
else:
|
|
||||||
embeddings = numpy.expand_dims(emb[0].embedding, 0)
|
|
||||||
labels.append(file_name)
|
|
||||||
|
|
||||||
# print(embeddings.shape, labels.__len__())
|
|
||||||
numpy.save(f"embeddings_{TYPE}.npy", embeddings)
|
|
||||||
numpy.save(f"embeddings_{TYPE}_label.npy", numpy.array(labels))
|
|
||||||
|
|
@ -1,96 +0,0 @@
|
||||||
import concurrent.futures
|
|
||||||
import os.path
|
|
||||||
from concurrent.futures.process import ProcessPoolExecutor
|
|
||||||
from itertools import chain
|
|
||||||
from PIL import Image
|
|
||||||
from more_itertools import chunked
|
|
||||||
from os import listdir
|
|
||||||
from torchvision import transforms
|
|
||||||
from torchvision.io import decode_jpeg
|
|
||||||
from tqdm import tqdm
|
|
||||||
from edgeface.backbones import get_model
|
|
||||||
import torch
|
|
||||||
import numpy
|
|
||||||
from edgeface.face_alignment import align
|
|
||||||
|
|
||||||
CROPPED_DIR = r"D:\helloproject-ai-data\face_cropped"
|
|
||||||
MODEL_NAME = "edgeface_s_gamma_05"
|
|
||||||
CHUNK_SIZE = 64
|
|
||||||
DEVICE = torch.device("cuda")
|
|
||||||
INPUT_SIZE = 112
|
|
||||||
TYPE = "edgeface"
|
|
||||||
|
|
||||||
transform = transforms.Compose([
|
|
||||||
transforms.ToTensor(),
|
|
||||||
# lambda x: x.to(torch.float32) / 255.,
|
|
||||||
transforms.Resize(size=INPUT_SIZE, interpolation=transforms.InterpolationMode.BILINEAR),
|
|
||||||
# transforms.Resize(size=int(INPUT_SIZE * 1.2), interpolation=transforms.InterpolationMode.BILINEAR),
|
|
||||||
# transforms.CenterCrop(size=INPUT_SIZE)
|
|
||||||
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
def align_edgeface(p: str):
|
|
||||||
sub_dir_name = p.split("=")[0]
|
|
||||||
aligned = align.get_aligned_face(os.path.join(CROPPED_DIR, sub_dir_name, p))
|
|
||||||
if aligned is None:
|
|
||||||
return None
|
|
||||||
return transform(aligned).to(DEVICE)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
model: torch.nn.Module = get_model(name=MODEL_NAME)
|
|
||||||
model.load_state_dict(
|
|
||||||
torch.load(os.path.join(os.path.dirname(__file__), "edgeface", "checkpoints", f"{MODEL_NAME}.pt"),
|
|
||||||
weights_only=False))
|
|
||||||
model = model.eval().cuda(device=DEVICE)
|
|
||||||
with torch.no_grad(), torch.autocast(device_type='cuda', dtype=torch.float16):
|
|
||||||
# print(model.eval())
|
|
||||||
# summary(model, input_size=[CHUNK_SIZE, 3, INPUT_SIZE, INPUT_SIZE])
|
|
||||||
# trt_model = torch.compile(model)
|
|
||||||
|
|
||||||
embeddings: numpy.ndarray | None = None
|
|
||||||
labels = []
|
|
||||||
if os.path.exists(f"embeddings_{TYPE}_label.npy"):
|
|
||||||
embeddings: numpy.ndarray = numpy.load(f"embeddings_{TYPE}.npy")
|
|
||||||
labels: list[str] = numpy.load(f"embeddings_{TYPE}_label.npy").tolist()
|
|
||||||
|
|
||||||
all_cropped_list = list(
|
|
||||||
chain.from_iterable([listdir(os.path.join(CROPPED_DIR, name)) for name in listdir(CROPPED_DIR)]))
|
|
||||||
# all_cropped_list = all_cropped_list[:1000]
|
|
||||||
|
|
||||||
labels_set = set(labels)
|
|
||||||
pbar = tqdm(total=all_cropped_list.__len__())
|
|
||||||
|
|
||||||
for chk in chunked(all_cropped_list, n=CHUNK_SIZE):
|
|
||||||
decoded_images = []
|
|
||||||
pool_res_list = []
|
|
||||||
for file_name in chk:
|
|
||||||
pbar.update(1)
|
|
||||||
if pbar.desc != file_name.split("=")[0]:
|
|
||||||
pbar.set_description(file_name.split("=")[0])
|
|
||||||
if file_name in labels_set:
|
|
||||||
continue
|
|
||||||
pool_res = align_edgeface(file_name)
|
|
||||||
pool_res_list.append(pool_res)
|
|
||||||
for result, name in zip(pool_res_list, chk):
|
|
||||||
# result = result.result()
|
|
||||||
if result is not None:
|
|
||||||
decoded_images.append(result)
|
|
||||||
labels.append(name)
|
|
||||||
|
|
||||||
if not decoded_images:
|
|
||||||
continue
|
|
||||||
stacked = torch.stack(decoded_images)
|
|
||||||
# print(stacked.shape)
|
|
||||||
res = model(stacked)
|
|
||||||
# print(res.shape)
|
|
||||||
if embeddings is not None:
|
|
||||||
embeddings = numpy.concatenate([embeddings, res.cpu().numpy()], axis=0)
|
|
||||||
# print(embeddings.shape)
|
|
||||||
else:
|
|
||||||
embeddings = res.cpu().numpy()
|
|
||||||
|
|
||||||
# print(embeddings.shape, labels.__len__())
|
|
||||||
numpy.save(f"embeddings_{TYPE}.npy", embeddings)
|
|
||||||
numpy.save(f"embeddings_{TYPE}_label.npy", numpy.array(labels))
|
|
||||||
|
|
@ -17,13 +17,13 @@ if not isfile(argv[1]):
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
face_analysis = FaceAnalysis(providers=[
|
face_analysis = FaceAnalysis(providers=[
|
||||||
|
# 'CUDAExecutionProvider',
|
||||||
|
# 'CPUExecutionProvider',
|
||||||
('TensorrtExecutionProvider', {
|
('TensorrtExecutionProvider', {
|
||||||
'trt_engine_cache_enable': True,
|
'trt_engine_cache_enable': True,
|
||||||
'trt_engine_cache_path': join(getcwd(), 'onnx_cache'),
|
'trt_engine_cache_path': join(getcwd(), 'onnx_cache'),
|
||||||
'trt_fp16_enable': True,
|
'trt_fp16_enable': True,
|
||||||
}),
|
})
|
||||||
'CUDAExecutionProvider',
|
|
||||||
'CPUExecutionProvider',
|
|
||||||
], allowed_modules=['recognition', 'detection'])
|
], allowed_modules=['recognition', 'detection'])
|
||||||
face_analysis.prepare(ctx_id=0, det_size=(160, 160))
|
face_analysis.prepare(ctx_id=0, det_size=(160, 160))
|
||||||
|
|
||||||
|
|
@ -55,7 +55,7 @@ for file in image_files:
|
||||||
cosine = dot(emb[0].embedding, collect_image_emb[0].embedding) / \
|
cosine = dot(emb[0].embedding, collect_image_emb[0].embedding) / \
|
||||||
(norm(emb[0].embedding) * norm(collect_image_emb[0].embedding))
|
(norm(emb[0].embedding) * norm(collect_image_emb[0].embedding))
|
||||||
print(file, cosine)
|
print(file, cosine)
|
||||||
if cosine > 0.6:
|
if cosine > 0.3:
|
||||||
copyfile(join(getcwd(), argv[2], file), join(getcwd(), dir_name, "true", file))
|
copyfile(join(getcwd(), argv[2], file), join(getcwd(), dir_name, "true", file))
|
||||||
else:
|
else:
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,64 +0,0 @@
|
||||||
import time
|
|
||||||
from itertools import chain
|
|
||||||
from shutil import copyfile
|
|
||||||
from insightface.app import FaceAnalysis
|
|
||||||
from os import getcwd, listdir, makedirs, supports_bytes_environ
|
|
||||||
from os.path import join, isdir, isfile, basename, dirname
|
|
||||||
from numpy import dot, array
|
|
||||||
from numpy.linalg import norm
|
|
||||||
from PIL import Image
|
|
||||||
from sys import argv
|
|
||||||
|
|
||||||
if argv.__len__() != 2:
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
if not isfile(argv[1]):
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
face_analysis = FaceAnalysis(providers=[
|
|
||||||
# 'CUDAExecutionProvider',
|
|
||||||
# 'CPUExecutionProvider',
|
|
||||||
('TensorrtExecutionProvider', {
|
|
||||||
'trt_engine_cache_enable': True,
|
|
||||||
'trt_engine_cache_path': join(getcwd(), 'onnx_cache'),
|
|
||||||
'trt_fp16_enable': True,
|
|
||||||
})
|
|
||||||
], allowed_modules=['recognition', 'detection'])
|
|
||||||
face_analysis.prepare(ctx_id=0, det_size=(160, 160))
|
|
||||||
|
|
||||||
print(argv)
|
|
||||||
ROOT_DIR = r"D:\helloproject-ai-data\face_cropped"
|
|
||||||
collect_image = array(Image.open(join(getcwd(), argv[1])))[:, :, [2, 1, 0]]
|
|
||||||
image_files: list[str] = chain.from_iterable([[join() for file in listdir(join(ROOT_DIR, subdir))] for subdir in listdir(ROOT_DIR)])
|
|
||||||
|
|
||||||
collect_image_emb = face_analysis.get(collect_image)
|
|
||||||
if collect_image_emb.__len__() == 0:
|
|
||||||
print("Not found face: ", argv[1])
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
# collect_image_emb = collect_image_emb[0].embedding
|
|
||||||
|
|
||||||
dir_name = basename(dirname(argv[2]))
|
|
||||||
print(dir_name)
|
|
||||||
makedirs(join(getcwd(), dir_name, "true"), exist_ok=True)
|
|
||||||
makedirs(join(getcwd(), dir_name, "false"), exist_ok=True)
|
|
||||||
|
|
||||||
images = []
|
|
||||||
begin = time.time()
|
|
||||||
for file in image_files:
|
|
||||||
if isfile(join(getcwd(), argv[2], file)):
|
|
||||||
# print(join(getcwd(), argv[2], file))
|
|
||||||
image = array(Image.open(join(getcwd(), argv[2], file)))[:, :, [2, 1, 0]]
|
|
||||||
emb = face_analysis.get(image)
|
|
||||||
if not emb:
|
|
||||||
continue
|
|
||||||
cosine = dot(emb[0].embedding, collect_image_emb[0].embedding) / \
|
|
||||||
(norm(emb[0].embedding) * norm(collect_image_emb[0].embedding))
|
|
||||||
print(file, cosine)
|
|
||||||
if cosine > 0.3:
|
|
||||||
copyfile(join(getcwd(), argv[2], file), join(getcwd(), dir_name, "true", file))
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
# copyfile(join(getcwd(), argv[2], file), join(getcwd(), dir_name, "false", file))
|
|
||||||
|
|
||||||
print(f"{time.time() - begin}sec")
|
|
||||||
|
|
@ -1,115 +0,0 @@
|
||||||
# import cv2
|
|
||||||
import msgspec
|
|
||||||
from torch import tensor
|
|
||||||
import torch
|
|
||||||
from torchvision.transforms import functional, InterpolationMode
|
|
||||||
from torchvision.io import decode_jpeg
|
|
||||||
import os
|
|
||||||
# import shutil
|
|
||||||
import numpy
|
|
||||||
from PIL import Image
|
|
||||||
from io import BytesIO
|
|
||||||
from more_itertools import chunked
|
|
||||||
from tqdm import tqdm
|
|
||||||
import math
|
|
||||||
|
|
||||||
ROOT_DIR = r"D:\helloproject-ai-data\blog_images"
|
|
||||||
CROPPED_DIR = r"D:\helloproject-ai-data\face_cropped"
|
|
||||||
CROP_THRESHOLD = 0.8
|
|
||||||
|
|
||||||
inference_size = 640
|
|
||||||
device = torch.device("cuda")
|
|
||||||
|
|
||||||
|
|
||||||
def calc_rotate(landmark: list[list[float]]) -> tuple[tuple[int, int], float]:
|
|
||||||
left_eye, right_eye, nose, left_mouth, right_mouth = landmark
|
|
||||||
center_x = sum((left_eye[0], right_eye[0], left_mouth[0], right_mouth[0])) / 4
|
|
||||||
center_y = sum((left_eye[1], right_eye[1], left_mouth[1], right_mouth[1])) / 4
|
|
||||||
eye_center = (right_eye[0] + left_eye[0]) / 2, (right_eye[1] + left_eye[1]) / 2
|
|
||||||
mouth_center = (right_mouth[0] + left_mouth[0]) / 2, (right_mouth[1] + left_mouth[1]) / 2
|
|
||||||
return (int(center_x), int(center_y)), numpy.arctan2(eye_center[0] - mouth_center[0],
|
|
||||||
mouth_center[1] - eye_center[1])
|
|
||||||
|
|
||||||
|
|
||||||
def cropper(pos_list: list[tuple[str, list[list] | None]], tqdm_pbar: tqdm, exist_set: set[str]):
|
|
||||||
for name, pos_s in pos_list:
|
|
||||||
file_name = name
|
|
||||||
sub_dir_name = file_name.split("=", maxsplit=1)[0]
|
|
||||||
if pos_s:
|
|
||||||
decoded_image = None
|
|
||||||
# host_image = decoded_image.cpu().numpy().transpose([1, 2, 0])
|
|
||||||
for order, pos in enumerate(pos_s):
|
|
||||||
dest_name = file_name.split(".")[0] + f"-{order}.jpg"
|
|
||||||
if dest_name in exist_set:
|
|
||||||
continue
|
|
||||||
# print(dest_name)
|
|
||||||
if decoded_image is None:
|
|
||||||
dat = numpy.fromfile(os.path.join(ROOT_DIR, sub_dir_name, file_name),
|
|
||||||
dtype=numpy.uint8)
|
|
||||||
try:
|
|
||||||
decoded_image = decode_jpeg(tensor(dat), device=device)
|
|
||||||
except:
|
|
||||||
decoded_image = tensor(
|
|
||||||
numpy.array(Image.open(BytesIO(dat.tobytes()))).transpose([2, 0, 1])).to(device)
|
|
||||||
bbox, acc, landmark = pos
|
|
||||||
if acc > CROP_THRESHOLD:
|
|
||||||
# scale = 1.0
|
|
||||||
# if max(decoded_image.shape[1:]) > inference_size:
|
|
||||||
scale = max(decoded_image.shape[1:]) / inference_size
|
|
||||||
|
|
||||||
bbox = list(map(lambda x: x * scale, bbox))
|
|
||||||
landmark = list(map(lambda x: x * scale, landmark))
|
|
||||||
|
|
||||||
# print(file_name, decoded_image.shape[1:], scale, acc)
|
|
||||||
center, rotate_angle = calc_rotate(list(chunked(landmark, n=2)))
|
|
||||||
# print(bbox, landmark, center)
|
|
||||||
rotated = functional.rotate(decoded_image, angle=(360 / (2 * math.pi)) * rotate_angle,
|
|
||||||
center=list(center), interpolation=InterpolationMode.BILINEAR)
|
|
||||||
crop_size = max([int(bbox[3] - bbox[1]), int(bbox[2] - bbox[0])])
|
|
||||||
if crop_size < 100:
|
|
||||||
continue
|
|
||||||
cropped = functional.crop(rotated,
|
|
||||||
top=int(center[1] - crop_size / 2),
|
|
||||||
left=int(center[0] - crop_size / 2),
|
|
||||||
height=crop_size, width=crop_size)
|
|
||||||
with open(os.path.join(CROPPED_DIR, sub_dir_name, dest_name), mode="wb") as fp:
|
|
||||||
functional.to_pil_image(cropped, mode="RGB").save(fp, format="jpeg", quality=85)
|
|
||||||
if tqdm_pbar.desc != sub_dir_name:
|
|
||||||
tqdm_pbar.set_description(sub_dir_name)
|
|
||||||
tqdm_pbar.update(n=1)
|
|
||||||
|
|
||||||
|
|
||||||
# cv2.rectangle(host_image, (int(bbox[0]), int(bbox[1])),
|
|
||||||
# (int(bbox[2]), int(bbox[3])),
|
|
||||||
# (255, 0, 0), 2, cv2.LINE_AA)
|
|
||||||
# with open(
|
|
||||||
# os.path.join(CROPPED_DIR, sub_dir_name, file_name),
|
|
||||||
# mode="wb") as fp:
|
|
||||||
# cv2.imencode(".jpg", cv2.cvtColor(host_image, cv2.COLOR_BGR2RGB))[1].tofile(fp)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
with open(file="faces.jsonl", mode="r", encoding="utf-8") as fp:
|
|
||||||
face_pos_list: list[dict[str, list | None]] = [msgspec.json.decode(line) for line in
|
|
||||||
fp.read().removesuffix("\n").split("\n")]
|
|
||||||
|
|
||||||
face_pos_dict = {}
|
|
||||||
for face_pos in face_pos_list:
|
|
||||||
if next(iter(face_pos.values())) is None:
|
|
||||||
face_pos_dict[next(iter(face_pos.keys()))] = None
|
|
||||||
else:
|
|
||||||
if not next(iter(face_pos.keys())) in face_pos_dict.keys():
|
|
||||||
face_pos_dict[next(iter(face_pos.keys()))] = []
|
|
||||||
face_pos_dict[next(iter(face_pos.keys()))].append(next(iter(face_pos.values())))
|
|
||||||
|
|
||||||
# shutil.rmtree(CROPPED_DIR)
|
|
||||||
os.makedirs(CROPPED_DIR, exist_ok=True)
|
|
||||||
# print(face_pos)
|
|
||||||
names_set = {next(iter(_dict.keys())).split("=", maxsplit=1)[0] for _dict in face_pos_list}
|
|
||||||
print(names_set)
|
|
||||||
[os.makedirs(os.path.join(CROPPED_DIR, name), exist_ok=True) for name in names_set]
|
|
||||||
pbar = tqdm(total=face_pos_dict.items().__len__())
|
|
||||||
|
|
||||||
exist_set = set().union(
|
|
||||||
*[set(os.listdir(os.path.join(CROPPED_DIR, sub_dir))) for sub_dir in os.listdir(CROPPED_DIR)])
|
|
||||||
cropper(list(face_pos_dict.items()), pbar, exist_set)
|
|
||||||
Binary file not shown.
|
|
@ -5,7 +5,7 @@ import tkinter
|
||||||
from nvjpeg_decoder import decode
|
from nvjpeg_decoder import decode
|
||||||
from os import listdir, getcwd
|
from os import listdir, getcwd
|
||||||
from os.path import join
|
from os.path import join
|
||||||
from numpy import array, fromfile, uint8
|
from numpy import array
|
||||||
# from matplotlib import pyplot, figure
|
# from matplotlib import pyplot, figure
|
||||||
# from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
# from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
|
||||||
# import matplotlib_fontja
|
# import matplotlib_fontja
|
||||||
|
|
@ -24,9 +24,8 @@ session = InferenceSession(
|
||||||
)
|
)
|
||||||
for member in listdir(datadir):
|
for member in listdir(datadir):
|
||||||
for file in listdir(join(datadir, member)):
|
for file in listdir(join(datadir, member)):
|
||||||
# with open(join(datadir, member, file), mode="rb") as f:
|
with open(join(datadir, member, file), mode="rb") as f:
|
||||||
(data, (scale, (width, height))) = decode(fromfile(join(datadir, member, file), dtype=uint8), "imagenet",
|
(data, (width, height)) = decode((f.read()), "imagenet")
|
||||||
(1080, 1080))
|
|
||||||
print(width, height)
|
print(width, height)
|
||||||
image_arr = array(data).reshape((1, 3, height, width)) # .transpose([1, 2, 0])
|
image_arr = array(data).reshape((1, 3, height, width)) # .transpose([1, 2, 0])
|
||||||
session.run(input_feed={'input': image_arr}, output_names=['bbox', 'confidence', 'landmark'])
|
session.run(input_feed={'input': image_arr}, output_names=['bbox', 'confidence', 'landmark'])
|
||||||
|
|
|
||||||
|
|
@ -1,17 +1,14 @@
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
from concurrent.futures.process import ProcessPoolExecutor
|
from concurrent.futures.process import ProcessPoolExecutor
|
||||||
from itertools import chain
|
|
||||||
from multiprocessing import shared_memory
|
from multiprocessing import shared_memory
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from os import listdir, path, pathsep, makedirs
|
from os import listdir, path, pathsep, makedirs
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
import more_itertools
|
import more_itertools
|
||||||
import msgspec
|
import numpy as np
|
||||||
import pandas.io.json
|
|
||||||
import tqdm
|
import tqdm
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from uuid import uuid4
|
from numpy import ndarray
|
||||||
from onnxruntime import InferenceSession
|
from onnxruntime import InferenceSession
|
||||||
from torch import tensor
|
from torch import tensor
|
||||||
import aiofiles
|
import aiofiles
|
||||||
|
|
@ -45,18 +42,21 @@ async def gather_runner(l: list, fn):
|
||||||
return await gather(*[fn(p, sem) for p in l])
|
return await gather(*[fn(p, sem) for p in l])
|
||||||
|
|
||||||
|
|
||||||
# def post_processor(outputs, batch_size, image_size):
|
def post_processor(outputs, batch_size, image_size):
|
||||||
# # print("aaa", flush=True)
|
# print("aaa", flush=True)
|
||||||
# outputs = [numpy.ascontiguousarray(output.astype(numpy.float32)) for output in outputs]
|
outputs = [numpy.ascontiguousarray(output.astype(numpy.float32)) for output in outputs]
|
||||||
# res = resnet_post_process([output.__array_interface__["data"][0] for output in outputs], batch_size, image_size)
|
res = resnet_post_process([output.__array_interface__["data"][0] for output in outputs], batch_size, image_size)
|
||||||
# return res
|
return res
|
||||||
#
|
|
||||||
#
|
|
||||||
# def post_processor_memmap(tmp_filename, sizes, batch_size, image_size): # print("aaa", flush=True) outputs = [
|
def post_processor_memmap(tmp_filename, sizes, batch_size, image_size):
|
||||||
# numpy.memmap(filename=path.join("memmap", tmp_filename + str(order)), dtype=numpy.float16, mode="r", shape=size)
|
# print("aaa", flush=True)
|
||||||
# for order, size in enumerate(sizes)] outputs = [numpy.ascontiguousarray(output.astype(numpy.float32)) for output in
|
outputs = [
|
||||||
# outputs] res = resnet_post_process([output.__array_interface__["data"][0] for output in outputs], batch_size,
|
numpy.memmap(filename=path.join("memmap", tmp_filename + str(order)), dtype=numpy.float16, mode="r", shape=size)
|
||||||
# image_size) return res
|
for order, size in enumerate(sizes)]
|
||||||
|
outputs = [numpy.ascontiguousarray(output.astype(numpy.float32)) for output in outputs]
|
||||||
|
res = resnet_post_process([output.__array_interface__["data"][0] for output in outputs], batch_size, image_size)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
def post_processor_shm(shm_name, sizes, batch_size, image_size):
|
def post_processor_shm(shm_name, sizes, batch_size, image_size):
|
||||||
|
|
@ -65,7 +65,7 @@ def post_processor_shm(shm_name, sizes, batch_size, image_size):
|
||||||
[numpy.ascontiguousarray(numpy.ndarray(shape=size, dtype=numpy.float16, buffer=shm.buf).astype(numpy.float32))
|
[numpy.ascontiguousarray(numpy.ndarray(shape=size, dtype=numpy.float16, buffer=shm.buf).astype(numpy.float32))
|
||||||
for size, shm in zip(sizes, shms)]
|
for size, shm in zip(sizes, shms)]
|
||||||
res = resnet_post_process([output.__array_interface__["data"][0] for output in outputs], batch_size, image_size)
|
res = resnet_post_process([output.__array_interface__["data"][0] for output in outputs], batch_size, image_size)
|
||||||
# print(res)
|
[shm.close() for shm in shms]
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -73,7 +73,7 @@ if __name__ == '__main__':
|
||||||
from kornia.augmentation import LongestMaxSize, PadTo, Normalize
|
from kornia.augmentation import LongestMaxSize, PadTo, Normalize
|
||||||
from kornia.constants import Resample
|
from kornia.constants import Resample
|
||||||
|
|
||||||
longest_max_size = LongestMaxSize(max_size=640, resample=Resample.NEAREST)
|
longest_max_size = LongestMaxSize(max_size=640,resample=Resample.NEAREST)
|
||||||
pad_to = PadTo(size=(640, 640), pad_value=1.)
|
pad_to = PadTo(size=(640, 640), pad_value=1.)
|
||||||
normalize = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
|
normalize = Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))
|
||||||
|
|
||||||
|
|
@ -92,51 +92,35 @@ if __name__ == '__main__':
|
||||||
'CPUExecutionProvider'
|
'CPUExecutionProvider'
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
with open(file="faces.jsonl", mode="r", encoding="utf-8") as fp:
|
|
||||||
already = {list(msgspec.json.decode(line).keys())[0] for line in fp.read().removesuffix("\n").split("\n")}
|
|
||||||
pbar = tqdm.tqdm(
|
|
||||||
total=(set().union(*[listdir(path.join(root_dir, name)) for name in listdir(root_dir)]) - already).__len__())
|
|
||||||
|
|
||||||
# print(len(already))
|
|
||||||
# exit(0)
|
|
||||||
|
|
||||||
for name in listdir(root_dir):
|
for name in listdir(root_dir):
|
||||||
with (ProcessPoolExecutor(max_workers=4) as executor):
|
with (ProcessPoolExecutor(max_workers=4) as executor):
|
||||||
pbar.set_description_str(desc=name, refresh=True)
|
if name != "下井谷幸穂":
|
||||||
if name != "ブログ":
|
|
||||||
# continue
|
# continue
|
||||||
pass
|
pass
|
||||||
file_names = listdir(path.join(root_dir, name))
|
file_names = listdir(path.join(root_dir, name))
|
||||||
file_names_set = set(file_names) - already
|
|
||||||
file_names = list(file_names_set)
|
|
||||||
name_files = [path.join(root_dir, name, file_name) for file_name in file_names]
|
name_files = [path.join(root_dir, name, file_name) for file_name in file_names]
|
||||||
files_data = {file_name: numpy.frombuffer(dat, dtype=numpy.uint8) for file_name, dat in
|
files_data = {file_name: numpy.frombuffer(dat, dtype=numpy.uint8) for file_name, dat in
|
||||||
zip(file_names, run(gather_runner(name_files, async_read)))}
|
zip(file_names, run(gather_runner(name_files, async_read)))}
|
||||||
if files_data.__len__() == 0:
|
|
||||||
continue
|
|
||||||
futures = []
|
|
||||||
shms = []
|
|
||||||
namess = []
|
|
||||||
# print(k_1)
|
# print(k_1)
|
||||||
for cnk in more_itertools.chunked(files_data.items(), n=chunk_size):
|
for cnk in more_itertools.chunked(tqdm.tqdm(files_data.items(), desc=name), n=chunk_size):
|
||||||
stack = []
|
stack = []
|
||||||
names = []
|
tmp_file_name = ""
|
||||||
for file, dat in cnk:
|
for file, dat in cnk:
|
||||||
|
tmp_file_name = file
|
||||||
try:
|
try:
|
||||||
decoded_image = decode_jpeg(tensor(dat), device=device)
|
decoded_image = decode_jpeg(tensor(dat), device=device)
|
||||||
except:
|
except:
|
||||||
decoded_image = tensor(
|
decoded_image = tensor(numpy.array(Image.open(BytesIO(dat.tobytes()))).transpose([2, 0, 1])).to(
|
||||||
numpy.array(Image.open(BytesIO(dat.tobytes()))).transpose([2, 0, 1])).to(
|
|
||||||
device)
|
device)
|
||||||
decoded_image = decoded_image.to(torch.float16) / 255
|
decoded_image = decoded_image.to(torch.float16) / 255
|
||||||
decoded_image = normalize(decoded_image)
|
decoded_image = normalize(decoded_image)
|
||||||
decoded_image_resized = longest_max_size(decoded_image)
|
decoded_image_resized = longest_max_size(decoded_image)
|
||||||
decoded_image_padded = pad_to(decoded_image_resized)
|
decoded_image_padded = pad_to(decoded_image_resized)
|
||||||
stack.append(decoded_image_padded.squeeze())
|
stack.append(decoded_image_padded.squeeze())
|
||||||
names.append(file)
|
|
||||||
namess.append(names)
|
|
||||||
[stack.append(torch.zeros(size=[3, 640, 640], dtype=torch.float16, device=device)) for _ in
|
[stack.append(torch.zeros(size=[3, 640, 640], dtype=torch.float16, device=device)) for _ in
|
||||||
range(chunk_size - stack.__len__())]
|
range(32 - stack.__len__())]
|
||||||
stacked = torch.stack(stack).contiguous()
|
stacked = torch.stack(stack).contiguous()
|
||||||
# print(stacked.shape)
|
# print(stacked.shape)
|
||||||
io_binding = session.io_binding()
|
io_binding = session.io_binding()
|
||||||
|
|
@ -155,46 +139,15 @@ if __name__ == '__main__':
|
||||||
outputs: list[numpy.ndarray] = io_binding.copy_outputs_to_cpu()
|
outputs: list[numpy.ndarray] = io_binding.copy_outputs_to_cpu()
|
||||||
# [numpy.memmap(filename=path.join("memmap", tmp_file_name + str(order)), dtype=numpy.float16,
|
# [numpy.memmap(filename=path.join("memmap", tmp_file_name + str(order)), dtype=numpy.float16,
|
||||||
# mode="w+", shape=output.shape) for order, output in enumerate(outputs)]
|
# mode="w+", shape=output.shape) for order, output in enumerate(outputs)]
|
||||||
uuid = uuid4().__str__()
|
|
||||||
shared_array: list[shared_memory.SharedMemory] = \
|
shared_array: list[shared_memory.SharedMemory] = \
|
||||||
[shared_memory.SharedMemory(name=uuid + "_" + str(order), create=True, size=output.nbytes)
|
[shared_memory.SharedMemory(name=tmp_file_name + "_" + str(order), create=True, size=output.nbytes)
|
||||||
for order, output in enumerate(outputs)]
|
for order, output in enumerate(outputs)]
|
||||||
shared_ndarray = [numpy.ndarray(shape=output.shape, dtype=numpy.float16, buffer=shm.buf)
|
shared_ndarray = [numpy.ndarray(shape=output.shape, dtype=numpy.float16, buffer=shm.buf)
|
||||||
for shm, output in zip(shared_array, outputs, strict=True)]
|
for shm, output in zip(shared_array, outputs, strict=True)]
|
||||||
for shm, output in zip(shared_ndarray, outputs, strict=True):
|
for shm, output in zip(shared_ndarray, outputs, strict=True):
|
||||||
shm[:] = output[:]
|
shm[:] = output[:]
|
||||||
future = executor.submit(post_processor_shm, uuid, [output.shape for output in outputs],
|
future = executor.submit(post_processor_shm, tmp_file_name, [output.shape for output in outputs],
|
||||||
chunk_size, [image_size, image_size])
|
chunk_size, [image_size, image_size])
|
||||||
futures.append(future)
|
# print(future.result())
|
||||||
shms.extend(shared_array)
|
# future.add_done_callback(pprint)
|
||||||
# exit(0)
|
# exit(0)
|
||||||
pbar.update(n=cnk.__len__())
|
|
||||||
# result_dict = dict()
|
|
||||||
with open("faces.jsonl", mode="a", encoding="utf-8") as fp:
|
|
||||||
futures_results = [future.result() for future in futures]
|
|
||||||
# pprint(futures_results)
|
|
||||||
for names, futures_result in zip(namess, futures_results):
|
|
||||||
|
|
||||||
for name, results in zip(names, futures_result):
|
|
||||||
results_list = []
|
|
||||||
if results:
|
|
||||||
# print(name)
|
|
||||||
for result in results:
|
|
||||||
# [print(int(a), end=" ") for a in result[0]]
|
|
||||||
# print(*result[1], end=" ")
|
|
||||||
# [print(int(a), end=" ") for a in result[2]]
|
|
||||||
# print()
|
|
||||||
# results_list.append(list(chain.from_iterable([result])))
|
|
||||||
fp.write(
|
|
||||||
pandas.io.json.ujson_dumps({name: [result[0], result[1][0], result[2]]},
|
|
||||||
ensure_ascii=False, double_precision=5) + "\n")
|
|
||||||
pass
|
|
||||||
else:
|
|
||||||
fp.write(
|
|
||||||
pandas.io.json.ujson_dumps({name: None}, ensure_ascii=False) + "\n")
|
|
||||||
|
|
||||||
# print(name, [])
|
|
||||||
pass
|
|
||||||
# result_dict[name] = results_list
|
|
||||||
# pprint(result_dict)
|
|
||||||
[shm.close() for shm in shms]
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue