38 lines
1.7 KiB
Python
38 lines
1.7 KiB
Python
import cupy
|
|
import numpy
|
|
import os
|
|
import shutil
|
|
|
|
TYPE = "facenet"
|
|
|
|
DEST_ROOT = r"D:\helloproject-ai-data\similar_face"
|
|
SAMPLE_FILE = r"D:\helloproject-ai-data\face_cropped\井上春華\井上春華=morningmusume16ki=12815441476-0-0.jpg"
|
|
PICK_N = 500
|
|
PROBE = 0.6
|
|
embeddings = cupy.load(rf"C:\Users\tomokazu\PycharmProjects\helloproject-ai\embeddings_{TYPE}.npy")
|
|
embeddings_label = numpy.load(rf"C:\Users\tomokazu\PycharmProjects\helloproject-ai\embeddings_{TYPE}_label.npy")
|
|
|
|
sample_pos = numpy.argwhere(embeddings_label == os.path.basename(SAMPLE_FILE))
|
|
sample_emb = embeddings[*sample_pos, :].T
|
|
print(sample_pos)
|
|
print(embeddings_label.shape)
|
|
print(embeddings.shape)
|
|
print(sample_emb.shape)
|
|
similarity: cupy.ndarray = cupy.dot(embeddings, sample_emb).T / (
|
|
cupy.linalg.norm(embeddings, axis=1) * cupy.linalg.norm(sample_emb))
|
|
# sample_norm = cupy.linalg.norm(sample_emb)
|
|
# similarity = cupy.apply_along_axis(lambda x: cupy.dot(x, sample_emb) / (cupy.linalg.norm(x) * sample_norm), 1,
|
|
# embeddings)
|
|
print(similarity.shape)
|
|
similar_pos: cupy.ndarray = similarity.reshape((-1,)).argsort()[::-1]
|
|
print(similar_pos)
|
|
p = int(cupy.count_nonzero(cupy.argwhere(similarity > PROBE)))
|
|
# p = 300
|
|
sim_list = embeddings_label[cupy.asnumpy(similar_pos)[:p]]
|
|
print(numpy.stack([sim_list, cupy.asnumpy(similarity.reshape((-1,))[similar_pos])[:p]], axis=1))
|
|
os.makedirs(os.path.join(DEST_ROOT, os.path.splitext(os.path.basename(SAMPLE_FILE))[0] + f"_{TYPE}"), exist_ok=True)
|
|
for file in sim_list:
|
|
shutil.copyfile(os.path.join(r"D:\helloproject-ai-data\face_cropped", file.split("=")[0], file),
|
|
os.path.join(DEST_ROOT, os.path.splitext(os.path.basename(SAMPLE_FILE))[0] + f"_{TYPE}", file))
|
|
print(f"Copied {p} file(s).")
|