Initial Commit
This commit is contained in:
commit
e5acdf8f8d
|
|
@ -0,0 +1,334 @@
|
|||
import torch.cuda
|
||||
from numpy import ndarray
|
||||
import numpy as np
|
||||
from DBFace_without_OpenCV import DBFace
|
||||
import settings
|
||||
from os import makedirs, listdir, stat, utime
|
||||
from os.path import join, exists
|
||||
from tqdm import tqdm
|
||||
from PIL import Image, ImageDraw
|
||||
from numpy import array, arctan2, pi, zeros, uint8, float32
|
||||
from aiofiles import open as a_open
|
||||
from asyncio import gather, run
|
||||
from multiprocessing import Queue, Process, get_start_method, set_start_method
|
||||
from time import time, sleep
|
||||
from io import BytesIO
|
||||
from math import ceil, sqrt
|
||||
from torch import from_numpy, cuda, Tensor, inference_mode, nn
|
||||
import atexit
|
||||
|
||||
# Folder the cropped faces are written to, and folder the blog pictures are read from.
face_dir = join(settings.datadir(), 'face_cropped')
blog_images = join(settings.datadir(), 'blog_images')

# The worker processes are started with the 'spawn' method, so this module is
# imported again in each of them and several processes can reach this line at
# the same time. ``exist_ok=True`` removes the exists()/makedirs() race that
# could make one of them fail with FileExistsError.
makedirs(face_dir, exist_ok=True)
|
||||
|
||||
|
||||
def truncate(landmark: list[tuple[float]]) -> tuple[tuple[int, int], float]:
    """Compute the pivot point and tilt angle of a face from five landmarks.

    Args:
        landmark: Five ``(x, y)`` points unpacked as left eye, right eye, nose,
            left mouth corner and right mouth corner. The nose is not used.

    Returns:
        ``((cx, cy), angle)``. ``(cx, cy)`` is the mean of the two eyes and the
        two mouth corners, truncated to ``int``. ``angle`` is, in radians,
        ``arctan2(eye_mid_x - mouth_mid_x, mouth_mid_y - eye_mid_y)``.
    """
    left_eye, right_eye, _nose, left_mouth, right_mouth = landmark
    anchors = (left_eye, right_eye, left_mouth, right_mouth)

    pivot_x = sum(point[0] for point in anchors) / 4
    pivot_y = sum(point[1] for point in anchors) / 4

    eyes_x = (right_eye[0] + left_eye[0]) / 2
    eyes_y = (right_eye[1] + left_eye[1]) / 2
    mouth_x = (right_mouth[0] + left_mouth[0]) / 2
    mouth_y = (right_mouth[1] + left_mouth[1]) / 2

    tilt = arctan2(eyes_x - mouth_x, mouth_y - eyes_y)
    return (int(pivot_x), int(pivot_y)), tilt
|
||||
|
||||
|
||||
def load_image(basedir: str, queue: Queue) -> None:
    """Read every file found two levels below ``basedir`` and queue it as a PIL image.

    ``basedir`` is scanned as ``basedir/<name>/<image_file>``. Files are read
    20 at a time through aiofiles, wrapped with ``Image.open`` and put on
    ``queue`` as ``(image, (name, image_file))``. A tqdm bar advances once per
    queued image.

    Args:
        basedir: Directory whose sub-directories contain the image files.
        queue: Output queue; filling pauses while it holds more than 300 items.
    """
    batch_size = 20

    def list_up():
        # Yields (sub-directory, file name) pairs in listdir order.
        for member in listdir(basedir):
            for file_name in listdir(join(basedir, member)):
                yield member, file_name

    async def single_read(path: tuple[str, str]):
        async with a_open(join(basedir, *path), mode='rb') as handle:
            payload = await handle.read()
        return payload, path

    async def parallel_read(paths: list[tuple[str, str]]):
        return await gather(*(single_read(path) for path in paths))

    file_list = list(list_up())
    bar = tqdm(total=len(file_list))

    for offset in range(0, len(file_list), batch_size):
        # Back-pressure: do not read further while the consumers lag behind.
        while queue.qsize() > 300:
            sleep(1e-3)

        batch = file_list[offset:offset + batch_size]
        for raw_bytes, rel_path in run(parallel_read(batch)):
            queue.put((Image.open(BytesIO(raw_bytes)), rel_path))
            bar.update(1)
|
||||
|
||||
|
||||
def pre_process(q1: Queue, q2: Queue):
    """Convert PIL images from ``q1`` into normalised CUDA tensors on ``q2``.

    Runs forever. For each ``(image, path)`` taken from ``q1`` it:

    1. converts the image to RGB, because the three-value ``mean``/``std``
       normalisation and the ``transpose(2, 0, 1)`` below need an ``H x W x 3``
       array;
    2. halves both sides when the image has more than 4,000,000 pixels;
    3. pads the right/bottom edges so both sides are multiples of 32
       (``crop`` with a box larger than the image fills the extra area);
    4. scales to ``[0, 1]``, normalises per channel, reorders to
       ``(1, 3, H, W)`` float32 and moves the tensor to the GPU.

    Args:
        q1: Input queue of ``(PIL image, path)``.
        q2: Output queue of ``(tensor, path)``; production pauses while it
            holds more than 4 items.
    """
    mean = [0.408, 0.447, 0.47]
    std = [0.289, 0.274, 0.278]
    while True:
        # Keep only a few tensors queued so GPU memory use stays bounded.
        while q2.qsize() > 4:
            sleep(1e-4)

        image, path = q1.get()
        if image.mode != 'RGB':
            image = image.convert('RGB')

        width, height = image.size
        if width * height > 400_0000:
            image = image.resize(size=(width // 2, height // 2))
            # Refresh the dimensions: padding with the pre-resize size would
            # blow the halved image back up to the original area.
            width, height = image.size

        image = image.crop((0, 0, ceil(width / 32) * 32, ceil(height / 32) * 32))  # padding
        img_arr = array(image)
        img_arr = ((img_arr / 255.0 - mean) / std).astype(float32).transpose(2, 0, 1)
        torch_image = from_numpy(img_arr)[None]
        q2.put((torch_image.cuda(), path))
|
||||
|
||||
|
||||
def predict(q1: Queue, q2: Queue):
    """Run the DBFace detector on tensors from ``q1`` and forward the raw output to ``q2``.

    Runs forever. Each ``(tensor, path)`` from ``q1`` is passed through the
    model under ``inference_mode`` and ``(model_output, path)`` is put on
    ``q2``. When anything raises, the error is printed, the model is dropped,
    the CUDA cache is emptied and a fresh model is loaded. The item that
    failed is not retried.
    """
    model_path = '/home/tomokazu/PycharmProjects/helloproject-ai/DBFace_without_OpenCV/model/dbface.pth'

    def build_detector():
        # Same construction sequence for the first model and for every reload.
        detector = DBFace()
        detector.eval()
        detector.cuda()
        detector.load(model_path)
        return detector

    db_face = build_detector()
    while True:
        try:
            torch_image, path = q1.get()
            with inference_mode():
                q2.put((db_face(torch_image), path))
        except Exception as e:
            print(e)
            # Release the old model before clearing the cache, then rebuild.
            del db_face
            cuda.empty_cache()
            db_face = build_detector()
|
||||
|
||||
|
||||
def exp(v):
    """Apply a sign-preserving exponential, element by element.

    For a scalar ``v``:

    * ``|v| < 1``  -> ``v * e`` (linear segment),
    * ``v > 0``    -> ``e ** v``,
    * otherwise    -> ``-(e ** -v)``.

    Tuples and lists are processed recursively and come back as lists.
    A numpy array is processed element-wise and comes back as an array of the
    same dtype.
    """
    if isinstance(v, (tuple, list)):
        return [exp(element) for element in v]
    if isinstance(v, ndarray):
        return np.array([exp(element) for element in v], v.dtype)

    linear_limit = 1
    if abs(v) < linear_limit:
        return v * np.exp(1)
    return np.exp(v) if v > 0 else -np.exp(-v)
|
||||
|
||||
|
||||
def nms(objs, iou=0.5):
    """Greedy non-maximum suppression.

    Args:
        objs: Detections exposing a ``score`` attribute and an ``iou(other)``
            method. ``None`` or a sequence of at most one element is returned
            unchanged.
        iou: A detection is dropped when its overlap with an already kept,
            higher-scoring detection is strictly greater than this value.

    Returns:
        The kept detections as a list ordered by descending score.
    """
    if objs is None or len(objs) <= 1:
        return objs

    ranked = sorted(objs, key=lambda obj: obj.score, reverse=True)
    suppressed = [False] * len(ranked)
    survivors = []

    for position, candidate in enumerate(ranked):
        if suppressed[position]:
            continue

        survivors.append(candidate)
        # Knock out every lower-ranked box that overlaps the one just kept.
        for later in range(position + 1, len(ranked)):
            if not suppressed[later] and candidate.iou(ranked[later]) > iou:
                suppressed[later] = True
    return survivors
|
||||
|
||||
|
||||
class BBox:
    """Axis-aligned detection box with an optional list of landmark points.

    The box is stored as ``x``/``y`` (smaller coordinates) and ``r``/``b``
    (larger coordinates). ``width``, ``height`` and ``area`` use the inclusive
    ``+ 1`` convention.
    """

    def __init__(self, label, xyrb, score=0, landmark=None, rotate=False):
        """Store the fields and reorder ``xyrb`` so that ``x <= r`` and ``y <= b``."""
        self.label = label
        self.score = score
        self.landmark = landmark
        first_x, first_y, second_x, second_y = xyrb
        self.x = min(first_x, second_x)
        self.y = min(first_y, second_y)
        self.r = max(first_x, second_x)
        self.b = max(first_y, second_y)
        self.rotate = rotate

    def __repr__(self):
        if self.landmark is None:
            landmark_formated = "empty"
        else:
            landmark_formated = ",".join(str(point[:2]) for point in self.landmark)
        return (f"(BBox[{self.label}]: x={self.x:.2f}, y={self.y:.2f}, r={self.r:.2f}, "
                f"b={self.b:.2f}, width={self.width:.2f}, height={self.height:.2f}, landmark={landmark_formated})")

    @property
    def width(self):
        """Inclusive width: ``r - x + 1``."""
        return self.r - self.x + 1

    @property
    def height(self):
        """Inclusive height: ``b - y + 1``."""
        return self.b - self.y + 1

    @property
    def area(self):
        """``width * height``."""
        return self.width * self.height

    @property
    def haslandmark(self):
        """True when a landmark list was supplied."""
        return self.landmark is not None

    @property
    def xxxxxyyyyy_cat_landmark(self):
        """All landmark x values followed by all y values, as one tuple."""
        xs, ys = zip(*self.landmark)
        return xs + ys

    @property
    def box(self):
        """The box as ``[x, y, r, b]``."""
        return [self.x, self.y, self.r, self.b]

    @box.setter
    def box(self, newvalue):
        # Unlike __init__, the setter stores the values without reordering them.
        self.x, self.y, self.r, self.b = newvalue

    @property
    def xywh(self):
        """The box as ``[x, y, width, height]``."""
        return [self.x, self.y, self.width, self.height]

    @property
    def center(self):
        """Midpoint of the box as ``[cx, cy]``."""
        return [(self.x + self.r) * 0.5, (self.y + self.b) * 0.5]

    # return cx, cy, cx.diff, cy.diff
    def safe_scale_center_and_diff(self, scale, limit_x, limit_y):
        """Scaled centre clipped to ``[0, limit - 1]``: integer parts, then fractional remainders."""
        cx = clip_value((self.x + self.r) * 0.5 * scale, limit_x - 1)
        cy = clip_value((self.y + self.b) * 0.5 * scale, limit_y - 1)
        whole_x, whole_y = int(cx), int(cy)
        return [whole_x, whole_y, cx - whole_x, cy - whole_y]

    def safe_scale_center(self, scale, limit_x, limit_y):
        """Scaled centre clipped to ``[0, limit - 1]`` and truncated to ``int``."""
        scaled_x = clip_value((self.x + self.r) * 0.5 * scale, limit_x - 1)
        scaled_y = clip_value((self.y + self.b) * 0.5 * scale, limit_y - 1)
        return [int(scaled_x), int(scaled_y)]

    def clip(self, width, height):
        """Clamp the box in place to ``[0, width - 1] x [0, height - 1]`` and return ``self``."""
        max_x, max_y = width - 1, height - 1
        self.x = clip_value(self.x, max_x)
        self.y = clip_value(self.y, max_y)
        self.r = clip_value(self.r, max_x)
        self.b = clip_value(self.b, max_y)
        return self

    def iou(self, other):
        """Intersection over union with ``other``, computed by ``computeIOU``."""
        return computeIOU(self.box, other.box)
|
||||
|
||||
|
||||
def computeIOU(rec1, rec2):
    """Intersection over union of two boxes given as ``(x1, y1, x2, y2)``.

    Coordinates are treated as inclusive, so each side length is
    ``max - min + 1``. Boxes that do not overlap give ``0``.
    """
    ax1, ay1, ax2, ay2 = rec1
    bx1, by1, bx2, by2 = rec2

    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)

    # Corners of the overlapping rectangle.
    left, top = max(ax1, bx1), max(ay1, by1)
    right, bottom = min(ax2, bx2), min(ay2, by2)

    overlap = max(0, right - left + 1) * max(0, bottom - top + 1)
    return overlap / (area_a + area_b - overlap)
|
||||
|
||||
|
||||
def clip_value(value, high, low=0):
    """Clamp ``value`` to at most ``high``, then to at least ``low``."""
    upper_bounded = high if high < value else value
    return low if low > upper_bounded else upper_bounded
|
||||
|
||||
|
||||
def post_process(queue: Queue, threshold: float = 0.4, nms_iou: float = 0.5):
    """Decode detector output, then rotate, crop and save every sufficiently large face.

    Runs forever. Each queue item is ``((hm, box, landmark), (name, file))``.

    * Heat-map peaks (cells equal to their 3x3 max-pool) are ranked and the
      best 1000 are decoded while their score is at least ``threshold``.
      Outputs with fewer than 1000 heat-map cells are skipped.
    * Decoded boxes go through ``nms`` with ``nms_iou``.
    * The source picture is re-opened from ``blog_images`` and halved when it
      has more than 4,000,000 pixels.
    * Faces whose half crop size is below 100 px are discarded. The others are
      rotated around the landmark centre by the angle from ``truncate`` and
      saved under ``face_dir/<name>/`` with ``-<n>`` inserted before ``.jpg``.
      The saved file receives the access and modification times of the source.

    Args:
        queue: Input queue fed with the detector output.
        threshold: Minimum heat-map score of a detection.
        nms_iou: Overlap above which a lower-scoring box is suppressed.
    """
    stride = 4
    while True:
        tensor, path = queue.get()
        hm, box, landmark = tensor
        del tensor
        name, file = path

        hm_pool = nn.functional.max_pool2d(hm, 3, 1, 1)
        t = ((hm == hm_pool).float() * hm).view(1, -1).cpu()
        if t.size()[1] < 1000:
            continue
        scores, indices = t.topk(1000)
        hm_width = hm.shape[3]
        del hm  # drop the reference to the heat-map as soon as it is no longer needed

        scores = scores.squeeze()
        indices = indices.squeeze()
        ys = list((indices / hm_width).int().data.numpy())
        xs = list((indices % hm_width).int().data.numpy())
        scores = list(scores.data.numpy())
        box = box.cpu().squeeze().data.numpy()
        landmark = landmark.cpu().squeeze().data.numpy()

        objs = []
        for cx, cy, score in zip(xs, ys, scores):
            # topk returns scores in descending order, so nothing later qualifies.
            if score < threshold:
                break

            x, y, r, b = box[:, cy, cx]
            xyrb = (array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride
            x5y5 = landmark[:, cy, cx]
            x5y5 = (exp(x5y5 * 4) + ([cx] * 5 + [cy] * 5)) * stride
            box_landmark = list(zip(x5y5[:5], x5y5[5:]))
            objs.append(BBox(0, xyrb=xyrb, score=score, landmark=box_landmark))
        predicted = nms(objs, iou=nms_iou)

        source_path = join(blog_images, *path)
        image = Image.open(source_path)
        width, height = image.size
        if width * height > 400_0000:
            image = image.resize(size=(width // 2, height // 2))

        target_dir = join(face_dir, name)
        for order, face in enumerate(predicted):
            # Reject small faces first so no rotation is spent on them.
            image_size = max(face.width, face.height) * sqrt(2) // 2
            if image_size < 100:
                continue

            (center_x, center_y), angle = truncate(face.landmark)
            # truncate() returns radians; Image.rotate expects degrees.
            rotated = image.rotate(angle=angle * 360 / (2 * pi), center=(center_x, center_y))
            cropped = rotated.crop((center_x - image_size, center_y - image_size,
                                    center_x + image_size, center_y + image_size))

            makedirs(target_dir, exist_ok=True)
            saved_path = join(target_dir, file.replace('.jpg', '-' + str(order + 1) + '.jpg'))
            cropped.save(saved_path)

            source_stat = stat(source_path)
            utime(path=saved_path, times=(source_stat.st_atime, source_stat.st_mtime))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # CUDA tensors are handed between processes, so 'fork' is replaced by 'spawn'.
    if get_start_method() == 'fork':
        set_start_method('spawn', force=True)

    workers = []
    try:
        # PostProcess_Q is created and polled below but is not passed to any worker.
        Load_Q, PreProcess_Q, Predict_Q, PostProcess_Q = (Queue() for i in range(4))
        Load_Processes = [Process(target=load_image, args=(blog_images, Load_Q))
                          for _ in range(settings.FaceCropProcesses.load)]
        PreProcesses = [Process(target=pre_process, args=(Load_Q, PreProcess_Q))
                        for _ in range(settings.FaceCropProcesses.pre_process)]
        Predict_Process = [Process(target=predict, args=(PreProcess_Q, Predict_Q))
                           for _ in range(settings.FaceCropProcesses.predict)]
        PostProcesses = [Process(target=post_process, args=(Predict_Q,))
                         for _ in range(settings.FaceCropProcesses.post_process)]

        workers = Load_Processes + PreProcesses + Predict_Process + PostProcesses
        for p in workers:
            p.start()

        # The loaders are the only workers that return on their own. Until they
        # are done, an empty queue only means the pipeline has not filled up
        # yet, so wait for them before looking at queue sizes.
        for p in Load_Processes:
            p.join()

        # The other workers loop forever. Treat the run as finished once every
        # queue has been empty on two consecutive polls, which gives a worker
        # that is still busy with its last item time to hand it on.
        idle_polls = 0
        while idle_polls < 2:
            sleep(5)
            # print(Load_Q.qsize(), PreProcess_Q.qsize(), Predict_Q.qsize(), PostProcess_Q.qsize())
            if sum((Load_Q.qsize(), PreProcess_Q.qsize(), Predict_Q.qsize(), PostProcess_Q.qsize())) == 0:
                idle_polls += 1
            else:
                idle_polls = 0

    except KeyboardInterrupt as e:
        print(e)
    finally:
        # Always stop the endless workers, including after an unexpected error.
        for p in workers:
            p.terminate()
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
DBFace*
|
||||
*.jpg
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
from shutil import copyfile
|
||||
from time import sleep
|
||||
|
||||
from insightface.app import FaceAnalysis
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
from json import load
|
||||
from os import listdir, makedirs
|
||||
from os.path import join, isfile, isdir, basename, exists
|
||||
from settings import datadir
|
||||
from PIL import Image
|
||||
from numpy import array, sqrt
|
||||
|
||||
# Reference embeddings: a JSON object mapping a sample file path to its embedding vector.
with open(file=join(datadir(), 'sample_emb.json'), mode='r') as f:
    sample_dict: dict = load(f)

# The label of a sample is the part of its file name in front of the first '='.
sample_key = [basename(s).split('=')[0] for s in sample_dict.keys()]
sample_emb = list(sample_dict.values())
del sample_dict

neighbors = NearestNeighbors(n_neighbors=15, n_jobs=-1, metric='cosine')
neighbors.fit(sample_emb)

makedirs(join(datadir(), 'NN_predict'), exist_ok=True)

face_analysis = FaceAnalysis(allowed_modules=['recognition', 'detection'])
face_analysis.prepare(ctx_id=0, det_size=(160, 160))

for name in listdir(join(datadir(), 'face_cropped')):
    for file in listdir(join(datadir(), 'face_cropped', name)):
        im_path = join(datadir(), 'face_cropped', name, file)
        if not isfile(im_path):
            continue

        # Force three channels before swapping them to B, G, R order; a
        # grayscale or RGBA file would otherwise break the channel indexing.
        image = array(Image.open(im_path).convert('RGB'))[:, :, [2, 1, 0]]
        emb = face_analysis.get(image)
        if not emb:
            continue

        distances, nears = neighbors.kneighbors([emb[0].embedding], return_distance=True)

        # Distance-weighted vote of the neighbours for their label.
        scores = dict()
        for dist, key_id in zip(distances[0], nears[0]):
            # The guard has to test the distance, not the neighbour index:
            # a zero distance would make 1 / sqrt(dist) a division by zero,
            # whereas index 0 is an ordinary sample.
            if dist > 0.0:
                label = sample_key[key_id]
                scores[label] = scores.get(label, 0.0) + 1 / sqrt(dist)

        print(im_path)
        val_sum = sum(scores.values())
        sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        for key, reliability in sorted_scores:
            print(f'{key:<7}\t{(reliability * 100 / val_sum):05.2f}%')

        if not sorted_scores:
            # No neighbour voted, so there is no candidate label.
            prediction = 'others'
        elif min(distances[0]) < 0.4 or sorted_scores[0][1] / val_sum > 0.6:
            # Accept the top label when a sample is very close, or when the
            # label holds more than 60% of the total vote weight.
            prediction = sorted_scores[0][0]
        else:
            prediction = 'others'
        print(prediction)

        makedirs(join(datadir(), 'NN_predict', prediction), exist_ok=True)
        copyfile(im_path, join(datadir(), 'NN_predict', prediction, basename(im_path)))

        print('\n\n')
|
||||
# sleep(1)
|
||||
# exit()
|
||||
|
|
@ -0,0 +1,174 @@
|
|||
import settings
|
||||
import re
|
||||
import sys
|
||||
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
|
||||
from aiohttp import ClientSession, ClientConnectorError, ClientTimeout
|
||||
from itertools import chain
|
||||
from asyncio import run, Semaphore, sleep
|
||||
from datetime import datetime
|
||||
from aiofiles import open
|
||||
from os import path, utime, stat, cpu_count, makedirs
|
||||
from tqdm.asyncio import tqdm
|
||||
from concurrent.futures import as_completed, ProcessPoolExecutor, Future
|
||||
from ujson import loads
|
||||
from warnings import filterwarnings
|
||||
|
||||
# Size of the semaphore that bounds the number of HTTP requests in flight per blog.
PARALLEL_LIMIT = 300

# Create both output folders before any task writes into them.
for _output_folder in ('blog_images', 'blog_text'):
    makedirs(path.join(settings.datadir(), _output_folder), exist_ok=True)

# Silence the bs4 MarkupResemblesLocatorWarning category for the whole run.
filterwarnings('ignore', category=MarkupResemblesLocatorWarning, module='bs4')
|
||||
|
||||
|
||||
async def run_each(name: str) -> None:
    """Scrape one blog: list its posts, parse them, then download every image.

    Steps:
        1. read the number of entry-list pages;
        2. fetch each list page and collect the post URLs;
        3. fetch each post and parse it in a process pool (``parse_image``);
        4. download every image the parsers returned.

    The HTTP session and the process pool are opened as context managers so
    both are released even when one of the steps raises.

    Args:
        name: Blog account name as used in the ``ameblo.jp/<name>/`` URLs.
    """
    sem: Semaphore = Semaphore(PARALLEL_LIMIT)
    async with ClientSession(trust_env=True, headers=settings.request_header,
                             timeout=ClientTimeout(total=10 * 60)) as session:
        list_pages_count = await parse_list_pages_count(name)
        print(name, list_pages_count)

        url_lists = await tqdm.gather(
            *[parse_list_page(name, i, sem, session) for i in range(1, list_pages_count + 1)],
            desc=name)
        url_list = list(chain.from_iterable(url_lists))

        # Diagnostic: show any collected URL that does not contain 'html'.
        for url in url_list:
            if 'html' not in url:
                print(url)

        with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
            # Each coroutine returns the Future of the parsing job it submitted.
            futures = await tqdm.gather(
                *[parse_blog_post(url, sem, session, executor) for url in url_list],
                desc='scan blog')
            images_list = [
                future.result()
                for future in tqdm(as_completed(futures), desc='waiting processing ' + name, total=len(futures))
            ]
        image_link_package = list(chain.from_iterable(images_list))

        await tqdm.gather(
            *[download_image(filename, url, date, sem, session) for filename, url, date in image_link_package],
            desc='downloading images')
|
||||
|
||||
|
||||
async def parse_list_pages_count(blog_name: str) -> int:
    """Return ``paging.max_page`` from the blog's ``entrylist.html`` page.

    The page embeds a JSON object assigned to ``window.INIT_DATA``. The value
    is read from the first entry of ``entryState.blogPageMap``. A dedicated
    session is opened for this single request.
    """
    entry_list_url = f'https://ameblo.jp/{blog_name}/entrylist.html'
    async with ClientSession(trust_env=True, headers=settings.request_header) as session:
        async with session.get(entry_list_url) as resp:
            resp_html = await resp.text()

    # The regex stops in front of the closing '};', so the brace is added back.
    init_data = re.findall(r'<script>window.INIT_DATA=(.*?)};', resp_html)[0] + '}'
    page_map = loads(init_data)['entryState']['blogPageMap']
    first_page_info = list(page_map.values())[0]
    return first_page_info['paging']['max_page']
|
||||
|
||||
|
||||
async def parse_list_page(blog_name: str, order: int, sem: Semaphore, session: ClientSession) -> list[str]:
    """Return the URLs of the open posts listed on entry-list page ``order``.

    Only entries whose ``publish_flg`` equals ``'open'`` are returned. When the
    embedded JSON cannot be extracted or read, the error and the page URL are
    printed and an empty list is returned.

    Args:
        blog_name: Blog account name.
        order: 1-based number of the list page.
        sem: Semaphore limiting concurrent requests.
        session: Shared HTTP session.
    """
    list_url = f'https://ameblo.jp/{blog_name}/entrylist-{order}.html'
    async with sem:
        async with session.get(list_url) as resp:
            resp_html = await resp.text()
            try:
                # The regex stops in front of the closing '};', so the brace is added back.
                init_data = re.findall(r'<script>window.INIT_DATA=(.*?)};', resp_html)[0] + '}'
                entries = list(loads(init_data)['entryState']['entryMap'].values())
                return [
                    f"https://ameblo.jp/{blog_name}/entry-{entry['entry_id']}.html"
                    for entry in entries
                    if entry['publish_flg'] == 'open'
                ]
            except Exception as e:
                print(e)
                print(list_url)
                return []
|
||||
|
||||
|
||||
def parse_image(html: str, url: str) -> list:
    """Extract image links from one post and save its text to disk.

    The post data is read from the JSON assigned to ``window.INIT_DATA``.
    Emoji images are removed and each ``PhotoSwipeImage`` is replaced in the
    text by a ``--blog-image-<order>--`` marker. The text is written to
    ``blog_text/<theme>/<account>=<entry_id>.txt`` with the post's last edit
    time as modification time.

    Args:
        html: Full HTML of the post page.
        url: URL of the post; the second-to-last path segment is the account.

    Returns:
        A list of ``(file_name, image_url, last_edit_datetime)`` tuples, one
        for each ``PhotoSwipeImage`` that has no ``data-src`` attribute.
    """
    blog_account = url.split('/')[-2]
    try:
        # The regex stops in front of the closing '};', so the brace is added back.
        init_data = re.findall(r'<script>window.INIT_DATA=(.*?)};', html)[0] + '}'
        json_obj = list(loads(init_data)['entryState']['entryMap'].values())[0]
    except IndexError as e:
        print(e, url)
        exit()

    theme = settings.theme_curator(json_obj['theme_name'], blog_account)
    date = datetime.fromisoformat(json_obj['last_edit_datetime'])
    blog_entry = json_obj['entry_id']
    entry_body = BeautifulSoup(json_obj['entry_text'].replace('<br>', '\n'), 'lxml')

    for emoji in entry_body.find_all('img', class_='emoji'):
        emoji.decompose()

    file_prefix = '='.join([theme, blog_account, str(blog_entry)])
    return_list = list()
    for div in entry_body.find_all('img', class_='PhotoSwipeImage'):
        if not div.has_attr('data-src'):
            return_list.append((
                file_prefix + '-' + str(div['data-image-order']) + '.jpg',
                str(div['src']).split('?')[0],  # image URL without its query string
                date,
            ))
        # Swap the first image still present in the text for a marker.
        entry_body.find('img', class_='PhotoSwipeImage').replaceWith(
            '--blog-image-' + str(div["data-image-order"]) + '--\n')

    theme_dir = path.join(settings.datadir(), 'blog_text', theme)
    if not path.isdir(theme_dir):
        makedirs(theme_dir, exist_ok=True)

    for line_break in entry_body.find_all('br'):
        line_break.replaceWith('\n')

    async def save_text(save_path: str, content: str, last_modified_time: datetime):
        # ``open`` is the aiofiles version imported at the top of this file.
        async with open(save_path, mode='w') as f:
            await f.write(content)
        utime(path=save_path, times=(stat(path=save_path).st_atime, last_modified_time.timestamp()))

    text_path = path.join(settings.datadir(), 'blog_text', theme, blog_account + '=' + str(blog_entry) + '.txt')
    run(save_text(text_path, entry_body.text, date))
    return return_list
|
||||
|
||||
|
||||
async def parse_blog_post(url: str, sem: Semaphore, session: ClientSession, executor: ProcessPoolExecutor) -> Future:
    """Download one post and submit its HTML to ``parse_image`` in the process pool.

    The request is repeated until it succeeds. After a ``ClientConnectorError``
    the coroutine sleeps 5 seconds and prints the error to stderr before the
    next attempt.

    Returns:
        The ``Future`` of the submitted ``parse_image(html, url)`` call.
    """
    resp_html = None
    while resp_html is None:
        async with sem:
            try:
                async with session.get(url) as resp:
                    resp_html = await resp.text()
            except ClientConnectorError as e:
                await sleep(5.0)
                print(e, file=sys.stderr)

    return executor.submit(parse_image, resp_html, url)
|
||||
|
||||
|
||||
async def download_image(filename: str, url: str, date: datetime, sem: Semaphore, session: ClientSession) -> None:
    """Download one JPEG into ``blog_images/<tag>/<filename>``.

    ``tag`` is the part of ``filename`` in front of the first ``'='``. A file
    that already exists is not downloaded again, and a response whose content
    type is not ``image/jpeg`` is ignored. After writing, the modification
    time of the file is set to ``date``.
    """
    tag = filename.split('=')[0]
    tag_dir = path.join(settings.datadir(), "blog_images", tag)
    if not path.isdir(tag_dir):
        makedirs(tag_dir, exist_ok=True)

    filepath = path.join(settings.datadir(), "blog_images", tag, filename)
    if path.isfile(filepath):
        return

    async with sem:
        async with session.get(url) as resp:
            if resp.content_type != "image/jpeg":
                return
            # ``open`` is the aiofiles version imported at the top of this file.
            async with open(file=filepath, mode="wb") as f:
                await f.write(await resp.read())
            utime(path=filepath, times=(stat(path=filepath).st_atime, date.timestamp()))
|
||||
|
||||
|
||||
# Patterns that pull a single JSON string value out of the raw page source.
theme_regex = re.compile('"theme_name":"(.*?)"')
modified_time_regex = re.compile('"dateModified":"(.*?)"')


def grep_theme(html: str) -> str:
    """Return the value of the first ``"theme_name":"..."`` pair found in ``html``."""
    found = theme_regex.search(html)
    return str(found.group(1))


def grep_modified_time(html: str) -> str:
    """Return the value of the first ``"dateModified":"..."`` pair found in ``html``."""
    found = modified_time_regex.search(html)
    return str(found.group(1))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Blogs are scraped one after another, each in its own event loop.
    for blog in settings.blog_list:
        scrape_job = run_each(blog)
        run(scrape_job)
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
import torch.cuda
|
||||
from numpy import ndarray
|
||||
import numpy as np
|
||||
import settings
|
||||
from os import makedirs, listdir, stat, utime, devnull
|
||||
from os.path import join, exists
|
||||
from tqdm import tqdm
|
||||
from PIL import Image, ImageDraw
|
||||
from numpy import array, arctan2, pi, zeros, uint8, float32
|
||||
from aiofiles import open as a_open
|
||||
from asyncio import gather, run
|
||||
from multiprocessing import Queue, Process, get_start_method, set_start_method
|
||||
from time import time, sleep
|
||||
from io import BytesIO
|
||||
from math import ceil, sqrt
|
||||
from torch import from_numpy, cuda, Tensor, inference_mode, nn
|
||||
import atexit
|
||||
from insightface.app import FaceAnalysis
|
||||
from contextlib import redirect_stdout
|
||||
from collections import OrderedDict
|
||||
|
||||
# Folder the cropped faces are written to, and folder the blog pictures are read from.
face_dir = join(settings.datadir(), 'face_cropped')
blog_images = join(settings.datadir(), 'blog_images')

# The worker processes are started with the 'spawn' method, so this module is
# imported again in each of them and several processes can reach this line at
# the same time. ``exist_ok=True`` removes the exists()/makedirs() race that
# could make one of them fail with FileExistsError.
makedirs(face_dir, exist_ok=True)
|
||||
|
||||
|
||||
def truncate(landmark: list[tuple[float]]) -> tuple[tuple[int, int], float]:
    """Compute the pivot point and tilt angle of a face from five landmarks.

    Args:
        landmark: Five ``(x, y)`` points unpacked as left eye, right eye, nose,
            left mouth corner and right mouth corner. The nose is not used.

    Returns:
        ``((cx, cy), angle)``. ``(cx, cy)`` is the mean of the two eyes and the
        two mouth corners, truncated to ``int``. ``angle`` is, in radians,
        ``arctan2(eye_mid_x - mouth_mid_x, mouth_mid_y - eye_mid_y)``.
    """
    left_eye, right_eye, _nose, left_mouth, right_mouth = landmark
    anchors = (left_eye, right_eye, left_mouth, right_mouth)

    pivot_x = sum(point[0] for point in anchors) / 4
    pivot_y = sum(point[1] for point in anchors) / 4

    eyes_x = (right_eye[0] + left_eye[0]) / 2
    eyes_y = (right_eye[1] + left_eye[1]) / 2
    mouth_x = (right_mouth[0] + left_mouth[0]) / 2
    mouth_y = (right_mouth[1] + left_mouth[1]) / 2

    tilt = arctan2(eyes_x - mouth_x, mouth_y - eyes_y)
    return (int(pivot_x), int(pivot_y)), tilt
|
||||
|
||||
|
||||
def load_image(basedir: str, queue: Queue, progress: tuple[Queue], names=None) -> None:
    """Read the images below ``basedir`` and queue them as PIL images.

    ``basedir`` is scanned as ``basedir/<name>/<image_file>``. Files are read
    20 at a time through aiofiles, wrapped with ``Image.open`` and put on
    ``queue`` as ``(image, (name, image_file))``. A tqdm bar advances once per
    queued image and shows the current name plus the size of every queue in
    ``progress``.

    Args:
        basedir: Directory whose sub-directories contain the image files.
        queue: Output queue; filling pauses while it holds more than 150 items.
        progress: Queues whose sizes are displayed next to the progress bar.
        names: Optional iterable of sub-directory names to restrict the scan
            to. ``None`` (the default) scans every entry of ``basedir``. This
            replaces a debugging leftover that hard-coded one directory twice,
            which queued each of its images twice and skipped all others.
    """
    batch_size = 20

    def list_up():
        for name in (listdir(basedir) if names is None else names):
            for image_file in listdir(join(basedir, name)):
                yield name, image_file

    async def single_read(path: tuple[str, str]):
        async with a_open(join(basedir, *path), mode='rb') as f:
            return await f.read(), path

    async def parallel_read(paths: list[tuple[str, str]]):
        return await gather(*[single_read(path) for path in paths])

    file_list = list(list_up())
    bar = tqdm(total=len(file_list))
    for i in range(0, len(file_list), batch_size):
        # Back-pressure: do not read further while the consumers lag behind.
        while queue.qsize() > 150:
            sleep(1e-3)

        chunk = file_list[i:i + batch_size]
        for img_bin, p in run(parallel_read(chunk)):
            queue.put((Image.open(BytesIO(img_bin)), p))
            bar.update(1)
            bar.set_postfix(OrderedDict(name=p[0], qsize=[q.qsize() for q in progress]))
|
||||
|
||||
|
||||
def pre_process(q1: Queue, q2: Queue):
    """Convert PIL images from ``q1`` into channel-reversed numpy arrays on ``q2``.

    Runs forever. Each image is converted to RGB, turned into an array and its
    last axis is reversed, so the result is an ``H x W x 3`` array in B, G, R
    order. Without the RGB conversion a grayscale image has no channel axis to
    reverse and an RGBA image would yield four channels.

    Args:
        q1: Input queue of ``(PIL image, path)``.
        q2: Output queue of ``(array, path)``; production pauses while it
            holds more than 50 items.
    """
    while True:
        # Sleep while waiting instead of spinning: a bare ``pass`` loop keeps
        # one CPU core fully busy for as long as the consumer is slower.
        while q2.qsize() > 50:
            sleep(1e-4)

        image, path = q1.get()
        if image.mode != 'RGB':
            image = image.convert('RGB')
        img_arr = array(image)[:, :, ::-1]
        q2.put((img_arr, path))
|
||||
|
||||
|
||||
def predict(q1: Queue, q2: Queue, gpu: int):
    """Detect faces with insightface on device ``gpu`` and forward the results.

    Runs forever. For every ``(image, path)`` from ``q1`` the detector is
    called. Images without any detection are dropped. Otherwise a list of
    ``(kps, det_score, bbox)`` tuples, one per face, is put on ``q2`` together
    with ``path``.

    Args:
        q1: Input queue of ``(image array, path)``.
        q2: Output queue of ``(faces, path)``.
        gpu: Device index passed to ``FaceAnalysis.prepare`` as ``ctx_id``.
            Start-up is delayed by ``gpu * 4`` seconds so that the workers do
            not all initialise at the same moment.
    """
    sleep(gpu * 4)
    # Silence what the library prints while loading. The sink is opened in a
    # ``with`` block so its file handle is closed again afterwards.
    with open(devnull, mode='w') as sink, redirect_stdout(sink):
        face_analysis = FaceAnalysis(providers=['CUDAExecutionProvider'], allowed_modules=['detection'])
        face_analysis.prepare(ctx_id=gpu)

    while True:
        image, path = q1.get()
        res = face_analysis.get(image)
        if not res:
            continue

        # Forward only the three fields that are used downstream.
        faces = [(face.kps, face.det_score, face.bbox) for face in res]
        q2.put((faces, path))
|
||||
|
||||
|
||||
def post_process(queue: Queue):
    """Rotate, crop and save every sufficiently large detected face.

    Runs forever. Each queue item is ``(faces, (name, file))`` where ``faces``
    is a list of ``(kps, score, bbox)``. ``bbox`` is read as
    ``(x1, y1, x2, y2)`` and ``kps`` is passed to ``truncate``.

    For each face the half crop size is
    ``max(bbox width, bbox height) * sqrt(2) // 2``. Faces below 100 px are
    discarded. The others are rotated around the landmark centre by the angle
    from ``truncate`` and saved under ``face_dir/<name>/`` with ``-<n>``
    inserted before ``.jpg``. The saved file receives the access and
    modification times of the source picture.
    """
    while True:
        res, path = queue.get()
        name, file = path
        source_path = join(blog_images, *path)
        image = Image.open(source_path)
        target_dir = join(face_dir, name)

        for order, face in enumerate(res):
            kps, score, bbox = face
            face_width = bbox[2] - bbox[0]
            face_height = bbox[3] - bbox[1]

            # Reject small faces first so no rotation is spent on them.
            image_size = max(face_width, face_height) * sqrt(2) // 2
            if image_size < 100:
                continue

            (center_x, center_y), angle = truncate(kps)
            # truncate() returns radians; Image.rotate expects degrees.
            rotated = image.rotate(angle=angle * 360 / (2 * pi), center=(center_x, center_y))
            cropped = rotated.crop((center_x - image_size, center_y - image_size,
                                    center_x + image_size, center_y + image_size))

            makedirs(target_dir, exist_ok=True)
            saved_path = join(target_dir, file.replace('.jpg', '-' + str(order + 1) + '.jpg'))
            cropped.save(saved_path)

            source_stat = stat(source_path)
            utime(path=saved_path, times=(source_stat.st_atime, source_stat.st_mtime))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Worker processes are started with 'spawn' instead of 'fork'.
    if get_start_method() == 'fork':
        set_start_method('spawn', force=True)

    workers = []
    try:
        # PostProcess_Q is created, displayed and polled but no worker writes to it.
        Load_Q, PreProcess_Q, Predict_Q, PostProcess_Q = (Queue() for i in range(4))
        Load_Processes = [
            Process(target=load_image, args=(blog_images, Load_Q, (Load_Q, PreProcess_Q, Predict_Q, PostProcess_Q)))
            for _ in range(settings.FaceCropProcesses.load)]
        PreProcesses = [Process(target=pre_process, args=(Load_Q, PreProcess_Q))
                        for _ in range(settings.FaceCropProcesses.pre_process)]
        # One prediction worker per device index 0 .. predict - 1.
        Predict_Process = [Process(target=predict, args=(PreProcess_Q, Predict_Q, gpu_id))
                           for gpu_id in range(settings.FaceCropProcesses.predict)]
        PostProcesses = [Process(target=post_process, args=(Predict_Q,))
                         for _ in range(settings.FaceCropProcesses.post_process)]

        workers = Load_Processes + PreProcesses + Predict_Process + PostProcesses
        for p in workers:
            p.start()

        # The loaders are the only workers that return on their own. Until they
        # are done, an empty queue only means the pipeline has not filled up
        # yet, so wait for them before looking at queue sizes.
        for p in Load_Processes:
            p.join()

        # The other workers loop forever. Treat the run as finished once every
        # queue has been empty on two consecutive polls, which gives a worker
        # that is still busy with its last item time to hand it on.
        idle_polls = 0
        while idle_polls < 2:
            sleep(5)
            # print(Load_Q.qsize(), PreProcess_Q.qsize(), Predict_Q.qsize(), PostProcess_Q.qsize())
            if sum((Load_Q.qsize(), PreProcess_Q.qsize(), Predict_Q.qsize(), PostProcess_Q.qsize())) == 0:
                idle_polls += 1
            else:
                idle_polls = 0

    except KeyboardInterrupt as e:
        print(e)
    finally:
        # Always stop the endless workers, including after an unexpected error.
        for p in workers:
            p.terminate()
|
||||
|
|
@ -0,0 +1,165 @@
|
|||
from os import makedirs
|
||||
from torchvision.models import Inception_V3_Weights, inception_v3
|
||||
from torch.nn import Linear
|
||||
from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \
|
||||
RandomHorizontalFlip, \
|
||||
Resize, CenterCrop, RandomAffine, RandomAdjustSharpness, RandomAutocontrast, RandomEqualize, GaussianBlur
|
||||
import matplotlib.pyplot as plt
|
||||
from numpy import arange
|
||||
from torchsummary import summary
|
||||
from torch.nn import CrossEntropyLoss
|
||||
from torch.optim import SGD, Adam, lr_scheduler
|
||||
from torchvision.datasets import ImageFolder
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm
|
||||
from settings import datadir
|
||||
from os.path import join
|
||||
from torch.cuda import is_available
|
||||
from torch import no_grad, save
|
||||
from datetime import datetime
|
||||
|
||||
# Fine-tune an ImageNet-pretrained Inception v3 on the image folders under
# <datadir>/dataset/{train,val}, then write the accuracy/loss curves and the
# trained model to <datadir>/artifact/<timestamp>/.

device = 'cuda' if is_available() else 'cpu'

# 'train' applies random augmentation; 'val' only applies a fixed 0.8x
# zoom-out (RandomAffine with a degenerate range) followed by a resize.
transform = {
    'train': Compose([
        RandomHorizontalFlip(p=0.1),
        RandomAdjustSharpness(sharpness_factor=2, p=0.2),
        GaussianBlur(kernel_size=3),
        RandomAutocontrast(),
        RandomEqualize(p=0.5),
        ToTensor(),
        RandomRotation(degrees=15),
        RandomResizedCrop(size=299, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True)
    ]),
    'val': Compose([
        ToTensor(),
        RandomAffine(scale=(0.8, 0.8), degrees=(0, 0)),
        Resize(299, antialias=True)
    ])
}
image_folder = {
    'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']),
    'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val'])
}

dataloader = {
    'train': DataLoader(image_folder['train'], batch_size=16, shuffle=True, num_workers=3),
    'val': DataLoader(image_folder['val'], batch_size=16, shuffle=False, num_workers=3)
}

model = inception_v3(weights=Inception_V3_Weights.IMAGENET1K_V1)

print()

# Freeze every parameter yielded before the first 'Mixed_7*' name. The flag
# latches, so that parameter and everything yielded after it stay trainable.
tune = False
for name, layer in model.named_parameters():
    print(name)
    if 'Mixed_7' in name:
        tune = True
    layer.requires_grad = tune

print(model)

# Replace the classifier head so it emits one logit per dataset class.
model.fc = Linear(in_features=2048, out_features=len(image_folder['train'].classes), bias=True)
summary(model=model, input_size=(3, 299, 299), device='cpu')

model_gpu = model.to(device=device)
criterion = CrossEntropyLoss()
# Lower learning rate for the pretrained Mixed_7* blocks than for the new head.
optimizer = Adam(params=[
    {'params': model_gpu.Mixed_7a.parameters(), 'lr': 1e-5},
    {'params': model_gpu.Mixed_7b.parameters(), 'lr': 1e-5},
    {'params': model_gpu.Mixed_7c.parameters(), 'lr': 1e-5},
    {'params': model_gpu.avgpool.parameters(), 'lr': 1e-4},
    {'params': model_gpu.fc.parameters(), 'lr': 1e-4},
])
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)
epochs = 50

train_loss_list = list()
train_acc_list = list()
val_loss_list = list()
val_acc_list = list()

save_dir = join(datadir(), 'artifact', str(datetime.now()))
print(save_dir)
makedirs(save_dir, exist_ok=True)

train_size = len(dataloader['train'].dataset)
val_size = len(dataloader['val'].dataset)

for i in range(epochs):
    train_loss = .0
    train_acc = .0
    val_loss = .0
    val_acc = .0

    model_gpu.train()
    for images, labels in tqdm(dataloader['train']):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)

        # The training-mode forward pass is unpacked into two values;
        # only the first (main logits) is used for the loss.
        outputs, _ = model_gpu(images)

        loss = criterion(outputs, labels)
        # CrossEntropyLoss averages over the batch by default. Weight each
        # batch mean by its size so that dividing by the dataset size below
        # yields a true per-sample mean.
        train_loss += loss.item() * labels.size(0)

        loss.backward()
        optimizer.step()

        predicted = outputs.max(1)[1]
        # .item() keeps the running count a plain Python number.
        train_acc += (predicted == labels).sum().item()

    avg_train_loss = train_loss / train_size
    avg_train_acc = train_acc / train_size

    model_gpu.eval()
    with no_grad():
        for images, labels in dataloader['val']:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model_gpu(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * labels.size(0)
            predicted = outputs.max(1)[1]
            val_acc += (predicted == labels).sum().item()
    avg_val_loss = val_loss / val_size
    avg_val_acc = val_acc / val_size

    print(f'Epoch [{(i + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, '
          f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, '
          f'lr: {scheduler.get_last_lr()[0]:.2e}')
    scheduler.step()

    train_loss_list.append(float(avg_train_loss))
    train_acc_list.append(float(avg_train_acc))
    val_loss_list.append(float(avg_val_loss))
    val_acc_list.append(float(avg_val_acc))

# Accuracy curves. The output file keeps its historical name
# 'learning_rate.png'; only the misleading plot title was corrected.
plt.figure(figsize=(8, 6))
plt.plot(val_acc_list, label='val', lw=2, c='b')
plt.plot(train_acc_list, label='train', lw=2, c='k')
plt.title('accuracy')
plt.xticks(size=14)
plt.yticks(size=14)
plt.grid(lw=2)
plt.legend(fontsize=14)
plt.xticks(arange(0, epochs, 2))
plt.savefig(join(save_dir, 'learning_rate.png'))
plt.close()

# Loss curves.
plt.figure(figsize=(8, 6))
plt.plot(val_loss_list, label='val', lw=2, c='b')
plt.plot(train_loss_list, label='train', lw=2, c='k')
plt.title('loss')
plt.xticks(size=14)
plt.yticks(size=14)
plt.grid(lw=2)
plt.legend(fontsize=14)
plt.xticks(arange(0, epochs, 2))
plt.savefig(join(save_dir, 'loss.png'))
plt.close()

# Move the model to the CPU before pickling the whole module.
save(model_gpu.cpu(), join(save_dir, 'model.pth'))
|
||||
|
|
@ -0,0 +1 @@
|
|||
# Open the Ameblo entry page that corresponds to the file given as $1.
# The file's base name is split on '=': field 2 becomes the blog path segment,
# and the part of field 3 before the first '-' becomes the entry id.
file_name=$(basename "$1")
blog_id=$(printf '%s\n' "$file_name" | cut -d '=' -f 2)
entry_id=$(printf '%s\n' "$file_name" | cut -d '=' -f 3 | cut -d '-' -f 1)
open "https://ameblo.jp/${blog_id}/entry-${entry_id}.html"
|
||||
|
|
@ -0,0 +1,164 @@
|
|||
from os import makedirs
|
||||
from torchvision.models import ResNet50_Weights, resnet50
|
||||
from torch.nn import Linear
|
||||
from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \
|
||||
RandomHorizontalFlip, \
|
||||
Resize, CenterCrop, RandomAffine
|
||||
import matplotlib.pyplot as plt
|
||||
from numpy import arange
|
||||
from torchsummary import summary
|
||||
from torch.nn import CrossEntropyLoss
|
||||
from torch.optim import SGD, Adam, lr_scheduler
|
||||
from torchvision.datasets import ImageFolder
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm
|
||||
from settings import datadir
|
||||
from os.path import join
|
||||
from torch.cuda import is_available
|
||||
from torch import no_grad, save
|
||||
from datetime import datetime
|
||||
|
||||
# Fine-tune an ImageNet-pretrained ResNet50 on the image folders under
# <datadir>/dataset/{train,val}, then write the accuracy/loss curves and the
# trained model to <datadir>/artifact/<timestamp>/.

device = 'cuda' if is_available() else 'cpu'

# 'train' applies random augmentation; 'val' only applies a centre crop, a
# fixed 0.8x zoom-out (RandomAffine with a degenerate range) and a resize.
transform = {
    'train': Compose([
        CenterCrop(200),
        RandomHorizontalFlip(p=0.1),
        ToTensor(),
        RandomRotation(degrees=15),
        RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True)
    ]),
    'val': Compose([
        CenterCrop(200),
        ToTensor(),
        RandomAffine(scale=(0.8, 0.8), degrees=(0, 0)),
        Resize(224, antialias=True)
    ])
}
image_folder = {
    'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']),
    'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val'])
}

dataloader = {
    'train': DataLoader(image_folder['train'], batch_size=16, shuffle=True, num_workers=3),
    'val': DataLoader(image_folder['val'], batch_size=16, shuffle=False, num_workers=3)
}

model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

print()

# Freeze every parameter yielded before the first 'layer3' name. The flag
# latches, so that parameter and everything yielded after it stay trainable.
tune = False
for name, layer in model.named_parameters():
    if 'layer3' in name:
        tune = True
    layer.requires_grad = tune

print(model)

# Replace the classifier head so it emits one logit per dataset class.
model.fc = Linear(in_features=2048, out_features=len(image_folder['train'].classes), bias=True)
summary(model=model, input_size=(3, 224, 224), device='cpu')

model_gpu = model.to(device=device)
criterion = CrossEntropyLoss()
# Only the unfrozen stages are handed to the optimizer; layer3 gets a lower
# learning rate than layer4 and the new head.
optimizer = Adam(params=[
    {'params': model_gpu.layer3.parameters(), 'lr': 1e-5},
    {'params': model_gpu.layer4.parameters(), 'lr': 1e-4},
    {'params': model_gpu.fc.parameters(), 'lr': 1e-4},
])
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)
epochs = 50

train_loss_list = list()
train_acc_list = list()
val_loss_list = list()
val_acc_list = list()

save_dir = join(datadir(), 'artifact', str(datetime.now()))
print(save_dir)
makedirs(save_dir, exist_ok=True)

train_size = len(dataloader['train'].dataset)
val_size = len(dataloader['val'].dataset)

for i in range(epochs):
    train_loss = .0
    train_acc = .0
    val_loss = .0
    val_acc = .0

    model_gpu.train()
    for images, labels in tqdm(dataloader['train'], leave=False):
        optimizer.zero_grad()
        images = images.to(device)
        labels = labels.to(device)

        outputs = model_gpu(images)

        loss = criterion(outputs, labels)
        # CrossEntropyLoss averages over the batch by default. Weight each
        # batch mean by its size so that dividing by the dataset size below
        # yields a true per-sample mean.
        train_loss += loss.item() * labels.size(0)

        loss.backward()
        optimizer.step()

        predicted = outputs.max(1)[1]
        # .item() keeps the running count a plain Python number.
        train_acc += (predicted == labels).sum().item()

    avg_train_loss = train_loss / train_size
    avg_train_acc = train_acc / train_size

    model_gpu.eval()
    with no_grad():
        for images, labels in dataloader['val']:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model_gpu(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * labels.size(0)
            predicted = outputs.max(1)[1]
            val_acc += (predicted == labels).sum().item()
    avg_val_loss = val_loss / val_size
    avg_val_acc = val_acc / val_size

    print(f'Epoch [{(i + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, '
          f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, '
          f'lr: {scheduler.get_last_lr()[0]:.2e}')
    scheduler.step()

    train_loss_list.append(float(avg_train_loss))
    train_acc_list.append(float(avg_train_acc))
    val_loss_list.append(float(avg_val_loss))
    val_acc_list.append(float(avg_val_acc))

# Accuracy curves. The output file keeps its historical name
# 'learning_rate.png'; only the misleading plot title was corrected.
plt.figure(figsize=(8, 6))
plt.plot(val_acc_list, label='val', lw=2, c='b')
plt.plot(train_acc_list, label='train', lw=2, c='k')
plt.title('accuracy')
plt.xticks(size=14)
plt.yticks(size=14)
plt.grid(lw=2)
plt.legend(fontsize=14)
plt.xticks(arange(0, epochs, 2))
plt.savefig(join(save_dir, 'learning_rate.png'))
plt.close()

# Loss curves.
plt.figure(figsize=(8, 6))
plt.plot(val_loss_list, label='val', lw=2, c='b')
plt.plot(train_loss_list, label='train', lw=2, c='k')
plt.title('loss')
plt.xticks(size=14)
plt.yticks(size=14)
plt.grid(lw=2)
plt.legend(fontsize=14)
plt.xticks(arange(0, epochs, 2))
plt.savefig(join(save_dir, 'loss.png'))
plt.close()

# Move the model to the CPU before pickling the whole module.
save(model_gpu.cpu(), join(save_dir, 'model.pth'))
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
from insightface.app import FaceAnalysis
|
||||
from settings import datadir
|
||||
from os import listdir
|
||||
from os.path import join, isfile
|
||||
from PIL import Image
|
||||
from numpy import array
|
||||
from json import dump
|
||||
|
||||
# Compute one face embedding per image under <datadir>/sample_set/<name>/ and
# dump {image path: embedding} to <datadir>/sample_emb.json.
from os.path import isdir  # needed to skip stray files in the sample root

face_analysis = FaceAnalysis()
face_analysis.prepare(ctx_id=0, det_size=(160, 160))

teacher_dir = join(datadir(), 'sample_set')
teacher_files = []
teacher_embeddings = []
teacher_dict = {}
for name in listdir(teacher_dir):
    person_dir = join(teacher_dir, name)
    if not isdir(person_dir):
        # A plain file in the sample root would make listdir() raise.
        continue
    for file in listdir(person_dir):
        im_path = join(person_dir, file)
        if not isfile(im_path):
            continue
        print(im_path)
        # Force three channels before the RGB -> BGR swap: indexing the
        # third axis fails on grayscale/palette images, and an alpha
        # channel would otherwise be dropped without notice.
        with Image.open(im_path) as image:
            bgr_pixels = array(image.convert('RGB'))[:, :, [2, 1, 0]]
        faces = face_analysis.get(bgr_pixels)
        if not faces:
            continue
        # Only the first detected face is used.
        vector = faces[0].embedding.tolist()
        teacher_embeddings.append(vector)
        teacher_files.append(name)
        teacher_dict[im_path] = vector

with open(file=join(datadir(), 'sample_emb.json'), mode='w') as f:
    dump(teacher_dict, f)
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
from functools import cache
|
||||
from os import getcwd
|
||||
from os.path import join
|
||||
|
||||
# Ameblo blog ids handled by this project, one per line.
blog_list = [
    'angerme-ss-shin',
    'angerme-amerika',
    'angerme-new',
    'juicejuice-official',
    'tsubaki-factory',
    'morningmusume-9ki',
    'morningmusume-10ki',
    'mm-12ki',
    'morningm-13ki',
    'morningmusume15ki',
    'morningmusume16ki',
    'beyooooonds-rfro',
    'beyooooonds-chicatetsu',
    'beyooooonds',
    'ocha-norma',
    'countrygirls',
    'risa-ogata',
    # 'shimizu--saki' is currently disabled.
    'kumai-yurina-blog',
    'sudou-maasa-blog',
    'sugaya-risako-blog',
    'miyamotokarin-official',
    'kobushi-factory',
    'sayumimichishige-blog',
    'kudo--haruka',
    'airisuzuki-officialblog',
    'angerme-ayakawada',
    'miyazaki-yuka-blog',
    'tsugunaga-momoko-blog',
    'tokunaga-chinami-blog',
    'c-ute-official',
    'tanakareina-blog',
]
|
||||
|
||||
|
||||
# Blogs whose posts are all attributed to one fixed name, whatever theme the
# post carries. 'tanakareina-blog' is deliberately absent: it is checked only
# after the theme spelling fix below, so it cannot live in this table.
_FIXED_THEME_BY_BLOG = {
    'risa-ogata': '小片リサ',
    'shimizu--saki': '清水佐紀',
    'kumai-yurina-blog': '熊井友理奈',
    'sudou-maasa-blog': '須藤茉麻',
    'sugaya-risako-blog': '菅谷梨沙子',
    'miyamotokarin-official': '宮本佳林',
    'sayumimichishige-blog': '道重さゆみ',
    'kudo--haruka': '工藤遥',
    'airisuzuki-officialblog': '鈴木愛理',
    'angerme-ayakawada': '和田彩花',
    'miyazaki-yuka-blog': '宮崎由加',
    'tsugunaga-momoko-blog': '嗣永桃子',
    'natsuyaki-miyabi-blog': '夏焼雅',
    'tokunaga-chinami-blog': '徳永千奈美',
}


@cache
def theme_curator(theme: str, blog_id: str) -> str:
    """Normalise a post's theme for the given blog.

    Rules are applied in this order; the first match wins:

    1. An empty theme becomes the string ``'None'``.
    2. A blog listed in ``_FIXED_THEME_BY_BLOG`` always yields its fixed name.
    3. The theme ``'梁川 奈々美'`` is rewritten without the space.
    4. ``'tanakareina-blog'`` yields ``'田中れいな'``.
    5. Anything else is returned unchanged.

    Results are memoised per ``(theme, blog_id)`` pair.
    """
    if theme == "":
        return 'None'
    fixed_name = _FIXED_THEME_BY_BLOG.get(blog_id)
    if fixed_name is not None:
        return fixed_name
    if theme == '梁川 奈々美':
        return '梁川奈々美'
    if blog_id == "tanakareina-blog":
        return "田中れいな"
    return theme
|
||||
|
||||
|
||||
@cache
def datadir():
    """Return the path of the ``data`` directory under the current working directory.

    The result is memoised, so the working directory seen by the first call
    is the one every later call reports.
    """
    working_dir = getcwd()
    return join(working_dir, 'data')
|
||||
|
||||
|
||||
# Header mapping with a desktop Firefox User-Agent string.
# NOTE(review): presumably passed to the HTTP client by the scraper modules - confirm.
request_header = dict([
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0'),
])
|
||||
|
||||
|
||||
class FaceCropProcesses:
    """Per-stage process counts for the face-cropping pipeline."""

    # NOTE(review): presumably the number of image-loading processes - confirm against the caller.
    load = 1
    # Number of pre-processing worker processes.
    pre_process = 10
    # Number of prediction processes; the pipeline passes each one its index as a GPU id.
    predict = 3
    # Number of post-processing worker processes.
    post_process = 4
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
from shutil import copyfile
|
||||
|
||||
from insightface.app import FaceAnalysis
|
||||
from os import getcwd, listdir, makedirs
|
||||
from os.path import join, isdir, isfile
|
||||
from numpy import dot, array
|
||||
from numpy.linalg import norm
|
||||
from PIL import Image
|
||||
from sys import argv
|
||||
|
||||
# Sort the images of a directory by similarity to one reference face.
#
# Usage: <script> <reference image> <image directory>
# Every file in the directory that contains a detectable face is copied into
# <image directory>/true when the cosine similarity between its first face
# and the reference face exceeds 0.3, otherwise into <image directory>/false.

if len(argv) != 3:
    raise SystemExit(f'usage: {argv[0]} <reference image> <image directory>')
if not isdir(argv[2]):
    raise SystemExit(f'not a directory: {argv[2]}')
if not isfile(argv[1]):
    raise SystemExit(f'not a file: {argv[1]}')


def _load_bgr(path):
    """Read *path* as a three-channel image and return its pixels in BGR order."""
    # convert('RGB') guards the channel swap: grayscale/palette images have
    # no third axis and RGBA images would silently lose their alpha channel.
    with Image.open(path) as handle:
        return array(handle.convert('RGB'))[:, :, [2, 1, 0]]


face_analysis = FaceAnalysis()
face_analysis.prepare(ctx_id=0, det_size=(160, 160))

print(argv)
target_dir = join(getcwd(), argv[2])
collect_image = _load_bgr(join(getcwd(), argv[1]))
image_files: list[str] = listdir(target_dir)

collect_image_emb = face_analysis.get(collect_image)
if len(collect_image_emb) == 0:
    print("Not found face: ", argv[1])
    raise SystemExit(1)

# The reference vector and its norm are loop invariants.
reference = collect_image_emb[0].embedding
reference_norm = norm(reference)

makedirs(join(target_dir, "true"), exist_ok=True)
makedirs(join(target_dir, "false"), exist_ok=True)

for file in image_files:
    source = join(target_dir, file)
    if not isfile(source):
        continue
    try:
        image = _load_bgr(source)
    except OSError as err:
        # Pillow reports unreadable or non-image files with OSError
        # subclasses; skip them instead of aborting the whole run.
        print("Skip unreadable file: ", file, err)
        continue
    emb = face_analysis.get(image)
    if not emb:
        continue
    candidate = emb[0].embedding
    cosine = dot(candidate, reference) / (norm(candidate) * reference_norm)
    print(file, cosine)
    verdict = "true" if cosine > 0.3 else "false"
    copyfile(source, join(target_dir, verdict, file))
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
from os import makedirs, listdir
|
||||
from os.path import join
|
||||
from settings import datadir
|
||||
from shutil import rmtree, copyfile
|
||||
from random import random
|
||||
|
||||
# Rebuild <datadir>/dataset/{train,val} from <datadir>/sample_set/<class>/,
# sending roughly `valid_rate` of every class to the validation split.
from os.path import isdir, isfile  # needed to skip stray entries in sample_set

valid_rate = 0.1

sample_root = join(datadir(), 'sample_set')
dataset_root = join(datadir(), 'dataset')
train_root = join(dataset_root, 'train')
val_root = join(dataset_root, 'val')

makedirs(dataset_root, exist_ok=True)
# Start from empty split directories so files from an earlier run cannot leak in.
rmtree(train_root, ignore_errors=True)
rmtree(val_root, ignore_errors=True)
makedirs(train_root, exist_ok=True)
makedirs(val_root, exist_ok=True)

for name in listdir(sample_root):
    class_dir = join(sample_root, name)
    if not isdir(class_dir):
        # A plain file in sample_set is not a class; listdir() on it would raise.
        continue
    print(name)
    makedirs(join(train_root, name))
    makedirs(join(val_root, name))

    # Only regular files can be copied with copyfile().
    files = [entry for entry in listdir(class_dir) if isfile(join(class_dir, entry))]
    # Shuffle by sorting on a random key (uses only the already-imported random()).
    files.sort(key=lambda _: random())

    # Take a fixed share rather than flipping a coin per file: a class with
    # two or more images then always gets at least one validation image, so
    # its validation folder is never left empty.
    val_count = max(1, round(len(files) * valid_rate)) if len(files) > 1 else 0

    for position, file in enumerate(files):
        split_root = val_root if position < val_count else train_root
        copyfile(src=join(class_dir, file), dst=join(split_root, name, file))
|
||||
|
|
@ -0,0 +1,23 @@
|
|||
from torchvision.transforms import Compose
|
||||
import matplotlib.pyplot as plt
|
||||
from numpy import transpose
|
||||
import torchvision.datasets as datasets
|
||||
from torchvision import models
|
||||
from PIL import Image
|
||||
|
||||
print(dir(datasets))
|
||||
|
||||
|
||||
def show_image(x):
    """Draw every element of *x* side by side in one single-row figure.

    Each element is converted with ``.numpy()`` and its axes are reordered
    with ``(1, 2, 0)`` before display.
    NOTE(review): this presumably expects channel-first (C, H, W) tensors - confirm.
    """
    count = len(x)
    fig = plt.figure(figsize=(10, 10))
    for position, tensor in enumerate(x, start=1):
        pixels = transpose(tensor.numpy(), (1, 2, 0))
        # The new subplot becomes the current axes, which the pyplot calls below act on.
        fig.add_subplot(1, count, position)
        plt.axis('off')
        plt.imshow(pixels)
|
||||
|
||||
|
||||
# Load Inception v3 with its ImageNet weights and print the architecture.
# `pretrained=True` is deprecated in torchvision; the explicit weights enum
# below selects the same checkpoint and matches the Inception training script.
model = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)

print(model)
|
||||
Loading…
Reference in New Issue