import torch.cuda
from numpy import ndarray
import numpy as np
from DBFace_without_OpenCV import DBFace
import settings
from os import makedirs, listdir, stat, utime
from os.path import join, exists
from tqdm import tqdm
from PIL import Image, ImageDraw
from numpy import array, arctan2, pi, zeros, uint8, float32
from aiofiles import open as a_open
from asyncio import gather, run
from multiprocessing import Queue, Process, get_start_method, set_start_method
from time import time, sleep
from io import BytesIO
from math import ceil, sqrt
from torch import from_numpy, cuda, Tensor, inference_mode, nn
import atexit

face_dir = join(settings.datadir(), 'face_cropped')
blog_images = join(settings.datadir(), 'blog_images')

if not exists(face_dir):
    makedirs(face_dir)


def truncate(landmark: list[tuple[float]]) -> tuple[tuple[int, int], float]:
    left_eye, right_eye, nose, left_mouth, right_mouth = landmark
    center_x = sum((left_eye[0], right_eye[0], left_mouth[0], right_mouth[0])) / 4
    center_y = sum((left_eye[1], right_eye[1], left_mouth[1], right_mouth[1])) / 4
    eye_center = (right_eye[0] + left_eye[0]) / 2, (right_eye[1] + left_eye[1]) / 2
    mouth_center = (right_mouth[0] + left_mouth[0]) / 2, (right_mouth[1] + left_mouth[1]) / 2
    return (int(center_x), int(center_y)), arctan2(eye_center[0] - mouth_center[0], mouth_center[1] - eye_center[1])


def load_image(basedir: str, queue: Queue) -> None:
    def list_up():
        for name in listdir(basedir):
            for image_file in listdir(join(basedir, name)):
                yield name, image_file

    async def single_read(path: tuple[str, str]):
        async with a_open(join(basedir, *path), mode='rb') as f:
            return await f.read(), path

    async def parallel_read(paths: list[tuple[str, str]]):
        return await gather(*[single_read(path) for path in paths])

    file_list = [i for i in list_up()]
    bar = tqdm(total=file_list.__len__())
    for i in range(0, file_list.__len__(), 20):

        while queue.qsize() > 300:
            sleep(1e-3)

        chunk = file_list[i:i + 20]

        img_bins = run(parallel_read(chunk))
        for img_bin, p in img_bins:
            queue.put((Image.open(BytesIO(img_bin)), p))
            bar.update(1)
    return


def pre_process(q1: Queue, q2: Queue):
    mean = [0.408, 0.447, 0.47]
    std = [0.289, 0.274, 0.278]
    while True:
        while q2.qsize() > 4:
            sleep(1e-4)
        image, path = q1.get()
        width, height = image.size
        if width * height > 400_0000:
            image = image.resize(size=(width // 2, height // 2))
        # print(path, image.size)
        image = image.crop((0, 0, ceil(width / 32) * 32, ceil(height / 32) * 32))  # padding
        img_arr = array(image)
        img_arr = ((img_arr / 255.0 - mean) / std).astype(float32).transpose(2, 0, 1)
        torch_image = from_numpy(img_arr)[None]
        q2.put((torch_image.cuda(), path))
        pass


def predict(q1: Queue, q2: Queue):
    model_path = '/home/tomokazu/PycharmProjects/helloproject-ai/DBFace_without_OpenCV/model/dbface.pth'
    db_face = DBFace()
    db_face.eval()
    db_face.cuda()
    db_face.load(model_path)
    i = 0
    start = time()
    while True:
        # if i % 5000 == 0:
        #     cuda.empty_cache()
        # torch_image:Tensor=torch_image
        # print(i, path, torch_image.size())
        i += 1
        try:
            torch_image, path = q1.get()
            with inference_mode():
                q2.put((db_face(torch_image), path))
        except Exception as e:
            print(e)
            del db_face
            cuda.empty_cache()
            db_face = DBFace()
            db_face.eval()
            db_face.cuda()
            db_face.load(model_path)


def exp(v):
    if isinstance(v, tuple) or isinstance(v, list):
        return [exp(item) for item in v]
    elif isinstance(v, ndarray):
        return np.array([exp(item) for item in v], v.dtype)

    gate = 1
    base = np.exp(1)
    if abs(v) < gate:
        return v * base

    if v > 0:
        return np.exp(v)
    else:
        return -np.exp(-v)


def nms(objs, iou=0.5):
    if objs is None or len(objs) <= 1:
        return objs

    objs = sorted(objs, key=lambda obj: obj.score, reverse=True)
    keep = []
    flags = [0] * len(objs)
    for index, obj in enumerate(objs):

        if flags[index] != 0:
            continue

        keep.append(obj)
        for j in range(index + 1, len(objs)):
            if flags[j] == 0 and obj.iou(objs[j]) > iou:
                flags[j] = 1
    return keep


class BBox:

    def __init__(self, label, xyrb, score=0, landmark=None, rotate=False):
        self.label = label
        self.score = score
        self.landmark = landmark
        self.x, self.y, self.r, self.b = xyrb
        self.rotate = rotate

        minx = min(self.x, self.r)
        maxx = max(self.x, self.r)
        miny = min(self.y, self.b)
        maxy = max(self.y, self.b)
        self.x, self.y, self.r, self.b = minx, miny, maxx, maxy

    def __repr__(self):
        landmark_formated = ",".join(
            [str(item[:2]) for item in self.landmark]) if self.landmark is not None else "empty"
        return f"(BBox[{self.label}]: x={self.x:.2f}, y={self.y:.2f}, r={self.r:.2f}, " + \
            f"b={self.b:.2f}, width={self.width:.2f}, height={self.height:.2f}, landmark={landmark_formated})"

    @property
    def width(self):
        return self.r - self.x + 1

    @property
    def height(self):
        return self.b - self.y + 1

    @property
    def area(self):
        return self.width * self.height

    @property
    def haslandmark(self):
        return self.landmark is not None

    @property
    def xxxxxyyyyy_cat_landmark(self):
        x, y = zip(*self.landmark)
        return x + y

    @property
    def box(self):
        return [self.x, self.y, self.r, self.b]

    @box.setter
    def box(self, newvalue):
        self.x, self.y, self.r, self.b = newvalue

    @property
    def xywh(self):
        return [self.x, self.y, self.width, self.height]

    @property
    def center(self):
        return [(self.x + self.r) * 0.5, (self.y + self.b) * 0.5]

    # return cx, cy, cx.diff, cy.diff
    def safe_scale_center_and_diff(self, scale, limit_x, limit_y):
        cx = clip_value((self.x + self.r) * 0.5 * scale, limit_x - 1)
        cy = clip_value((self.y + self.b) * 0.5 * scale, limit_y - 1)
        return [int(cx), int(cy), cx - int(cx), cy - int(cy)]

    def safe_scale_center(self, scale, limit_x, limit_y):
        cx = int(clip_value((self.x + self.r) * 0.5 * scale, limit_x - 1))
        cy = int(clip_value((self.y + self.b) * 0.5 * scale, limit_y - 1))
        return [cx, cy]

    def clip(self, width, height):
        self.x = clip_value(self.x, width - 1)
        self.y = clip_value(self.y, height - 1)
        self.r = clip_value(self.r, width - 1)
        self.b = clip_value(self.b, height - 1)
        return self

    def iou(self, other):
        return computeIOU(self.box, other.box)


def computeIOU(rec1, rec2):
    cx1, cy1, cx2, cy2 = rec1
    gx1, gy1, gx2, gy2 = rec2
    S_rec1 = (cx2 - cx1 + 1) * (cy2 - cy1 + 1)
    S_rec2 = (gx2 - gx1 + 1) * (gy2 - gy1 + 1)
    x1 = max(cx1, gx1)
    y1 = max(cy1, gy1)
    x2 = min(cx2, gx2)
    y2 = min(cy2, gy2)

    w = max(0, x2 - x1 + 1)
    h = max(0, y2 - y1 + 1)
    area = w * h
    iou = area / (S_rec1 + S_rec2 - area)
    return iou


def clip_value(value, high, low=0):
    return max(min(value, high), low)


def post_process(queue: Queue, threshold: float = 0.4, nms_iou: float = 0.5):
    while True:
        tensor, path = queue.get()
        hm, box, landmark = tensor
        del tensor
        name, file = path
        hm_pool = nn.functional.max_pool2d(hm, 3, 1, 1)
        t = ((hm == hm_pool).float() * hm).view(1, -1).cpu()
        if t.size()[1] < 1000:
            continue
        scores, indices = t.topk(1000)
        hm_height, hm_width = hm.shape[2:]
        del hm
        scores = scores.squeeze()
        indices = indices.squeeze()
        ys = list((indices / hm_width).int().data.numpy())
        xs = list((indices % hm_width).int().data.numpy())
        scores = list(scores.data.numpy())
        box = box.cpu().squeeze().data.numpy()
        landmark = landmark.cpu().squeeze().data.numpy()

        stride = 4
        objs = []
        for cx, cy, score in zip(xs, ys, scores):
            if score < threshold:
                break

            x, y, r, b = box[:, cy, cx]
            xyrb = (array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride
            x5y5 = landmark[:, cy, cx]
            x5y5 = (exp(x5y5 * 4) + ([cx] * 5 + [cy] * 5)) * stride
            box_landmark = list(zip(x5y5[:5], x5y5[5:]))
            objs.append(BBox(0, xyrb=xyrb, score=score, landmark=box_landmark))
        predicted = nms(objs, iou=nms_iou)
        image = Image.open(join(blog_images, *path))

        width, height = image.size
        if width * height > 400_0000:
            image = image.resize(size=(width // 2, height // 2))

        for order, face in enumerate(predicted):
            trans = truncate(face.landmark)
            rotated = image.rotate(angle=trans[1] * 360 / (2 * pi), center=trans[0])
            image_size = max(face.width, face.height) * sqrt(2) // 2
            if image_size < 100:
                continue
            cropped = rotated.crop((trans[0][0] - image_size, trans[0][1] - image_size, trans[0][0] + image_size,
                                    trans[0][1] + image_size))
            if not exists(join(face_dir, name)):
                makedirs(join(face_dir, name), exist_ok=True)
            saved_path = join(face_dir, name, file.replace('.jpg', '-' + str(order + 1) + '.jpg'))
            cropped.save(saved_path)
            utime(path=saved_path, times=(stat(join(blog_images, *path)).st_atime,
                                          stat(join(blog_images, *path)).st_mtime))


if __name__ == '__main__':
    if get_start_method() == 'fork':
        set_start_method('spawn', force=True)

    try:
        Load_Q, PreProcess_Q, Predict_Q, PostProcess_Q = (Queue() for i in range(4))
        Load_Processes = [Process(target=load_image, args=(blog_images, Load_Q))
                          for _ in range(settings.FaceCropProcesses.load)]
        PreProcesses = [Process(target=pre_process, args=(Load_Q, PreProcess_Q))
                        for _ in range(settings.FaceCropProcesses.pre_process)]
        Predict_Process = [Process(target=predict, args=(PreProcess_Q, Predict_Q))
                           for _ in range(settings.FaceCropProcesses.predict)]
        PostProcesses = [Process(target=post_process, args=(Predict_Q,))
                         for _ in range(settings.FaceCropProcesses.post_process)]
        [p.start() for p in Load_Processes]
        [p.start() for p in PreProcesses]
        [p.start() for p in Predict_Process]
        [p.start() for p in PostProcesses]
        while True:
            sleep(5)
            # print(Load_Q.qsize(), PreProcess_Q.qsize(), Predict_Q.qsize(), PostProcess_Q.qsize())
            if sum((Load_Q.qsize(), PreProcess_Q.qsize(), Predict_Q.qsize(), PostProcess_Q.qsize())) == 0:
                raise KeyboardInterrupt

    except KeyboardInterrupt as e:
        print(e)
        [p.terminate() for p in Load_Processes]
        [p.terminate() for p in PreProcesses]
        [p.terminate() for p in Predict_Process]
        [p.terminate() for p in PostProcesses]