From 6eb2ba7a55feb727617ea22e6a9a5dc3b2625390 Mon Sep 17 00:00:00 2001 From: yayoimizuha Date: Sun, 20 Aug 2023 00:11:58 +0900 Subject: [PATCH] update --- .drone.yml | 2 +- ameblo_download.py | 13 +- convnext_finetune.py | 231 ++++++++++++++++++++++++++++++++++ dataloader_infer.py | 69 ++++++++++ facenet_gen_model.py | 15 +++ facenet_transfer_learning.py | 236 +++++++++++++++++++++++++++++++++++ resnet_finetune_update.py | 218 ++++++++++++++++++++++++++++++++ settings.py | 2 +- similar_face.py | 13 +- split_train_val.py | 4 +- swin_finetune.py | 221 ++++++++++++++++++++++++++++++++ transform_simulator.py | 74 +++++++++++ vit_b_finetune.py | 221 ++++++++++++++++++++++++++++++++ 13 files changed, 1303 insertions(+), 16 deletions(-) create mode 100644 convnext_finetune.py create mode 100644 dataloader_infer.py create mode 100644 facenet_gen_model.py create mode 100644 facenet_transfer_learning.py create mode 100644 resnet_finetune_update.py create mode 100644 swin_finetune.py create mode 100644 transform_simulator.py create mode 100644 vit_b_finetune.py diff --git a/.drone.yml b/.drone.yml index 43d2c20..5f04db6 100644 --- a/.drone.yml +++ b/.drone.yml @@ -22,4 +22,4 @@ steps: - mkdir -p data - $mount_command - ls data/ - - CI=False python resnet_finetune_vggface.py \ No newline at end of file + - CI=False python facenet_transfer_learning.py \ No newline at end of file diff --git a/ameblo_download.py b/ameblo_download.py index 6f8a9f7..9e1d7b6 100755 --- a/ameblo_download.py +++ b/ameblo_download.py @@ -52,8 +52,8 @@ async def run_each(name: str) -> None: executor = ProcessPoolExecutor(max_workers=cpu_count()) lock = Lock() futures = await tqdm.gather( - *[parse_blog_post(url, sem, session, executor, lock) for url in url_list], - desc='scan blog') + *[parse_blog_post(url, sem, session, executor, lock) for url in url_list], + desc='scan blog') executor.shutdown() image_link_package = list(chain.from_iterable(futures)) @@ -105,7 +105,7 @@ def parse_image(html: str, url: str) -> list[tuple[str, str, datetime]]: theme = settings.theme_curator(json_obj['theme_name'], blog_account) date = datetime.fromisoformat(json_obj['last_edit_datetime']) blog_entry = json_obj['entry_id'] - entry_body = BeautifulSoup(json_obj['entry_text'].replace('
', '\n'), 'lxml') + entry_body = BeautifulSoup('
{}
'.format(json_obj['entry_text'].replace('
', '\n')), 'lxml') # print(entry_body) for emoji in entry_body.find_all('img', class_='emoji'): emoji.decompose() @@ -159,11 +159,12 @@ async def parse_blog_post(urls: str, sem: Semaphore, session: ClientSession, exe try: async with session.get(page_url) as resp: resp_html = await resp.text() + if resp.status != 200: + raise Exception # await sleep(1.0) break - except ClientConnectorError as e: + except: await sleep(5.0) - print(e, file=sys.stderr) o = executor.submit(parse_image, resp_html, page_url) async with lock: @@ -205,7 +206,7 @@ def grep_modified_time(html: str) -> str: if __name__ == '__main__': - with open(file=path.join(settings.datadir(),'api_urls.txt'),mode='w') as f: + with open(file=path.join(settings.datadir(), 'api_urls.txt'), mode='w') as f: f.write("") for blog in settings.blog_list: run(run_each(blog)) diff --git a/convnext_finetune.py b/convnext_finetune.py new file mode 100644 index 0000000..0e53e9e --- /dev/null +++ b/convnext_finetune.py @@ -0,0 +1,231 @@ +from os import makedirs, environ + +from torchinfo import summary +from torchvision.models import convnext_large, ConvNeXt_Large_Weights, convnext_base, ConvNeXt_Base_Weights +from torch.nn import Linear, Dropout3d, Sequential, Dropout +from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \ + RandomHorizontalFlip, \ + Resize, CenterCrop, RandomAffine, GaussianBlur, RandomAutocontrast, InterpolationMode, AugMix, RandomErasing +import matplotlib + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from numpy import arange, ndarray, ceil, full, uint8 +from torch.nn import CrossEntropyLoss +from torch.optim import SGD, Adam, lr_scheduler +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +from tqdm import tqdm +from PIL import Image, ImageDraw, ImageFont +from settings import datadir +from os.path import join +from torch.cuda import is_available +from torch import no_grad, save, Tensor +from datetime import datetime +from distutils.util import strtobool + +CI = bool(strtobool(environ['CI'])) +device = 'cuda' if is_available() else 'cpu' +transform = { + 'train': Compose([ + RandomHorizontalFlip(p=0.1), + GaussianBlur(kernel_size=3), + RandomAutocontrast(), + # Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ToTensor(), + RandomErasing(), + RandomRotation(degrees=15), + RandomResizedCrop(size=232, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True, + interpolation=InterpolationMode.BILINEAR), + # ConvNeXt_Large_Weights.IMAGENET1K_V1.transforms() + ]), + 'val': Compose([ + # ConvNeXt_Large_Weights.IMAGENET1K_V1.transforms(), + RandomAffine(scale=(0.8, 0.8), degrees=(0, 0)), + Resize(232, antialias=True, interpolation=InterpolationMode.BILINEAR), + ToTensor() + ]) +} +image_folder = { + 'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']), + 'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val']) +} + +dataloader = { + 'train': DataLoader(image_folder['train'], batch_size=32, shuffle=True, num_workers=3), + 'val': DataLoader(image_folder['val'], batch_size=32, shuffle=False, num_workers=3) +} + + +def plot_dataset(dataloader: DataLoader | tuple, col_len: int = 8, + label_text: str | None = None) -> Image.Image: + if isinstance(dataloader, DataLoader): + images, labels = iter(dataloader).__next__() + else: + images, labels = dataloader + + images: Tensor = images + labels: Tensor = labels + images: ndarray = images.numpy() + + if label_text is None: + labels: list[str] = [str(i) for i in labels.tolist()] + else: + labels: list[str] = [label_text[i] for i in labels.tolist()] + + batch_size, _, width, height = images.shape + # print(batch_size, width, height) + # print(images.dtype) + rows = ceil(batch_size / col_len) + # print(amax(images), amin(images)) + space_y, space_x, font_size = 50, 30, 20 + shape_y, shape_x = images.shape[-2:] + base_img = full(shape=((height + space_y) * int(rows), width * col_len + space_x * (col_len - 1), 3), dtype=uint8, + fill_value=255) + for order, image in enumerate(images): + order_y, order_x = order // col_len, order % col_len + image = (image.transpose([1, 2, 0]) * 255).astype(uint8) + # print(order_y, order_x) + # print(order_y * (shape_y + 30) + 30, (order_y + 1) * (shape_y + 30), + # order_x * (shape_x + 20), (order_x + 1) * (shape_x + 20) - 20) + base_img[order_y * (shape_y + space_y) + space_y:(order_y + 1) * (shape_y + space_y), + order_x * (shape_x + space_x):(order_x + 1) * (shape_x + space_x) - space_x, :] = image + pil_image = Image.fromarray(base_img) + font = ImageFont.truetype(font=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc', size=24) + draw = ImageDraw.Draw(pil_image) + pad = 5 + for order, label in enumerate(labels): + order_y, order_x = order // col_len, order % col_len + draw.text(((shape_x + space_x) * order_x + pad, (shape_y + space_y) * order_y + pad), label, 'black', font=font) + + return pil_image + # pyplot.imshow((images[0].transpose([1, 2, 0]) * 255).astype(uint8)) + + +model = convnext_base(weights=ConvNeXt_Base_Weights.IMAGENET1K_V1) +# model = resnet50(weights=None) + +print() + +tune = False +for name, layer in model.named_parameters(): + if 'features.6' in name: + tune = True + layer.requires_grad = tune + +# print(model) + +model.classifier[2] = Linear(in_features=1024, out_features=image_folder['train'].classes.__len__(), bias=True) +# model.classifier = Sequential(Dropout(p=.5), model.classifier) +model.classifier.insert(0, Dropout3d(p=.5)) +summary(model=model, input_size=(1, 3, 518, 518), device='cpu') + +model_gpu = model.to(device=device) +criterion = CrossEntropyLoss() +# optimizer = Adam(model_gpu.parameters(), lr=1e-4) +optimizer = Adam(params=[ + # {'params': model_gpu.conv1.parameters(), 'lr': 1e-8}, + # {'params': model_gpu.bn1.parameters(), 'lr': 1e-8}, + # {'params': model_gpu.relu.parameters(), 'lr': 1e-8}, + # {'params': model_gpu.maxpool.parameters(), 'lr': 1e-8}, + # {'params': model_gpu.layer1.parameters(), 'lr': 1e-8}, + # {'params': model_gpu.layer2.parameters(), 'lr': 1e-8}, + {'params': model_gpu.features[6].parameters(), 'lr': 1e-5}, + {'params': model_gpu.features[7].parameters(), 'lr': 1e-4}, + {'params': model_gpu.classifier.parameters(), 'lr': 1e-4}, + +]) + +# scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5) +epochs = 100 + +train_loss_list = list() +train_acc_list = list() +val_loss_list = list() +val_acc_list = list() + +save_dir = join(datadir(), 'artifact', 'convnext-base_' + datetime.now().__str__()) +print(save_dir) +makedirs(save_dir, exist_ok=True) +makedirs(join(save_dir, 'pallets'), exist_ok=True) + +for epoch in range(epochs): + train_loss = .0 + train_acc = .0 + val_loss = .0 + val_acc = .0 + + model_gpu.train() + makedirs(join(save_dir, 'pallets', str(epoch)), exist_ok=True) + + for count, (images, labels) in enumerate(tqdm(dataloader['train'])): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch), 'pallet.jpg')) + optimizer.zero_grad() + images = images.to(device) + labels = labels.to(device) + + outputs = model(images) + + loss = criterion(outputs, labels) + train_loss += loss.item() + + loss.backward() + optimizer.step() + + predicted = outputs.max(1)[1] + train_acc += (predicted == labels).sum() + + avg_train_loss = train_loss / dataloader['train'].dataset.__len__() + avg_train_acc = train_acc / dataloader['train'].dataset.__len__() + + model_gpu.eval() + with no_grad(): + for images, labels in dataloader['val']: + images = images.to(device) + labels = labels.to(device) + outputs = model_gpu(images) + loss = criterion(outputs, labels) + val_loss += loss.item() + predicted = outputs.max(1)[1] + val_acc += (predicted == labels).sum() + avg_val_loss = val_loss / dataloader['val'].dataset.__len__() + avg_val_acc = val_acc / dataloader['val'].dataset.__len__() + + print(f'Epoch [{(epoch + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, ' + f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, ' + ) # f'lr: {scheduler.get_last_lr()[0]:.2e}') + # scheduler.step() + + train_loss_list.append(float(avg_train_loss)) + train_acc_list.append(float(avg_train_acc)) + val_loss_list.append(float(avg_val_loss)) + val_acc_list.append(float(avg_val_acc)) + + plt.figure(figsize=(8, 6)) + plt.plot(val_acc_list, label='val', lw=2, c='b') + plt.plot(train_acc_list, label='train', lw=2, c='k') + plt.title('learning rate') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 2)) + plt.savefig(join(save_dir, 'learning_rate.png')) + plt.close() + + plt.figure(figsize=(8, 6)) + plt.plot(val_loss_list, label='val', lw=2, c='b') + plt.plot(train_loss_list, label='train', lw=2, c='k') + plt.title('loss') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 2)) + plt.savefig(join(save_dir, 'loss.png')) + plt.close() + +save(model_gpu.cpu(), join(save_dir, 'model.pth')) diff --git a/dataloader_infer.py b/dataloader_infer.py new file mode 100644 index 0000000..c7786bd --- /dev/null +++ b/dataloader_infer.py @@ -0,0 +1,69 @@ +from os import makedirs +from os.path import join, exists, basename +from shutil import rmtree, copyfile +from more_itertools import chunked +from torch import load, no_grad, device +from torch.cuda import is_available +from torch.utils.data import DataLoader +from torchvision.datasets import ImageFolder +from torchvision.transforms import Compose, ToTensor, Resize, CenterCrop +from torchinfo import summary +from tqdm import tqdm +from settings import datadir +from concurrent.futures import ThreadPoolExecutor +from pandas import DataFrame +from seaborn import heatmap, color_palette, set_palette +from matplotlib import pyplot +from japanize_matplotlib import japanize + +device = device('cuda' if is_available() else 'cpu') +# device = 'cpu' +print(f'device: {device}') +model_path: str = join(datadir(), 'artifact', 'facenet-tl_2023-06-03 23:48:19.808311', 'model.pth') +print(f'model path: {model_path}') +input_shape: int = 256 +batch_size = 64 +source_dir = join(datadir(), 'dataset', 'val') +print(f'judge file: {source_dir}') +dest_dir = join(datadir(), 'test_infer') +image_class = ImageFolder(root=join(datadir(), 'dataset', 'train')).classes +with open(join(datadir(), 'class_text'), mode='w') as f: + f.write(str(image_class)) +rmtree(dest_dir) +makedirs(dest_dir) + +transform = Compose([Resize(size=256), ToTensor()]) +image_folder = ImageFolder(root=source_dir, transform=transform) +dataloader = DataLoader(image_folder, batch_size=batch_size, shuffle=False, num_workers=8) + +model = load(f=model_path) +model = model.to(device) +model.eval() +for layer in model.parameters(): + layer.requires_grad = False + +# summary(model=model, input_size=(batch_size, 3, input_shape, input_shape), device=device) + +heatmap_df = DataFrame(index=image_class, columns=image_folder.classes).fillna(0) +with ThreadPoolExecutor(max_workers=60) as executor, no_grad(): + for (images, labels), fileinfo in zip(tqdm(dataloader), chunked(image_folder.imgs, n=batch_size)): + # print(labels, fileinfo) + res = model(images.to(device)) + for name, (filename, person) in zip(res.to(device).max(1).indices.tolist(), fileinfo): + if not exists(join(dest_dir, image_class[name])): + makedirs(join(dest_dir, image_class[name]), exist_ok=True) + # print(name, filename, person) + # copyfile(src=filename, + # dst=join(dest_dir, image_folder.classes[name], basename(filename))) + if image_class[name] != image_folder.classes[person]: + heatmap_df[image_folder.classes[person]][image_class[name]] += 1 + executor.submit(copyfile, filename, join(dest_dir, image_class[name], basename(filename))) + +print(heatmap_df) +set_palette('Blues') +pyplot.figure(figsize=(40, 40)) +heat_img = heatmap(heatmap_df, cmap='Blues', linewidths=1) +japanize() +heatmap_df.max() +pyplot.savefig(join(dest_dir, 'confusion_matrix.png')) +print(f'acc: {1 - heatmap_df.to_numpy().flatten().sum() / image_folder.__len__()}') diff --git a/facenet_gen_model.py b/facenet_gen_model.py new file mode 100644 index 0000000..6678b22 --- /dev/null +++ b/facenet_gen_model.py @@ -0,0 +1,15 @@ +from os.path import join + +from facenet_pytorch import InceptionResnetV1 +from torchinfo import summary +from torch import save + +from settings import datadir + +model = InceptionResnetV1(pretrained='vggface2') +model.eval() +summary(model=model, input_size=(1, 3, 256, 256)) +print(model) +for name, layer in model.named_parameters(): + print(name) +save(model.cpu(), f=join(datadir(), 'artifact', 'vggface2_facenet.pth')) diff --git a/facenet_transfer_learning.py b/facenet_transfer_learning.py new file mode 100644 index 0000000..34cfd29 --- /dev/null +++ b/facenet_transfer_learning.py @@ -0,0 +1,236 @@ +from os import makedirs, environ + +from torchinfo import summary +from torchvision.models import Swin_V2_B_Weights, swin_v2_b +from torch.nn import Linear, Dropout3d, Sequential, Dropout, Conv2d, CrossEntropyLoss, Identity, MaxPool2d, ReLU, \ + Softmax +from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \ + RandomHorizontalFlip, \ + Resize, CenterCrop, RandomAffine, GaussianBlur, RandomAutocontrast, InterpolationMode, AugMix, RandomErasing, \ + RandomEqualize, RandomPosterize, RandomPerspective, RandomGrayscale +import matplotlib + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from numpy import arange, ndarray, ceil, full, uint8 +from torch.optim import SGD, Adam, lr_scheduler +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +from tqdm import tqdm +from PIL import Image, ImageDraw, ImageFont +from settings import datadir +from os.path import join +from torch.cuda import is_available +from torch import no_grad, save, Tensor, load, device +from datetime import datetime +from distutils.util import strtobool + +CI = bool(strtobool(environ['CI'])) +device = device('cuda' if is_available() else 'cpu') + +model_path: str = join(datadir(), 'artifact', 'vggface2_facenet.pth') +input_shape: int = 256 +batch_size = 32 + +transform = { + 'train': Compose([ + RandomGrayscale(p=.25), + RandomHorizontalFlip(p=0.2), + RandomAutocontrast(), + RandomEqualize(p=.25), + RandomPosterize(bits=4), + ToTensor(), + RandomRotation(degrees=30, fill=1), + RandomPerspective(fill=1, distortion_scale=.2), + RandomErasing(scale=(0.05, 0.1), value='random', p=.3), + RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True) + ]), + 'val': Compose([ + # RandomAffine(scale=(0.8, 0.8), degrees=(0, 0), fill=1), + Resize(224, antialias=True, interpolation=InterpolationMode.BILINEAR), + ToTensor() + ]) +} +image_folder = { + 'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']), + 'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val']) +} + +dataloader = { + 'train': DataLoader(image_folder['train'], batch_size=batch_size, shuffle=True, num_workers=8), + 'val': DataLoader(image_folder['val'], batch_size=batch_size, shuffle=True, num_workers=8) +} + + +def plot_dataset(dataloader: DataLoader | tuple, col_len: int = 8, + label_text: str | None = None) -> Image.Image: + if isinstance(dataloader, DataLoader): + images, labels = iter(dataloader).__next__() + else: + images, labels = dataloader + + images: Tensor = images + labels: Tensor = labels + images: ndarray = images.numpy() + + if label_text is None: + labels: list[str] = [str(i) for i in labels.tolist()] + else: + labels: list[str] = [label_text[i] for i in labels.tolist()] + + batch_size, _, width, height = images.shape + rows = ceil(batch_size / col_len) + space_y, space_x, font_size = 50, 30, 20 + shape_y, shape_x = images.shape[-2:] + base_img = full(shape=((height + space_y) * int(rows), width * col_len + space_x * (col_len - 1), 3), dtype=uint8, + fill_value=255) + for order, image in enumerate(images): + order_y, order_x = order // col_len, order % col_len + image = (image.transpose([1, 2, 0]) * 255).astype(uint8) + base_img[order_y * (shape_y + space_y) + space_y:(order_y + 1) * (shape_y + space_y), + order_x * (shape_x + space_x):(order_x + 1) * (shape_x + space_x) - space_x, :] = image + pil_image = Image.fromarray(base_img) + font = ImageFont.truetype(font=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc', size=24) + draw = ImageDraw.Draw(pil_image) + pad = 5 + for order, label in enumerate(labels): + order_y, order_x = order // col_len, order % col_len + draw.text(((shape_x + space_x) * order_x + pad, (shape_y + space_y) * order_y + pad), label, 'black', font=font) + + return pil_image + + +model = load(model_path) + +tune = False +for name, layer in model.named_parameters(): + if 'block8' in name: + tune = True + layer.requires_grad = tune + +model.last_linear = Identity() +model.last_bn = Identity() +model.logits = Identity() +model.dropout = Identity() +model = Sequential(model, + Linear(in_features=1792, out_features=2 ** 12), ReLU(inplace=True), Dropout(), + Linear(in_features=2 ** 12, out_features=2 ** 11), ReLU(inplace=True), Dropout(), + Linear(in_features=2 ** 11, out_features=image_folder['train'].classes.__len__(), bias=True), + ) + +summary(model=model, input_size=(batch_size, 3, input_shape, input_shape), device='cpu') + +model_gpu = model.to(device=device) +criterion = CrossEntropyLoss() + +optimizer = Adam(params=[ + {'params': model_gpu[0].block8.parameters(), 'lr': 1e-5}, + {'params': model_gpu[1].parameters(), 'lr': 1e-3}, +]) + +scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.9) +epochs = 100 + +train_loss_list = list() +train_acc_list = list() +val_loss_list = list() +val_acc_list = list() + +save_dir = join(datadir(), 'artifact', 'facenet-tl_' + datetime.now().__str__()) +print(save_dir) +makedirs(save_dir, exist_ok=True) +makedirs(join(save_dir, 'pallets'), exist_ok=True) +makedirs(join(save_dir, 'checkpoints'), exist_ok=True) + +for epoch in range(epochs): + train_loss = .0 + train_acc = .0 + val_loss = .0 + val_acc = .0 + + model_gpu.train() + # makedirs(join(save_dir, 'pallets', str(epoch)), exist_ok=True) + + for count, (images, labels) in enumerate(tqdm(dataloader['train'])): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_train.jpg')) + optimizer.zero_grad() + images = images.to(device) + labels = labels.to(device) + + outputs = model(images) + + loss = criterion(outputs, labels) + train_loss += loss.item() + + loss.backward() + optimizer.step() + + predicted = outputs.max(1)[1] + train_acc += (predicted == labels).sum() + + avg_train_loss = train_loss / dataloader['train'].dataset.__len__() + avg_train_acc = train_acc / dataloader['train'].dataset.__len__() + + model_gpu.eval() + with no_grad(): + for count, (images, labels) in enumerate(dataloader['val']): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_val.jpg')) + images = images.to(device) + labels = labels.to(device) + outputs = model_gpu(images) + loss = criterion(outputs, labels) + val_loss += loss.item() + predicted = outputs.max(1)[1] + val_acc += (predicted == labels).sum() + avg_val_loss = val_loss / dataloader['val'].dataset.__len__() + avg_val_acc = val_acc / dataloader['val'].dataset.__len__() + + print(f'Epoch [{(epoch + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, ' + f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, ' + f'lr: {scheduler.get_last_lr()[0]:.2e}') + scheduler.step() + + train_loss_list.append(float(avg_train_loss)) + train_acc_list.append(float(avg_train_acc)) + val_loss_list.append(float(avg_val_loss)) + val_acc_list.append(float(avg_val_acc)) + + plt.figure(figsize=(8, 6)) + plt.plot(val_acc_list, label='val', lw=2, c='b') + plt.plot(train_acc_list, label='train', lw=2, c='k') + plt.title('learning rate') + plt.xticks(size=14) + plt.yticks(size=14) + plt.xlim([-int(epochs * .1), int(epochs * 1.1)]) + plt.ylim([-0.1, 1.1]) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs + 1, 10)) + plt.yticks(arange(0, 1.1, .1)) + plt.savefig(join(save_dir, 'learning_rate.png')) + plt.close() + + plt.figure(figsize=(8, 6)) + plt.plot(val_loss_list, label='val', lw=2, c='b') + plt.plot(train_loss_list, label='train', lw=2, c='k') + plt.title('loss') + plt.xticks(size=14) + plt.yticks(size=14) + plt.xlim([-int(epochs * .1), int(epochs * 1.1)]) + plt.ylim(bottom=-0) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs + 1, 10)) + plt.savefig(join(save_dir, 'loss.png')) + plt.close() + if (epoch + 1) % 10 == 0: + save(model_gpu.cpu(), join(save_dir, 'checkpoints', f'{epoch + 1}.pth')) + model.to(device=device) + +save(model_gpu.cpu(), join(save_dir, 'model.pth')) diff --git a/resnet_finetune_update.py b/resnet_finetune_update.py new file mode 100644 index 0000000..7cfff88 --- /dev/null +++ b/resnet_finetune_update.py @@ -0,0 +1,218 @@ +from os import makedirs, environ + +from torchinfo import summary +from torchvision.models import ResNet50_Weights, resnet50 +from torch.nn import Linear, Dropout3d, Sequential, Dropout +from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \ + RandomHorizontalFlip, \ + Resize, CenterCrop, RandomAffine, GaussianBlur, RandomAutocontrast, InterpolationMode, AugMix, RandomErasing, \ + RandomEqualize, RandomPosterize, RandomPerspective, RandomGrayscale +import matplotlib + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from numpy import arange, ndarray, ceil, full, uint8 +from torch.nn import CrossEntropyLoss +from torch.optim import SGD, Adam, lr_scheduler +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +from tqdm import tqdm +from PIL import Image, ImageDraw, ImageFont +from settings import datadir +from os.path import join +from torch.cuda import is_available +from torch import no_grad, save, Tensor +from datetime import datetime +from distutils.util import strtobool + +CI = bool(strtobool(environ['CI'])) +device = 'cuda' if is_available() else 'cpu' +transform = { + 'train': Compose([ + RandomGrayscale(p=.25), + RandomHorizontalFlip(p=0.2), + RandomAutocontrast(), + RandomEqualize(p=.25), + RandomPosterize(bits=4), + ToTensor(), + RandomRotation(degrees=30, fill=1), + RandomPerspective(fill=1, distortion_scale=.2), + RandomErasing(scale=(0.05, 0.1), value='random', p=.3), + RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True) + ]), + 'val': Compose([ + # RandomAffine(scale=(0.8, 0.8), degrees=(0, 0), fill=1), + Resize(224, antialias=True, interpolation=InterpolationMode.BILINEAR), + ToTensor() + ]) +} +image_folder = { + 'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']), + 'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val']) +} + +dataloader = { + 'train': DataLoader(image_folder['train'], batch_size=32, shuffle=True, num_workers=3), + 'val': DataLoader(image_folder['val'], batch_size=32, shuffle=True, num_workers=3) +} + + +def plot_dataset(dataloader: DataLoader | tuple, col_len: int = 8, + label_text: str | None = None) -> Image.Image: + if isinstance(dataloader, DataLoader): + images, labels = iter(dataloader).__next__() + else: + images, labels = dataloader + + images: Tensor = images + labels: Tensor = labels + images: ndarray = images.numpy() + + if label_text is None: + labels: list[str] = [str(i) for i in labels.tolist()] + else: + labels: list[str] = [label_text[i] for i in labels.tolist()] + + batch_size, _, width, height = images.shape + rows = ceil(batch_size / col_len) + space_y, space_x, font_size = 50, 30, 20 + shape_y, shape_x = images.shape[-2:] + base_img = full(shape=((height + space_y) * int(rows), width * col_len + space_x * (col_len - 1), 3), dtype=uint8, + fill_value=255) + for order, image in enumerate(images): + order_y, order_x = order // col_len, order % col_len + image = (image.transpose([1, 2, 0]) * 255).astype(uint8) + base_img[order_y * (shape_y + space_y) + space_y:(order_y + 1) * (shape_y + space_y), + order_x * (shape_x + space_x):(order_x + 1) * (shape_x + space_x) - space_x, :] = image + pil_image = Image.fromarray(base_img) + font = ImageFont.truetype(font=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc', size=24) + draw = ImageDraw.Draw(pil_image) + pad = 5 + for order, label in enumerate(labels): + order_y, order_x = order // col_len, order % col_len + draw.text(((shape_x + space_x) * order_x + pad, (shape_y + space_y) * order_y + pad), label, 'black', font=font) + + return pil_image + + +model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2) + +tune = False +for name, layer in model.named_parameters(): + if 'layer3' in name: + tune = True + layer.requires_grad = tune + +model.layer3.insert(0, Dropout3d(p=.4)) +for i in range(model.layer4.__len__()): + model.layer4.insert(i * 2, Dropout3d(p=.2)) +model.fc = Sequential(Dropout(p=.6), + Linear(in_features=2048, out_features=image_folder['train'].classes.__len__(), bias=True)) +summary(model=model, input_size=(1, 3, 224, 224), device='cpu') + +model_gpu = model.to(device=device) +criterion = CrossEntropyLoss() + +optimizer = Adam(params=[ + {'params': model_gpu.layer3.parameters(), 'lr': 1e-6}, + {'params': model_gpu.layer4.parameters(), 'lr': 1e-4}, + {'params': model_gpu.fc.parameters(), 'lr': 1e-4}, +]) + +scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.9) +epochs = 200 + +train_loss_list = list() +train_acc_list = list() +val_loss_list = list() +val_acc_list = list() + +save_dir = join(datadir(), 'artifact', 'resnet_' + datetime.now().__str__()) +print(save_dir) +makedirs(save_dir, exist_ok=True) +makedirs(join(save_dir, 'pallets'), exist_ok=True) + +for epoch in range(epochs): + train_loss = .0 + train_acc = .0 + val_loss = .0 + val_acc = .0 + + model_gpu.train() + # makedirs(join(save_dir, 'pallets', str(epoch)), exist_ok=True) + + for count, (images, labels) in enumerate(tqdm(dataloader['train'])): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_train.jpg')) + optimizer.zero_grad() + images = images.to(device) + labels = labels.to(device) + + outputs = model(images) + + loss = criterion(outputs, labels) + train_loss += loss.item() + + loss.backward() + optimizer.step() + + predicted = outputs.max(1)[1] + train_acc += (predicted == labels).sum() + + avg_train_loss = train_loss / dataloader['train'].dataset.__len__() + avg_train_acc = train_acc / dataloader['train'].dataset.__len__() + + model_gpu.eval() + with no_grad(): + for count, (images, labels) in enumerate(dataloader['val']): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_val.jpg')) + images = images.to(device) + labels = labels.to(device) + outputs = model_gpu(images) + loss = criterion(outputs, labels) + val_loss += loss.item() + predicted = outputs.max(1)[1] + val_acc += (predicted == labels).sum() + avg_val_loss = val_loss / dataloader['val'].dataset.__len__() + avg_val_acc = val_acc / dataloader['val'].dataset.__len__() + + print(f'Epoch [{(epoch + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, ' + f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, ' + f'lr: {scheduler.get_last_lr()[0]:.2e}') + scheduler.step() + + train_loss_list.append(float(avg_train_loss)) + train_acc_list.append(float(avg_train_acc)) + val_loss_list.append(float(avg_val_loss)) + val_acc_list.append(float(avg_val_acc)) + + plt.figure(figsize=(8, 6)) + plt.plot(val_acc_list, label='val', lw=2, c='b') + plt.plot(train_acc_list, label='train', lw=2, c='k') + plt.title('learning rate') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 10)) + plt.savefig(join(save_dir, 'learning_rate.png')) + plt.close() + + plt.figure(figsize=(8, 6)) + plt.plot(val_loss_list, label='val', lw=2, c='b') + plt.plot(train_loss_list, label='train', lw=2, c='k') + plt.title('loss') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 10)) + plt.savefig(join(save_dir, 'loss.png')) + plt.close() + +save(model_gpu.cpu(), join(save_dir, 'model.pth')) diff --git a/settings.py b/settings.py index ed38587..e40fddd 100755 --- a/settings.py +++ b/settings.py @@ -64,5 +64,5 @@ request_header = { class FaceCropProcesses: load = 1 pre_process = 10 - predict = 2 + predict = 3 post_process = 4 diff --git a/similar_face.py b/similar_face.py index d07b32c..71cdeb1 100755 --- a/similar_face.py +++ b/similar_face.py @@ -1,8 +1,7 @@ from shutil import copyfile - from insightface.app import FaceAnalysis from os import getcwd, listdir, makedirs -from os.path import join, isdir, isfile +from os.path import join, isdir, isfile, basename, dirname from numpy import dot, array from numpy.linalg import norm from PIL import Image @@ -30,8 +29,10 @@ if collect_image_emb.__len__() == 0: # collect_image_emb = collect_image_emb[0].embedding -makedirs(join(getcwd(), argv[2], "true"), exist_ok=True) -makedirs(join(getcwd(), argv[2], "false"), exist_ok=True) +dir_name = basename(dirname(argv[2])) +print(dir_name) +makedirs(join(getcwd(), dir_name, "true"), exist_ok=True) +makedirs(join(getcwd(), dir_name, "false"), exist_ok=True) images = [] for file in image_files: @@ -45,7 +46,7 @@ for file in image_files: (norm(emb[0].embedding) * norm(collect_image_emb[0].embedding)) print(file, cosine) if cosine > 0.3: - copyfile(join(getcwd(), argv[2], file), join(getcwd(), argv[2], "true", file)) + copyfile(join(getcwd(), argv[2], file), join(getcwd(), dir_name, "true", file)) else: - copyfile(join(getcwd(), argv[2], file), join(getcwd(), argv[2], "false", file)) + copyfile(join(getcwd(), argv[2], file), join(getcwd(), dir_name, "false", file)) diff --git a/split_train_val.py b/split_train_val.py index b3fd121..b1a9795 100644 --- a/split_train_val.py +++ b/split_train_val.py @@ -9,8 +9,8 @@ from asyncio import to_thread, gather, run from aiofiles import open as a_open valid_rate = 0.1 -SRC_DIR = join(r'/mnt/share/dataset/vggface2/train') -DEST_DIR = join(datadir(), 'vggface2') +SRC_DIR = join(r'/home/tomokazu/PycharmProjects/helloproject-ai/data/sample_set/') +DEST_DIR = join(datadir(), 'dataset') makedirs(DEST_DIR, exist_ok=True) rmtree(join(DEST_DIR, 'train'), ignore_errors=True) diff --git a/swin_finetune.py b/swin_finetune.py new file mode 100644 index 0000000..81146f6 --- /dev/null +++ b/swin_finetune.py @@ -0,0 +1,221 @@ +from os import makedirs, environ + +from torchinfo import summary +from torchvision.models import Swin_V2_B_Weights, swin_v2_b +from torch.nn import Linear, Dropout3d, Sequential, Dropout +from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \ + RandomHorizontalFlip, \ + Resize, CenterCrop, RandomAffine, GaussianBlur, RandomAutocontrast, InterpolationMode, AugMix, RandomErasing, \ + RandomEqualize, RandomPosterize, RandomPerspective, RandomGrayscale +import matplotlib + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from numpy import arange, ndarray, ceil, full, uint8 +from torch.nn import CrossEntropyLoss +from torch.optim import SGD, Adam, lr_scheduler +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +from tqdm import tqdm +from PIL import Image, ImageDraw, ImageFont +from settings import datadir +from os.path import join +from torch.cuda import is_available +from torch import no_grad, save, Tensor +from datetime import datetime +from distutils.util import strtobool + +CI = bool(strtobool(environ['CI'])) +device = 'cuda' if is_available() else 'cpu' +transform = { + 'train': Compose([ + RandomGrayscale(p=.25), + RandomHorizontalFlip(p=0.2), + RandomAutocontrast(), + RandomEqualize(p=.25), + RandomPosterize(bits=4), + ToTensor(), + RandomRotation(degrees=30, fill=1), + RandomPerspective(fill=1, distortion_scale=.2), + RandomErasing(scale=(0.05, 0.1), value='random', p=.3), + RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True) + ]), + 'val': Compose([ + # RandomAffine(scale=(0.8, 0.8), degrees=(0, 0), fill=1), + Resize(224, antialias=True, interpolation=InterpolationMode.BILINEAR), + ToTensor() + ]) +} +image_folder = { + 'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']), + 'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val']) +} + +dataloader = { + 'train': DataLoader(image_folder['train'], batch_size=32, shuffle=True, num_workers=5), + 'val': DataLoader(image_folder['val'], batch_size=32, shuffle=True, num_workers=5) +} + + +def plot_dataset(dataloader: DataLoader | tuple, col_len: int = 8, + label_text: str | None = None) -> Image.Image: + if isinstance(dataloader, DataLoader): + images, labels = iter(dataloader).__next__() + else: + images, labels = dataloader + + images: Tensor = images + labels: Tensor = labels + images: ndarray = images.numpy() + + if label_text is None: + labels: list[str] = [str(i) for i in labels.tolist()] + else: + labels: list[str] = [label_text[i] for i in labels.tolist()] + + batch_size, _, width, height = images.shape + rows = ceil(batch_size / col_len) + space_y, space_x, font_size = 50, 30, 20 + shape_y, shape_x = images.shape[-2:] + base_img = full(shape=((height + space_y) * int(rows), width * col_len + space_x * (col_len - 1), 3), dtype=uint8, + fill_value=255) + for order, image in enumerate(images): + order_y, order_x = order // col_len, order % col_len + image = (image.transpose([1, 2, 0]) * 255).astype(uint8) + base_img[order_y * (shape_y + space_y) + space_y:(order_y + 1) * (shape_y + space_y), + order_x * (shape_x + space_x):(order_x + 1) * (shape_x + space_x) - space_x, :] = image + pil_image = Image.fromarray(base_img) + font = ImageFont.truetype(font=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc', size=24) + draw = ImageDraw.Draw(pil_image) + pad = 5 + for order, label in enumerate(labels): + order_y, order_x = order // col_len, order % col_len + draw.text(((shape_x + space_x) * order_x + pad, (shape_y + space_y) * order_y + pad), label, 'black', font=font) + + return pil_image + + +model = swin_v2_b(weights=Swin_V2_B_Weights.IMAGENET1K_V1) + +tune = False +for name, layer in model.named_parameters(): + if 'features.6' in name: + tune = True + layer.requires_grad = tune + +for layers in model.features[7]: + layers.mlp[2] = Dropout(p=.2) + layers.mlp[4] = Dropout(p=.2) + +model.head = Sequential(Dropout(), + Linear(in_features=1024, out_features=image_folder['train'].classes.__len__(), bias=True)) + +summary(model=model, input_size=(32, 3, 224, 224), device='cpu') + +model_gpu = model.to(device=device) +criterion = CrossEntropyLoss() + +optimizer = Adam(params=[ + {'params': model_gpu.features[6].parameters(), 'lr': 1e-5}, + {'params': model_gpu.features[7].parameters(), 'lr': 1e-4}, + {'params': model_gpu.norm.parameters(), 'lr': 1e-3}, + {'params': model_gpu.head.parameters(), 'lr': 1e-3}, +]) + +scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.9) +epochs = 200 + +train_loss_list = list() +train_acc_list = list() +val_loss_list = list() +val_acc_list = list() + +save_dir = join(datadir(), 'artifact', 'swin-v2-b_' + datetime.now().__str__()) +print(save_dir) +makedirs(save_dir, exist_ok=True) +makedirs(join(save_dir, 'pallets'), exist_ok=True) + +for epoch in range(epochs): + train_loss = .0 + train_acc = .0 + val_loss = .0 + val_acc = .0 + + model_gpu.train() + # makedirs(join(save_dir, 'pallets', str(epoch)), exist_ok=True) + + for count, (images, labels) in enumerate(tqdm(dataloader['train'])): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_train.jpg')) + optimizer.zero_grad() + images = images.to(device) + labels = labels.to(device) + + outputs = model(images) + + loss = criterion(outputs, labels) + train_loss += loss.item() + + loss.backward() + optimizer.step() + + predicted = outputs.max(1)[1] + train_acc += (predicted == labels).sum() + + avg_train_loss = train_loss / dataloader['train'].dataset.__len__() + avg_train_acc = train_acc / dataloader['train'].dataset.__len__() + + model_gpu.eval() + with no_grad(): + for count, (images, labels) in enumerate(dataloader['val']): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_val.jpg')) + images = images.to(device) + labels = labels.to(device) + outputs = model_gpu(images) + loss = criterion(outputs, labels) + val_loss += loss.item() + predicted = outputs.max(1)[1] + val_acc += (predicted == labels).sum() + avg_val_loss = val_loss / dataloader['val'].dataset.__len__() + avg_val_acc = val_acc / dataloader['val'].dataset.__len__() + + print(f'Epoch [{(epoch + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, ' + f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, ' + f'lr: {scheduler.get_last_lr()[0]:.2e}') + scheduler.step() + + train_loss_list.append(float(avg_train_loss)) + train_acc_list.append(float(avg_train_acc)) + val_loss_list.append(float(avg_val_loss)) + val_acc_list.append(float(avg_val_acc)) + + plt.figure(figsize=(8, 6)) + plt.plot(val_acc_list, label='val', lw=2, c='b') + plt.plot(train_acc_list, label='train', lw=2, c='k') + plt.title('learning rate') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 10)) + plt.savefig(join(save_dir, 'learning_rate.png')) + plt.close() + + plt.figure(figsize=(8, 6)) + plt.plot(val_loss_list, label='val', lw=2, c='b') + plt.plot(train_loss_list, label='train', lw=2, c='k') + plt.title('loss') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 10)) + plt.savefig(join(save_dir, 'loss.png')) + plt.close() + +save(model_gpu.cpu(), join(save_dir, 'model.pth')) diff --git a/transform_simulator.py b/transform_simulator.py new file mode 100644 index 0000000..33bd251 --- /dev/null +++ b/transform_simulator.py @@ -0,0 +1,74 @@ +from os.path import join +from matplotlib.pyplot import imshow, show, figure +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +from PIL import Image, ImageDraw, ImageFont +from torch import Tensor +from numpy import ndarray, ceil, full, uint8 +from torchvision.transforms import Compose, CenterCrop, RandomHorizontalFlip, GaussianBlur, RandomAutocontrast, \ + ToTensor, RandomRotation, RandomResizedCrop, RandomErasing, RandomEqualize, RandomPerspective, RandomPosterize, \ + RandomGrayscale + +from settings import datadir + + +def plot_dataset(dataloader: DataLoader | tuple, col_len: int = 8, + label_text: str | None = None) -> Image.Image: + if isinstance(dataloader, DataLoader): + images, labels = iter(dataloader).__next__() + else: + images, labels = dataloader + + images: Tensor = images + labels: Tensor = labels + images: ndarray = images.numpy() + + if label_text is None: + labels: list[str] = [str(i) for i in labels.tolist()] + else: + labels: list[str] = [label_text[i] for i in labels.tolist()] + + batch_size, _, width, height = images.shape + rows = ceil(batch_size / col_len) + space_y, space_x, font_size = 50, 30, 20 + shape_y, shape_x = images.shape[-2:] + base_img = full(shape=((height + space_y) * int(rows), width * col_len + space_x * (col_len - 1), 3), dtype=uint8, + fill_value=255) + for order, image in enumerate(images): + order_y, order_x = order // col_len, order % col_len + image = (image.transpose([1, 2, 0]) * 255).astype(uint8) + base_img[order_y * (shape_y + space_y) + space_y:(order_y + 1) * (shape_y + space_y), + order_x * (shape_x + space_x):(order_x + 1) * (shape_x + space_x) - space_x, :] = image + pil_image = Image.fromarray(base_img) + font = ImageFont.truetype(font=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc', size=24) + draw = ImageDraw.Draw(pil_image) + pad = 5 + for order, label in enumerate(labels): + order_y, order_x = order // col_len, order % col_len + draw.text(((shape_x + space_x) * order_x + pad, (shape_y + space_y) * order_y + pad), label, 'black', font=font) + + return pil_image + + +transform = Compose([ + RandomGrayscale(p=.25), + RandomHorizontalFlip(p=0.2), + # GaussianBlur(kernel_size=3), + RandomAutocontrast(), + # Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + RandomEqualize(p=.25), + RandomPosterize(bits=4), + ToTensor(), + RandomRotation(degrees=30, fill=1), + RandomPerspective(fill=1, distortion_scale=.2), + RandomErasing(scale=(0.05, 0.1), value='random', p=.3), + RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True) +]) +image_folder = ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform) + +dataloader = DataLoader(image_folder, batch_size=36, shuffle=True, num_workers=3) + +figure(figsize=(10, 10), dpi=300) +imshow(plot_dataset(dataloader=dataloader, col_len=6, label_text=image_folder.classes)) +show() +print(image_folder.classes) \ No newline at end of file diff --git a/vit_b_finetune.py b/vit_b_finetune.py new file mode 100644 index 0000000..d131572 --- /dev/null +++ b/vit_b_finetune.py @@ -0,0 +1,221 @@ +from os import makedirs, environ + +from torchinfo import summary +from torchvision.models import ViT_B_32_Weights, vit_b_32 +from torch.nn import Linear, Dropout3d, Sequential, Dropout +from torchvision.transforms import Compose, RandomResizedCrop, RandomRotation, ToTensor, \ + RandomHorizontalFlip, \ + Resize, CenterCrop, RandomAffine, GaussianBlur, RandomAutocontrast, InterpolationMode, AugMix, RandomErasing, \ + RandomEqualize, RandomPosterize, RandomPerspective, RandomGrayscale +import matplotlib + +matplotlib.use('Agg') +import matplotlib.pyplot as plt +from numpy import arange, ndarray, ceil, full, uint8 +from torch.nn import CrossEntropyLoss +from torch.optim import SGD, Adam, lr_scheduler +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +from tqdm import tqdm +from PIL import Image, ImageDraw, ImageFont +from settings import datadir +from os.path import join +from torch.cuda import is_available +from torch import no_grad, save, Tensor +from datetime import datetime +from distutils.util import strtobool + +CI = bool(strtobool(environ['CI'])) +device = 'cuda' if is_available() else 'cpu' +transform = { + 'train': Compose([ + RandomGrayscale(p=.25), + RandomHorizontalFlip(p=0.2), + RandomAutocontrast(), + RandomEqualize(p=.25), + RandomPosterize(bits=4), + ToTensor(), + RandomRotation(degrees=30, fill=1), + RandomPerspective(fill=1, distortion_scale=.2), + RandomErasing(scale=(0.05, 0.1), value='random', p=.3), + RandomResizedCrop(size=224, scale=(0.7, 1.0), ratio=(1.0, 1.0), antialias=True) + ]), + 'val': Compose([ + # RandomAffine(scale=(0.8, 0.8), degrees=(0, 0), fill=1), + Resize(224, antialias=True, interpolation=InterpolationMode.BILINEAR), + ToTensor() + ]) +} +image_folder = { + 'train': ImageFolder(root=join(datadir(), 'dataset', 'train'), transform=transform['train']), + 'val': ImageFolder(root=join(datadir(), 'dataset', 'val'), transform=transform['val']) +} + +dataloader = { + 'train': DataLoader(image_folder['train'], batch_size=32, shuffle=True, num_workers=5), + 'val': DataLoader(image_folder['val'], batch_size=32, shuffle=True, num_workers=5) +} + + +def plot_dataset(dataloader: DataLoader | tuple, col_len: int = 8, + label_text: str | None = None) -> Image.Image: + if isinstance(dataloader, DataLoader): + images, labels = iter(dataloader).__next__() + else: + images, labels = dataloader + + images: Tensor = images + labels: Tensor = labels + images: ndarray = images.numpy() + + if label_text is None: + labels: list[str] = [str(i) for i in labels.tolist()] + else: + labels: list[str] = [label_text[i] for i in labels.tolist()] + + batch_size, _, width, height = images.shape + rows = ceil(batch_size / col_len) + space_y, space_x, font_size = 50, 30, 20 + shape_y, shape_x = images.shape[-2:] + base_img = full(shape=((height + space_y) * int(rows), width * col_len + space_x * (col_len - 1), 3), dtype=uint8, + fill_value=255) + for order, image in enumerate(images): + order_y, order_x = order // col_len, order % col_len + image = (image.transpose([1, 2, 0]) * 255).astype(uint8) + base_img[order_y * (shape_y + space_y) + space_y:(order_y + 1) * (shape_y + space_y), + order_x * (shape_x + space_x):(order_x + 1) * (shape_x + space_x) - space_x, :] = image + pil_image = Image.fromarray(base_img) + font = ImageFont.truetype(font=r'/usr/share/fonts/opentype/noto/NotoSansCJK-Medium.ttc', size=24) + draw = ImageDraw.Draw(pil_image) + pad = 5 + for order, label in enumerate(labels): + order_y, order_x = order // col_len, order % col_len + draw.text(((shape_x + space_x) * order_x + pad, (shape_y + space_y) * order_y + pad), label, 'black', font=font) + + return pil_image + + +model = vit_b_32(weights=ViT_B_32_Weights.IMAGENET1K_V1) + +tune = False +for name, layer in model.named_parameters(): + if 'encoder_layer_10' in name: + tune = True + layer.requires_grad = tune + +for layer in model.encoder.layers[10:]: + layer.dropout = Dropout(p=.2) + layer.mlp[2] = Dropout(p=.2) + layer.mlp[4] = Dropout(p=.2) +model.heads = Sequential(Dropout(), + Linear(in_features=768, out_features=image_folder['train'].classes.__len__(), bias=True)) + +summary(model=model, input_size=(32, 3, 224, 224), device='cpu') + +model_gpu = model.to(device=device) +criterion = CrossEntropyLoss() + +optimizer = Adam(params=[ + {'params': model_gpu.encoder.layers[10].parameters(), 'lr': 1e-5}, + {'params': model_gpu.encoder.layers[11].parameters(), 'lr': 1e-4}, + {'params': model_gpu.encoder.ln.parameters(), 'lr': 1e-3}, + {'params': model_gpu.heads.parameters(), 'lr': 1e-3}, +]) + +scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=10, gamma=0.9) +epochs = 200 + +train_loss_list = list() +train_acc_list = list() +val_loss_list = list() +val_acc_list = list() + +save_dir = join(datadir(), 'artifact', 'vit-b-32_' + datetime.now().__str__()) +print(save_dir) +makedirs(save_dir, exist_ok=True) +makedirs(join(save_dir, 'pallets'), exist_ok=True) + +for epoch in range(epochs): + train_loss = .0 + train_acc = .0 + val_loss = .0 + val_acc = .0 + + model_gpu.train() + # makedirs(join(save_dir, 'pallets', str(epoch)), exist_ok=True) + + for count, (images, labels) in enumerate(tqdm(dataloader['train'])): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_train.jpg')) + optimizer.zero_grad() + images = images.to(device) + labels = labels.to(device) + + outputs = model(images) + + loss = criterion(outputs, labels) + train_loss += loss.item() + + loss.backward() + optimizer.step() + + predicted = outputs.max(1)[1] + train_acc += (predicted == labels).sum() + + avg_train_loss = train_loss / dataloader['train'].dataset.__len__() + avg_train_acc = train_acc / dataloader['train'].dataset.__len__() + + model_gpu.eval() + with no_grad(): + for count, (images, labels) in enumerate(dataloader['val']): + if count == 1: + image_pallets = plot_dataset(dataloader=(images, labels), col_len=6, + label_text=image_folder['train'].classes) + image_pallets.save(join(save_dir, 'pallets', str(epoch) + '_val.jpg')) + images = images.to(device) + labels = labels.to(device) + outputs = model_gpu(images) + loss = criterion(outputs, labels) + val_loss += loss.item() + predicted = outputs.max(1)[1] + val_acc += (predicted == labels).sum() + avg_val_loss = val_loss / dataloader['val'].dataset.__len__() + avg_val_acc = val_acc / dataloader['val'].dataset.__len__() + + print(f'Epoch [{(epoch + 1):02}/{epochs}], loss: {avg_train_loss:.5f}, ' + f'acc: {avg_train_acc:.5f}, val_loss: {avg_val_loss:.5f}, val_acc: {avg_val_acc:.5f}, ' + f'lr: {scheduler.get_last_lr()[0]:.2e}') + scheduler.step() + + train_loss_list.append(float(avg_train_loss)) + train_acc_list.append(float(avg_train_acc)) + val_loss_list.append(float(avg_val_loss)) + val_acc_list.append(float(avg_val_acc)) + + plt.figure(figsize=(8, 6)) + plt.plot(val_acc_list, label='val', lw=2, c='b') + plt.plot(train_acc_list, label='train', lw=2, c='k') + plt.title('learning rate') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 10)) + plt.savefig(join(save_dir, 'learning_rate.png')) + plt.close() + + plt.figure(figsize=(8, 6)) + plt.plot(val_loss_list, label='val', lw=2, c='b') + plt.plot(train_loss_list, label='train', lw=2, c='k') + plt.title('loss') + plt.xticks(size=14) + plt.yticks(size=14) + plt.grid(lw=2) + plt.legend(fontsize=14) + plt.xticks(arange(0, epochs, 10)) + plt.savefig(join(save_dir, 'loss.png')) + plt.close() + +save(model_gpu.cpu(), join(save_dir, 'model.pth'))