diff --git a/ameblo_download.py b/ameblo_download.py
index 93dbfb4..6f8a9f7 100755
--- a/ameblo_download.py
+++ b/ameblo_download.py
@@ -1,17 +1,29 @@
+from pprint import pprint
+from typing import List, Tuple
+
+import h5py
+
 import settings
 import re
 import sys
 from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
 from aiohttp import ClientSession, ClientConnectorError, ClientTimeout
 from itertools import chain
-from asyncio import run, Semaphore, sleep
+from asyncio import run, Semaphore, sleep, Lock
 from datetime import datetime
-from aiofiles import open
+from aiofiles import open as a_open
+import time
 from os import path, utime, stat, cpu_count, makedirs
 from tqdm.asyncio import tqdm
-from concurrent.futures import as_completed, ProcessPoolExecutor, Future
+from concurrent.futures import as_completed, ProcessPoolExecutor, Future, ThreadPoolExecutor
 from ujson import loads
 from warnings import filterwarnings
+from h5py import File, special_dtype, string_dtype
+from io import BytesIO
+from numpy import void, array
+import ujson
+import orjson
+import requests
 
 PARALLEL_LIMIT = 300
 
@@ -25,7 +37,6 @@ async def run_each(name: str) -> None:
     sem: Semaphore = Semaphore(PARALLEL_LIMIT)
     session: ClientSession = ClientSession(trust_env=True, headers=settings.request_header,
                                            timeout=ClientTimeout(total=10 * 60))
-
     list_pages_count = await parse_list_pages_count(name)
 
     print(name, list_pages_count)
@@ -38,14 +49,13 @@ async def run_each(name: str) -> None:
     for url in url_list:
         if 'html' not in url:
             print(url)
-
     executor = ProcessPoolExecutor(max_workers=cpu_count())
-    futures = await tqdm.gather(*[parse_blog_post(url, sem, session, executor) for url in url_list], desc='scan blog')
-    images_list = list()
-    for future in tqdm(as_completed(futures), desc='waiting processing ' + name, total=len(futures)):
-        images_list.append(future.result())
+    lock = Lock()
+    futures = await tqdm.gather(
+            *[parse_blog_post(url, sem, session, executor, lock) for url in url_list],
+            desc='scan blog')
     executor.shutdown()
-    image_link_package = list(chain.from_iterable(images_list))
+    image_link_package = list(chain.from_iterable(futures))
 
     await tqdm.gather(
         *[download_image(filename, url, date, sem, session) for filename, url, date in image_link_package],
@@ -58,7 +68,7 @@ async def parse_list_pages_count(blog_name: str) -> int:
     async with ClientSession(trust_env=True, headers=settings.request_header) as session:
         async with session.get(f'https://ameblo.jp/{blog_name}/entrylist.html') as resp:
             resp_html = await resp.text()
-            json_obj = loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', resp_html)[0] + '}')
+            json_obj = ujson.loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', resp_html)[0] + '}')
             return list(json_obj['entryState']['blogPageMap'].values())[0]['paging']['max_page']
 
 
@@ -67,11 +77,16 @@ async def parse_list_page(blog_name: str, order: int, sem: Semaphore, session: C
         async with session.get(f'https://ameblo.jp/{blog_name}/entrylist-{order}.html') as resp:
             resp_html = await resp.text()
     try:
-        json_obj = loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', resp_html)[0] + '}')
+        json_obj = ujson.loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', resp_html)[0] + '}')
         page_url_list: list[str] = list()
         for blog_post_desc in list(json_obj['entryState']['entryMap'].values()):
             if blog_post_desc['publish_flg'] == 'open':
-                page_url_list.append(f"https://ameblo.jp/{blog_name}/entry-{blog_post_desc['entry_id']}.html")
+                page_url_list.append(f"https://ameblo.jp/{blog_name}/entry-{blog_post_desc['entry_id']}.html" +
+                                     "," +
+                                     ";".join(["https://ameblo.jp/_api/blogComments", f"amebaId={blog_name}",
+                                               f"blogId={blog_post_desc['blog_id']}",
+                                               f"entryId={blog_post_desc['entry_id']}",
+                                               "excludeReplies=false", "limit=1", "offset=0"]))
     except Exception as e:
         print(e)
         print(f'https://ameblo.jp/{blog_name}/entrylist-{order}.html')
@@ -79,10 +94,10 @@ async def parse_list_page(blog_name: str, order: int, sem: Semaphore, session: C
     return page_url_list
 
 
-def parse_image(html: str, url: str) -> list:
+def parse_image(html: str, url: str) -> list[tuple[str, str, datetime]]:
     blog_account = url.split('/')[-2]
     try:
-        json_obj = list(loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', html)[0] + '}')['entryState'][
+        json_obj = list(ujson.loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', html)[0] + '}')['entryState'][
                             'entryMap'].values())[0]
     except IndexError as e:
         print(e, url)
@@ -106,29 +121,43 @@ def parse_image(html: str, url: str) -> list:
             ))
             entry_body.find('img', class_='PhotoSwipeImage').replaceWith(
                 '--blog-image-' + str(div["data-image-order"]) + '--\n')
-    if not path.isdir(path.join(settings.datadir(), 'blog_text', theme)):
-        makedirs(path.join(settings.datadir(), 'blog_text', theme), exist_ok=True)
-    for i in entry_body.find_all('br'):
-        i.replaceWith('\n')
-
-    async def save_text(save_path: str, content: str, last_modified_time: datetime):
-        async with open(save_path, mode='w') as f:
-            await f.write(content)
-        utime(path=save_path, times=(stat(path=save_path).st_atime, last_modified_time.timestamp()))
-
-    run(save_text(path.join(settings.datadir(), 'blog_text', theme, blog_account + '=' + str(blog_entry) + '.txt'),
-                  entry_body.text, date))
-    # print(return_list)
     return return_list
 
 
-async def parse_blog_post(url: str, sem: Semaphore, session: ClientSession, executor: ProcessPoolExecutor) -> Future:
-    # -> list[tuple[str, str, datetime]]:
-    # print(url)
+def get_api_json(api_url: str) -> list:
     while True:
-        async with sem:
+        try:
+            with requests.get(api_url) as resp:
+                resp_json = ujson.loads(resp.text)
+                comments_count = resp_json['paging']['total_count']
+                break
+        except Exception as e:
+            time.sleep(5.0)
+            print(api_url)
+            print(e, resp.text, resp.status_code, file=sys.stderr)
+    while True:
+        if comments_count == 0:
+            comments = []
+            break
+        else:
             try:
-                async with session.get(url) as resp:
+                with requests.get(api_url.replace('limit=1', f'limit={comments_count}')) as resp:
+                    comments = list(ujson.loads(resp.text)['commentMap'].values())
+                    break
+            except Exception as e:
+                time.sleep(5.0)
+                print(e, file=sys.stderr)
+    # print(comments.__len__())
+    return comments
+
+
+async def parse_blog_post(urls: str, sem: Semaphore, session: ClientSession, executor: ProcessPoolExecutor,
+                          lock: Lock) -> Future:
+    page_url, comment_api_url = urls.split(',')
+    async with sem:
+        while True:
+            try:
+                async with session.get(page_url) as resp:
                     resp_html = await resp.text()
                     # await sleep(1.0)
                     break
@@ -136,7 +165,13 @@ async def parse_blog_post(url: str, sem: Semaphore, session: ClientSession, exec
                 await sleep(5.0)
                 print(e, file=sys.stderr)
 
-    return executor.submit(parse_image, resp_html, url)
+    o = executor.submit(parse_image, resp_html, page_url)
+    async with lock:
+        async with a_open(file=path.join(settings.datadir(), 'api_urls.txt'), mode='a') as f:
+            await f.write(urls + '\n')
+
+    image_list = o.result()
+    return image_list
 
 
 async def download_image(filename: str, url: str, date: datetime, sem: Semaphore, session: ClientSession) -> None:
@@ -152,7 +187,7 @@ async def download_image(filename: str, url: str, date: datetime, sem: Semaphore
         async with session.get(url) as resp:
             if resp.content_type != "image/jpeg":
                 return
-            async with open(file=filepath, mode="wb") as f:
+            async with a_open(file=filepath, mode="wb") as f:
                 await f.write(await resp.read())
     utime(path=filepath, times=(stat(path=filepath).st_atime, date.timestamp()))
 
@@ -170,5 +205,7 @@ def grep_modified_time(html: str) -> str:
 
 
 if __name__ == '__main__':
+    with open(file=path.join(settings.datadir(),'api_urls.txt'),mode='w') as f:
+        f.write("")
     for blog in settings.blog_list:
         run(run_each(blog))
diff --git a/get_article_and_comments.py b/get_article_and_comments.py
new file mode 100644
index 0000000..436fc51
--- /dev/null
+++ b/get_article_and_comments.py
@@ -0,0 +1,148 @@
+import sys
+import re
+import time
+from io import BytesIO
+from h5py import File, string_dtype
+import requests
+from numpy import array, ceil
+from tqdm import tqdm
+from settings import datadir, theme_curator
+from concurrent.futures import ProcessPoolExecutor
+from os import cpu_count
+from os.path import join
+from bs4 import BeautifulSoup
+import ujson
+from more_itertools import chunked
+from datetime import datetime, timezone, timedelta
+
+JST = timezone(timedelta(hours=9), "JST")
+
+
+def parse_article(url: str) -> tuple[str, str, str, str, str]:
+    while True:
+        with requests.get(url) as resp:
+            html = resp.text
+        try:
+            json_obj = list(ujson.loads(re.findall(r'<script>window.INIT_DATA=(.*?)};', html)[0] + '}')['entryState'][
+                                'entryMap'].values())[0]
+            break
+        except IndexError as e:
+            print(e, url)
+    blog_account = url.split('/')[-2]
+    theme = theme_curator(json_obj['theme_name'], blog_account)
+    date = json_obj['last_edit_datetime']
+    blog_entry = json_obj['entry_id']
+    try:
+        entry_title = json_obj['entry_title']
+    except:
+        entry_title = ''
+    entry_body = BeautifulSoup(json_obj['entry_text'].replace('<br>', '\n'), 'lxml')
+    # print(entry_body)
+    for emoji in entry_body.find_all('img', class_='emoji'):
+        emoji.decompose()
+    image_divs = entry_body.find_all('img', class_='PhotoSwipeImage')
+    for div in image_divs:
+        # print(div)
+        if not div.has_attr('data-src'):
+            entry_body.find('img', class_='PhotoSwipeImage').replaceWith(
+                '--blog-image-' + str(div["data-image-order"]) + '--\n')
+    for i in entry_body.find_all('br'):
+        i.replaceWith('\n')
+    data_path = '/'.join([blog_account, str(blog_entry)])
+    return entry_body.text, entry_title, theme, date, data_path
+
+
+def get_api_json(api_url: str) -> list:
+    while True:
+        try:
+            with requests.get(api_url) as resp:
+                resp_json = ujson.loads(resp.text)
+                comments_count = resp_json['paging']['total_count']
+                break
+        except Exception as e:
+            time.sleep(5.0)
+            print(api_url)
+            print(e, resp.text, resp.status_code, file=sys.stderr)
+    while True:
+        if comments_count == 0:
+            comments = []
+            break
+        else:
+            try:
+                with requests.get(api_url.replace('limit=1', f'limit={comments_count}')) as resp:
+                    comments = list(ujson.loads(resp.text)['commentMap'].values())
+                    break
+            except Exception as e:
+                time.sleep(5.0)
+                print(e, file=sys.stderr)
+    # print(comments.__len__())
+    return comments
+
+
+if __name__ == '__main__':
+    chunk_size = 10
+    article_executor = ProcessPoolExecutor(max_workers=cpu_count() * 2)
+    api_executor = ProcessPoolExecutor(max_workers=chunk_size)
+
+    hdf5_bio = BytesIO()
+    with open(file=join(datadir(), 'blog_text.hdf5'), mode='rb') as hdf5_file:
+        hdf5_bio.write(hdf5_file.read())
+
+    save_cycle = 0
+    num_lines = sum([1 for _ in open(file=join(datadir(), 'api_urls.txt'), mode='r')])
+    with File(name=hdf5_bio, mode='a') as hdf5:
+        with open(file=join(datadir(), 'api_urls.txt'), mode='r') as f:
+            for rows in tqdm(chunked(f, n=chunk_size), total=ceil(num_lines / chunk_size)):
+                # save_cycle += 1
+                article_output = []
+                api_output = []
+                for row in rows:
+                    article_url, comment_api_url = row.split(',')
+                    blog_key = comment_api_url.split(';')[1].split('=')[1]
+                    article_key = comment_api_url.split(';')[3].split('=')[1]
+                    if f"/{blog_key}/{article_key}" in hdf5:
+                        upd_time = datetime.fromisoformat(hdf5[blog_key][article_key]['article'].attrs['update_time'])
+                        if (datetime.now(tz=JST) - upd_time).days > 4:
+                            continue
+                        else:
+                            del hdf5[blog_key][article_key]
+                    save_cycle += 1
+                    article_output.append(article_executor.submit(parse_article, article_url))
+                    api_output.append(api_executor.submit(get_api_json, comment_api_url))
+                for article_res, api_res in zip(article_output, api_output):
+                    entry_text, entry_title, theme, date, data_path = article_res.result()
+                    comments = api_res.result()
+                    post = hdf5.create_group(name=data_path)
+                    article = post.create_dataset('article', dtype=string_dtype(encoding='utf-8'),
+                                                  data=array(entry_text.encode('utf-8')))
+                    article.attrs['theme'] = theme
+                    article.attrs['title'] = entry_title
+                    article.attrs['update_time'] = date
+
+                    comments_dataset = post.create_group(name='comments_dataset')
+                    if comments.__len__() != 0:
+                        for order, text in enumerate(comments):
+                            comment_id = str(text['comment_id'])
+                            comment = comments_dataset.create_dataset(name=comment_id,
+                                                                      dtype=string_dtype(encoding='utf-8'), data=array(
+                                    text['comment_text'].replace('<br />', '\n').encode('utf-8')))
+                            if 'comment_author' in text.keys():
+                                comment.attrs['author_id'] = text['comment_author']['ameba_id']
+                                comment.attrs['author_blog_id'] = text['comment_author']['blog_id']
+                                comment.attrs['author_nickname'] = text['comment_author']['nickname']
+                            else:
+                                comment.attrs['author_id'] = ''
+                                comment.attrs['author_blog_id'] = -1
+                                comment.attrs['author_nickname'] = text['comment_name']
+                            comment.attrs['comment_title'] = text['comment_title']
+                            comment.attrs['comment_update_time'] = text['upd_datetime']
+                hdf5.flush()
+                if save_cycle > 1_000:
+                    with open(file=join(datadir(), 'blog_text.hdf5'), mode='wb') as hdf5_file:
+                        hdf5_file.write(hdf5_bio.getvalue())
+                    save_cycle = 0
+                    # exit()
+    with open(file=join(datadir(), 'blog_text.hdf5'), mode='wb') as hdf5_file:
+        hdf5_file.write(hdf5_bio.getvalue())
+    article_executor.shutdown()
+    api_executor.shutdown()
diff --git a/hdf5_compresser.py b/hdf5_compresser.py
new file mode 100644
index 0000000..3b9dbce
--- /dev/null
+++ b/hdf5_compresser.py
@@ -0,0 +1,51 @@
+import sys
+from os.path import join, basename, dirname
+from pprint import pprint
+from io import BytesIO
+from h5py import File
+from datetime import datetime, timezone, timedelta
+from gzip import compress, decompress
+
+from tqdm import tqdm
+
+JST = timezone(timedelta(hours=9), "JST")
+COMPRESS_METHOD = 'gzip'
+COMPRESS_OPT = 9
+# print(sys.argv)
+hdf5_bio = BytesIO()
+hdf5_bio_compressed = BytesIO()
+with open(file=join(sys.argv[1]), mode='rb') as hdf5_file:
+    hdf5_bio.write(hdf5_file.read())
+
+with File(name=hdf5_bio, mode='r') as hdf5, File(name=hdf5_bio_compressed, mode='w') as hdf5_compressed:
+    for group in hdf5.keys():
+        print(group)
+        hdf5_group = hdf5_compressed.create_group(name=group)
+        for article_id in tqdm(hdf5[group].keys()):
+            article = hdf5_group.create_group(name=article_id)
+            # print(group, article_id)
+            article_txt = hdf5[group][article_id]['article']
+            article_txt_compressed = article.create_dataset(name='article', dtype=f'S{article_txt[()].__len__() + 1}',
+                                                            shape=(1,))
+            article_txt_compressed[0] = article_txt[()]
+            for k, v in article_txt.attrs.items():
+                # print(k, v)
+                article['article'].attrs[k] = v
+            comments = article.create_group(name='comments_dataset')
+            for comment_key in hdf5[group][article_id]['comments_dataset']:
+                comment_txt = hdf5[group][article_id]['comments_dataset'][comment_key]
+                # print(group, article_id, comment_key, comment_txt[()].decode('utf-8'))
+                comment_txt_compressed = comments.create_dataset(name=comment_key,
+                                                                 dtype=f'S{comment_txt[()].__len__() + 1}', shape=(1,))
+                comment_txt_compressed[0] = comment_txt[()]
+                for k, v in comment_txt.attrs.items():
+                    comments[comment_key].attrs[k] = v
+
+name, ext = basename(sys.argv[1]).rsplit('.', maxsplit=1)
+with open(file=join(dirname(sys.argv[1]), name + '_compressed' + '.' + ext), mode='wb') as f:
+    f.write(hdf5_bio_compressed.getvalue())
+
+# None (bytes) 12.4 MiB (12,966,690)
+# only article compressed 12.4 MiB (12,973,914)
+# all gzipped 33.2 MiB (34,768,669)
+# all chunked 33.2 MiB (34,768,669)
diff --git a/resnet_finetune_vggface.py b/resnet_finetune_vggface.py
index bf843e1..8f49298 100644
--- a/resnet_finetune_vggface.py
+++ b/resnet_finetune_vggface.py
@@ -130,9 +130,9 @@ optimizer = Adam(params=[
     # {'params': model_gpu.maxpool.parameters(), 'lr': 1e-8},
     {'params': model_gpu.layer1.parameters(), 'lr': 1e-8},
     {'params': model_gpu.layer2.parameters(), 'lr': 1e-8},
-    {'params': model_gpu.layer3.parameters(), 'lr': 1e-5},
-    {'params': model_gpu.layer4.parameters(), 'lr': 1e-4},
-    {'params': model_gpu.fc.parameters(), 'lr': 1e-4},
+    {'params': model_gpu.layer3.parameters(), 'lr': 1e-6},
+    {'params': model_gpu.layer4.parameters(), 'lr': 1e-5},
+    {'params': model_gpu.fc.parameters(), 'lr': 1e-5},
 
 ])
 scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)
diff --git a/yolo_test.py b/yolo_test.py
new file mode 100644
index 0000000..85e3408
--- /dev/null
+++ b/yolo_test.py
@@ -0,0 +1,5 @@
+from super_gradients.training.models import get
+
+yolo_nas = get(model_name='yolo_nas_l', pretrained_weights='coco').cuda()
+
+yolo_nas.predict('橋迫鈴=angerme-new=12687767841-1.jpg', conf=0.8).show()