Make BLIP work without extra downloads.

2026-04-10 15:00:23 +00:00 · 2022-12-19 21:58:41 +09:00
parent 5f7693be04
commit 7cca345345
5 changed files with 1525 additions and 6 deletions
--- a/finetune/make_captions.py
+++ b/finetune/make_captions.py
@@ -1,6 +1,3 @@
-# このスクリプトのライセンスは、Apache License 2.0とします
-# (c) 2022 Kohya S. @kohya_ss
-
 import argparse
 import glob
 import os
@@ -12,7 +9,7 @@ import numpy as np
 import torch
 from torchvision import transforms
 from torchvision.transforms.functional import InterpolationMode
-from models.blip import blip_decoder
+from blip import blip_decoder
 # from Salesforce_BLIP.models.blip import blip_decoder

 DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -25,7 +22,7 @@ def main(args):

  print(f"loading BLIP caption: {args.caption_weights}")
  image_size = 384
-  model = blip_decoder(pretrained=args.caption_weights, image_size=image_size, vit='large')
+  model = blip_decoder(pretrained=args.caption_weights, image_size=image_size, vit='large', med_config="./med_config.json")
  model.eval()
  model = model.to(DEVICE)
  print("BLIP loaded")
@@ -75,7 +72,7 @@ def main(args):
 if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  parser.add_argument("train_data_dir", type=str, help="directory for train images / 学習画像データのディレクトリ")
-  parser.add_argument("caption_weights", type=str,
+  parser.add_argument("--caption_weights", type=str, default="https://storage.googleapis.com/sfr-vision-language-research/BLIP/models/model_large_caption.pth", 
                      help="BLIP caption weights (model_large_caption.pth) / BLIP captionの重みファイル(model_large_caption.pth)")
  parser.add_argument("--caption_extention", type=str, default=None,
                      help="extension of caption file (for backward compatibility) / 出力されるキャプションファイルの拡張子（スペルミスしていたのを残してあります）")