diff --git a/.gitignore b/.gitignore index a2e5fbca..7c088d5c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ logs __pycache__ wd14_tagger_model +venv +*.egg-info +build \ No newline at end of file diff --git a/README.md b/README.md index 22813cfb..4d8d5275 100644 --- a/README.md +++ b/README.md @@ -26,3 +26,63 @@ All documents are in Japanese currently, and CUI based. Including BLIP captioning and tagging by DeepDanbooru or WD14 tagger * [Image generation](https://note.com/kohya_ss/n/n2693183a798e) * [Model conversion](https://note.com/kohya_ss/n/n374f316fe4ad) + +## Windows Required Dependencies + +Python 3.10.6 and Git: + +- Python 3.10.6: https://www.python.org/ftp/python/3.10.6/python-3.10.6-amd64.exe +- git: https://git-scm.com/download/win + +Give unrestricted script access to powershell so venv can work: + +- Open an administrator powershell window +- Type `Set-ExecutionPolicy Unrestricted` and answer A +- Close admin powershell window + +## Windows Installation + +Open a regular Powershell terminal and type the following inside: + +```powershell +git clone https://github.com/kohya-ss/sd-scripts.git +cd sd-scripts + +python -m venv --system-site-packages venv +.\venv\Scripts\activate + +pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116 +pip install --upgrade -r requirements_db_finetune.txt +pip install -U -I --no-deps https://github.com/C43H66N12O12S2/stable-diffusion-webui/releases/download/f/xformers-0.0.14.dev0-cp310-cp310-win_amd64.whl + +cp .\bitsandbytes_windows\*.dll .\venv\Lib\site-packages\bitsandbytes\ +cp .\bitsandbytes_windows\cextension.py .\venv\Lib\site-packages\bitsandbytes\cextension.py +cp .\bitsandbytes_windows\main.py .\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py + +accelerate config + +``` + +Answers to accelerate config: + +```txt +- 0 +- 0 +- NO +- NO +- All +- fp16 +``` + +## Upgrade + +When a new release comes out you can upgrade your repo with the 
following command: + +```powershell +cd sd-scripts +git pull +.\venv\Scripts\activate +pip install --upgrade -r requirements_db_finetune.txt +``` + +Once the commands have completed successfully you should be ready to use the new version. \ No newline at end of file diff --git a/bitsandbytes_windows/cextension.py b/bitsandbytes_windows/cextension.py new file mode 100644 index 00000000..d38684a2 --- /dev/null +++ b/bitsandbytes_windows/cextension.py @@ -0,0 +1,54 @@ +import ctypes as ct +from pathlib import Path +from warnings import warn + +from .cuda_setup.main import evaluate_cuda_setup + + +class CUDALibrary_Singleton(object): + _instance = None + + def __init__(self): + raise RuntimeError("Call get_instance() instead") + + def initialize(self): + binary_name = evaluate_cuda_setup() + package_dir = Path(__file__).parent + binary_path = package_dir / binary_name + + if not binary_path.exists(): + print(f"CUDA SETUP: TODO: compile library for specific version: {binary_name}") + legacy_binary_name = "libbitsandbytes.so" + print(f"CUDA SETUP: Defaulting to {legacy_binary_name}...") + binary_path = package_dir / legacy_binary_name + if not binary_path.exists(): + print('CUDA SETUP: CUDA detection failed. 
Either CUDA driver not installed, CUDA not installed, or you have multiple conflicting CUDA libraries!') + print('CUDA SETUP: If you compiled from source, try again with `make CUDA_VERSION=DETECTED_CUDA_VERSION` for example, `make CUDA_VERSION=113`.') + raise Exception('CUDA SETUP: Setup Failed!') + # self.lib = ct.cdll.LoadLibrary(binary_path) + self.lib = ct.cdll.LoadLibrary(str(binary_path)) # $$$ + else: + print(f"CUDA SETUP: Loading binary {binary_path}...") + # self.lib = ct.cdll.LoadLibrary(binary_path) + self.lib = ct.cdll.LoadLibrary(str(binary_path)) # $$$ + + @classmethod + def get_instance(cls): + if cls._instance is None: + cls._instance = cls.__new__(cls) + cls._instance.initialize() + return cls._instance + + +lib = CUDALibrary_Singleton.get_instance().lib +try: + lib.cadam32bit_g32 + lib.get_context.restype = ct.c_void_p + lib.get_cusparse.restype = ct.c_void_p + COMPILED_WITH_CUDA = True +except AttributeError: + warn( + "The installed version of bitsandbytes was compiled without GPU support. " + "8-bit optimizers and GPU quantization are unavailable." + ) + COMPILED_WITH_CUDA = False diff --git a/bitsandbytes_windows/libbitsandbytes_cpu.dll b/bitsandbytes_windows/libbitsandbytes_cpu.dll new file mode 100644 index 00000000..b733af47 Binary files /dev/null and b/bitsandbytes_windows/libbitsandbytes_cpu.dll differ diff --git a/bitsandbytes_windows/libbitsandbytes_cuda116.dll b/bitsandbytes_windows/libbitsandbytes_cuda116.dll new file mode 100644 index 00000000..a999316e Binary files /dev/null and b/bitsandbytes_windows/libbitsandbytes_cuda116.dll differ diff --git a/bitsandbytes_windows/main.py b/bitsandbytes_windows/main.py new file mode 100644 index 00000000..71967a17 --- /dev/null +++ b/bitsandbytes_windows/main.py @@ -0,0 +1,166 @@ +""" +extract factors the build is dependent on: +[X] compute capability + [ ] TODO: Q - What if we have multiple GPUs of different makes? 
+- CUDA version +- Software: + - CPU-only: only CPU quantization functions (no optimizer, no matrix multipl) + - CuBLAS-LT: full-build 8-bit optimizer + - no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`) + +evaluation: + - if paths faulty, return meaningful error + - else: + - determine CUDA version + - determine capabilities + - based on that set the default path +""" + +import ctypes + +from .paths import determine_cuda_runtime_lib_path + + +def check_cuda_result(cuda, result_val): + # 3. Check for CUDA errors + if result_val != 0: + error_str = ctypes.c_char_p() + cuda.cuGetErrorString(result_val, ctypes.byref(error_str)) + print(f"CUDA exception! Error code: {error_str.value.decode()}") + +def get_cuda_version(cuda, cudart_path): + # https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART____VERSION.html#group__CUDART____VERSION + try: + cudart = ctypes.CDLL(cudart_path) + except OSError: + # TODO: shouldn't we error or at least warn here? + print(f'ERROR: libcudart.so could not be read from path: {cudart_path}!') + return None + + version = ctypes.c_int() + check_cuda_result(cuda, cudart.cudaRuntimeGetVersion(ctypes.byref(version))) + version = int(version.value) + major = version//1000 + minor = (version-(major*1000))//10 + + if major < 11: + print('CUDA SETUP: CUDA version lower than 11 are currenlty not supported for LLM.int8(). You will be only to use 8-bit optimizers and quantization routines!!') + + return f'{major}{minor}' + + +def get_cuda_lib_handle(): + # 1. find libcuda.so library (GPU driver) (/usr/lib) + try: + cuda = ctypes.CDLL("libcuda.so") + except OSError: + # TODO: shouldn't we error or at least warn here? + print('CUDA SETUP: WARNING! libcuda.so not found! Do you have a CUDA driver installed? If you are on a cluster, make sure you are on a CUDA machine!') + return None + check_cuda_result(cuda, cuda.cuInit(0)) + + return cuda + + +def get_compute_capabilities(cuda): + """ + 1. 
find libcuda.so library (GPU driver) (/usr/lib) + init_device -> init variables -> call function by reference + 2. call extern C function to determine CC + (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html) + 3. Check for CUDA errors + https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api + # bits taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549 + """ + + + nGpus = ctypes.c_int() + cc_major = ctypes.c_int() + cc_minor = ctypes.c_int() + + device = ctypes.c_int() + + check_cuda_result(cuda, cuda.cuDeviceGetCount(ctypes.byref(nGpus))) + ccs = [] + for i in range(nGpus.value): + check_cuda_result(cuda, cuda.cuDeviceGet(ctypes.byref(device), i)) + ref_major = ctypes.byref(cc_major) + ref_minor = ctypes.byref(cc_minor) + # 2. call extern C function to determine CC + check_cuda_result( + cuda, cuda.cuDeviceComputeCapability(ref_major, ref_minor, device) + ) + ccs.append(f"{cc_major.value}.{cc_minor.value}") + + return ccs + + +# def get_compute_capability()-> Union[List[str, ...], None]: # FIXME: error +def get_compute_capability(cuda): + """ + Extracts the highest compute capbility from all available GPUs, as compute + capabilities are downwards compatible. If no GPUs are detected, it returns + None. + """ + ccs = get_compute_capabilities(cuda) + if ccs is not None: + # TODO: handle different compute capabilities; for now, take the max + return ccs[-1] + return None + + +def evaluate_cuda_setup(): + print('') + print('='*35 + 'BUG REPORT' + '='*35) + print('Welcome to bitsandbytes. 
For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues') + print('For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link') + print('='*80) + return "libbitsandbytes_cuda116.dll" # $$$ + + binary_name = "libbitsandbytes_cpu.so" + #if not torch.cuda.is_available(): + #print('No GPU detected. Loading CPU library...') + #return binary_name + + cudart_path = determine_cuda_runtime_lib_path() + if cudart_path is None: + print( + "WARNING: No libcudart.so found! Install CUDA or the cudatoolkit package (anaconda)!" + ) + return binary_name + + print(f"CUDA SETUP: CUDA runtime path found: {cudart_path}") + cuda = get_cuda_lib_handle() + cc = get_compute_capability(cuda) + print(f"CUDA SETUP: Highest compute capability among GPUs detected: {cc}") + cuda_version_string = get_cuda_version(cuda, cudart_path) + + + if cc == '': + print( + "WARNING: No GPU detected! Check your CUDA paths. Processing to load CPU-only library..." 
+ ) + return binary_name + + # 7.5 is the minimum CC vor cublaslt + has_cublaslt = cc in ["7.5", "8.0", "8.6"] + + # TODO: + # (1) CUDA missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible) + # (2) Multiple CUDA versions installed + + # we use ls -l instead of nvcc to determine the cuda version + # since most installations will have the libcudart.so installed, but not the compiler + print(f'CUDA SETUP: Detected CUDA version {cuda_version_string}') + + def get_binary_name(): + "if not has_cublaslt (CC < 7.5), then we have to choose _nocublaslt.so" + bin_base_name = "libbitsandbytes_cuda" + if has_cublaslt: + return f"{bin_base_name}{cuda_version_string}.so" + else: + return f"{bin_base_name}{cuda_version_string}_nocublaslt.so" + + binary_name = get_binary_name() + + return binary_name diff --git a/fine_tune.py b/fine_tune.py index 66e3c1f1..4795edd1 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -50,7 +50,7 @@ import numpy as np from einops import rearrange from torch import einsum -import model_util +import library.model_util as model_util # Tokenizer: checkpointから読み込むのではなくあらかじめ提供されているものを使う TOKENIZER_PATH = "openai/clip-vit-large-patch14" diff --git a/clean_captions_and_tags.py b/finetune/clean_captions_and_tags.py similarity index 100% rename from clean_captions_and_tags.py rename to finetune/clean_captions_and_tags.py diff --git a/gen_img_diffusers.py b/finetune/gen_img_diffusers.py similarity index 100% rename from gen_img_diffusers.py rename to finetune/gen_img_diffusers.py diff --git a/hypernetwork_nai.py b/finetune/hypernetwork_nai.py similarity index 100% rename from hypernetwork_nai.py rename to finetune/hypernetwork_nai.py diff --git a/make_captions.py b/finetune/make_captions.py similarity index 100% rename from make_captions.py rename to finetune/make_captions.py diff --git a/merge_captions_to_metadata.py b/finetune/merge_captions_to_metadata.py similarity index 100% rename from merge_captions_to_metadata.py rename to 
finetune/merge_captions_to_metadata.py diff --git a/merge_dd_tags_to_metadata.py b/finetune/merge_dd_tags_to_metadata.py similarity index 100% rename from merge_dd_tags_to_metadata.py rename to finetune/merge_dd_tags_to_metadata.py diff --git a/prepare_buckets_latents.py b/finetune/prepare_buckets_latents.py similarity index 99% rename from prepare_buckets_latents.py rename to finetune/prepare_buckets_latents.py index f4c6a371..e2cebe8d 100644 --- a/prepare_buckets_latents.py +++ b/finetune/prepare_buckets_latents.py @@ -14,7 +14,7 @@ import cv2 import torch from torchvision import transforms -import model_util +import library.model_util as model_util DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') diff --git a/tag_images_by_wd14_tagger.py b/finetune/tag_images_by_wd14_tagger.py similarity index 100% rename from tag_images_by_wd14_tagger.py rename to finetune/tag_images_by_wd14_tagger.py diff --git a/library/__init__.py b/library/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/model_util.py b/library/model_util.py similarity index 100% rename from model_util.py rename to library/model_util.py diff --git a/requirements_blip.txt b/requirements_blip.txt index 2390c1d1..ae5aeb14 100644 --- a/requirements_blip.txt +++ b/requirements_blip.txt @@ -1,3 +1,4 @@ timm==0.4.12 transformers==4.16.2 fairscale==0.4.4 +. \ No newline at end of file diff --git a/requirements_db_finetune.txt b/requirements_db_finetune.txt index 38f3852b..f8493566 100644 --- a/requirements_db_finetune.txt +++ b/requirements_db_finetune.txt @@ -6,3 +6,7 @@ opencv-python einops pytorch_lightning safetensors +bitsandbytes==0.35.0 +tensorboard +diffusers[torch]==0.10.2 +. \ No newline at end of file diff --git a/requirements_wd14_tagger.txt b/requirements_wd14_tagger.txt index 4cc373e6..25067598 100644 --- a/requirements_wd14_tagger.txt +++ b/requirements_wd14_tagger.txt @@ -1,2 +1,3 @@ tensorflow<2.11 huggingface-hub +. 
\ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..7bf54834 --- /dev/null +++ b/setup.py @@ -0,0 +1,3 @@ +from setuptools import setup, find_packages + +setup(name = "library", packages = find_packages()) \ No newline at end of file diff --git a/convert_diffusers20_original_sd.py b/tools/convert_diffusers20_original_sd.py similarity index 99% rename from convert_diffusers20_original_sd.py rename to tools/convert_diffusers20_original_sd.py index 5df5f954..1a698080 100644 --- a/convert_diffusers20_original_sd.py +++ b/tools/convert_diffusers20_original_sd.py @@ -9,7 +9,7 @@ import os import torch from diffusers import StableDiffusionPipeline -import model_util +import library.model_util as model_util def convert(args): diff --git a/detect_face_rotate.py b/tools/detect_face_rotate.py similarity index 100% rename from detect_face_rotate.py rename to tools/detect_face_rotate.py diff --git a/train_db_fixed.py b/train_db.py similarity index 99% rename from train_db_fixed.py rename to train_db.py index ce40aa4e..9ae6c8ca 100644 --- a/train_db_fixed.py +++ b/train_db.py @@ -43,7 +43,7 @@ import cv2 from einops import rearrange from torch import einsum -import model_util +import library.model_util as model_util # Tokenizer: checkpointから読み込むのではなくあらかじめ提供されているものを使う TOKENIZER_PATH = "openai/clip-vit-large-patch14"