From bf2de5620c7a7f283878f6090d8feab96abda44e Mon Sep 17 00:00:00 2001 From: mgz <49577754+mgz-dev@users.noreply.github.com> Date: Sat, 3 Feb 2024 20:09:37 -0600 Subject: [PATCH 01/10] fix formatting in resize_lora.py --- networks/resize_lora.py | 375 ++++++++++++++++++++-------------------- 1 file changed, 189 insertions(+), 186 deletions(-) diff --git a/networks/resize_lora.py b/networks/resize_lora.py index 03fc545e..3c866f1e 100644 --- a/networks/resize_lora.py +++ b/networks/resize_lora.py @@ -14,68 +14,68 @@ MIN_SV = 1e-6 # Model save and load functions def load_state_dict(file_name, dtype): - if model_util.is_safetensors(file_name): - sd = load_file(file_name) - with safe_open(file_name, framework="pt") as f: - metadata = f.metadata() - else: - sd = torch.load(file_name, map_location='cpu') - metadata = None + if model_util.is_safetensors(file_name): + sd = load_file(file_name) + with safe_open(file_name, framework="pt") as f: + metadata = f.metadata() + else: + sd = torch.load(file_name, map_location='cpu') + metadata = None - for key in list(sd.keys()): - if type(sd[key]) == torch.Tensor: - sd[key] = sd[key].to(dtype) + for key in list(sd.keys()): + if type(sd[key]) == torch.Tensor: + sd[key] = sd[key].to(dtype) - return sd, metadata + return sd, metadata def save_to_file(file_name, model, state_dict, dtype, metadata): - if dtype is not None: - for key in list(state_dict.keys()): - if type(state_dict[key]) == torch.Tensor: - state_dict[key] = state_dict[key].to(dtype) + if dtype is not None: + for key in list(state_dict.keys()): + if type(state_dict[key]) == torch.Tensor: + state_dict[key] = state_dict[key].to(dtype) - if model_util.is_safetensors(file_name): - save_file(model, file_name, metadata) - else: - torch.save(model, file_name) + if model_util.is_safetensors(file_name): + save_file(model, file_name, metadata) + else: + torch.save(model, file_name) # Indexing functions def index_sv_cumulative(S, target): - original_sum = float(torch.sum(S)) - cumulative_sums = torch.cumsum(S, dim=0)/original_sum - index = int(torch.searchsorted(cumulative_sums, target)) + 1 - index = max(1, min(index, len(S)-1)) + original_sum = float(torch.sum(S)) + cumulative_sums = torch.cumsum(S, dim=0)/original_sum + index = int(torch.searchsorted(cumulative_sums, target)) + 1 + index = max(1, min(index, len(S)-1)) - return index + return index def index_sv_fro(S, target): - S_squared = S.pow(2) - s_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0)/s_fro_sq - index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 - index = max(1, min(index, len(S)-1)) + S_squared = S.pow(2) + s_fro_sq = float(torch.sum(S_squared)) + sum_S_squared = torch.cumsum(S_squared, dim=0)/s_fro_sq + index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 + index = max(1, min(index, len(S)-1)) - return index + return index def index_sv_ratio(S, target): - max_sv = S[0] - min_sv = max_sv/target - index = int(torch.sum(S > min_sv).item()) - index = max(1, min(index, len(S)-1)) + max_sv = S[0] + min_sv = max_sv/target + index = int(torch.sum(S > min_sv).item()) + index = max(1, min(index, len(S)-1)) - return index + return index # Modified from Kohaku-blueleaf's extract/merge functions def extract_conv(weight, lora_rank, dynamic_method, dynamic_param, device, scale=1): out_size, in_size, kernel_size, _ = weight.size() U, S, Vh = torch.linalg.svd(weight.reshape(out_size, -1).to(device)) - + param_dict = rank_resize(S, lora_rank, dynamic_method, dynamic_param, scale) lora_rank = param_dict["new_rank"] @@ -92,17 +92,17 @@ def extract_conv(weight, lora_rank, dynamic_method, dynamic_param, device, scale def extract_linear(weight, lora_rank, dynamic_method, dynamic_param, device, scale=1): out_size, in_size = weight.size() - + U, S, Vh = torch.linalg.svd(weight.to(device)) - + param_dict = rank_resize(S, lora_rank, dynamic_method, dynamic_param, scale) lora_rank = param_dict["new_rank"] - + U = U[:, :lora_rank] S = S[:lora_rank] U = U @ torch.diag(S) Vh = Vh[:lora_rank, :] - + param_dict["lora_down"] = Vh.reshape(lora_rank, in_size).cpu() param_dict["lora_up"] = U.reshape(out_size, lora_rank).cpu() del U, S, Vh, weight @@ -113,7 +113,7 @@ def merge_conv(lora_down, lora_up, device): in_rank, in_size, kernel_size, k_ = lora_down.shape out_size, out_rank, _, _ = lora_up.shape assert in_rank == out_rank and kernel_size == k_, f"rank {in_rank} {out_rank} or kernel {kernel_size} {k_} mismatch" - + lora_down = lora_down.to(device) lora_up = lora_up.to(device) @@ -127,31 +127,31 @@ def merge_linear(lora_down, lora_up, device): in_rank, in_size = lora_down.shape out_size, out_rank = lora_up.shape assert in_rank == out_rank, f"rank {in_rank} {out_rank} mismatch" - + lora_down = lora_down.to(device) lora_up = lora_up.to(device) - + weight = lora_up @ lora_down del lora_up, lora_down return weight - + # Calculate new rank def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1): param_dict = {} - if dynamic_method=="sv_ratio": + if dynamic_method == "sv_ratio": # Calculate new dim and alpha based off ratio new_rank = index_sv_ratio(S, dynamic_param) + 1 new_alpha = float(scale*new_rank) - elif dynamic_method=="sv_cumulative": + elif dynamic_method == "sv_cumulative": # Calculate new dim and alpha based off cumulative sum new_rank = index_sv_cumulative(S, dynamic_param) + 1 new_alpha = float(scale*new_rank) - elif dynamic_method=="sv_fro": + elif dynamic_method == "sv_fro": # Calculate new dim and alpha based off sqrt sum of squares new_rank = index_sv_fro(S, dynamic_param) + 1 new_alpha = float(scale*new_rank) @@ -159,19 +159,17 @@ def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1): new_rank = rank new_alpha = float(scale*new_rank) - - if S[0] <= MIN_SV: # Zero matrix, set dim to 1 + if S[0] <= MIN_SV: # Zero matrix, set dim to 1 new_rank = 1 new_alpha = float(scale*new_rank) - elif new_rank > rank: # cap max rank at rank + elif new_rank > rank: # cap max rank at rank new_rank = rank new_alpha = float(scale*new_rank) - # Calculate resize info s_sum = torch.sum(torch.abs(S)) s_rank = torch.sum(torch.abs(S[:new_rank])) - + S_squared = S.pow(2) s_fro = torch.sqrt(torch.sum(S_squared)) s_red_fro = torch.sqrt(torch.sum(S_squared[:new_rank])) @@ -187,176 +185,181 @@ def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1): def resize_lora_model(lora_sd, new_rank, save_dtype, device, dynamic_method, dynamic_param, verbose): - network_alpha = None - network_dim = None - verbose_str = "\n" - fro_list = [] + network_alpha = None + network_dim = None + verbose_str = "\n" + fro_list = [] - # Extract loaded lora dim and alpha - for key, value in lora_sd.items(): - if network_alpha is None and 'alpha' in key: - network_alpha = value - if network_dim is None and 'lora_down' in key and len(value.size()) == 2: - network_dim = value.size()[0] - if network_alpha is not None and network_dim is not None: - break - if network_alpha is None: - network_alpha = network_dim + # Extract loaded lora dim and alpha + for key, value in lora_sd.items(): + if network_alpha is None and 'alpha' in key: + network_alpha = value + if network_dim is None and 'lora_down' in key and len(value.size()) == 2: + network_dim = value.size()[0] + if network_alpha is not None and network_dim is not None: + break + if network_alpha is None: + network_alpha = network_dim - scale = network_alpha/network_dim + scale = network_alpha/network_dim - if dynamic_method: - print(f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}") + if dynamic_method: + print(f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}") - lora_down_weight = None - lora_up_weight = None + lora_down_weight = None + lora_up_weight = None - o_lora_sd = lora_sd.copy() - block_down_name = None - block_up_name = None + o_lora_sd = lora_sd.copy() + block_down_name = None + block_up_name = None - with torch.no_grad(): - for key, value in tqdm(lora_sd.items()): - weight_name = None - if 'lora_down' in key: - block_down_name = key.rsplit('.lora_down', 1)[0] - weight_name = key.rsplit(".", 1)[-1] - lora_down_weight = value - else: - continue + with torch.no_grad(): + for key, value in tqdm(lora_sd.items()): + weight_name = None + if 'lora_down' in key: + block_down_name = key.rsplit('.lora_down', 1)[0] + weight_name = key.rsplit(".", 1)[-1] + lora_down_weight = value + else: + continue - # find corresponding lora_up and alpha - block_up_name = block_down_name - lora_up_weight = lora_sd.get(block_up_name + '.lora_up.' + weight_name, None) - lora_alpha = lora_sd.get(block_down_name + '.alpha', None) + # find corresponding lora_up and alpha + block_up_name = block_down_name + lora_up_weight = lora_sd.get(block_up_name + '.lora_up.' + weight_name, None) + lora_alpha = lora_sd.get(block_down_name + '.alpha', None) - weights_loaded = (lora_down_weight is not None and lora_up_weight is not None) + weights_loaded = (lora_down_weight is not None and lora_up_weight is not None) - if weights_loaded: + if weights_loaded: - conv2d = (len(lora_down_weight.size()) == 4) - if lora_alpha is None: - scale = 1.0 - else: - scale = lora_alpha/lora_down_weight.size()[0] + conv2d = (len(lora_down_weight.size()) == 4) + if lora_alpha is None: + scale = 1.0 + else: + scale = lora_alpha/lora_down_weight.size()[0] - if conv2d: - full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) - param_dict = extract_conv(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) - else: - full_weight_matrix = merge_linear(lora_down_weight, lora_up_weight, device) - param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + if conv2d: + full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) + param_dict = extract_conv(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + else: + full_weight_matrix = merge_linear(lora_down_weight, lora_up_weight, device) + param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) - if verbose: - max_ratio = param_dict['max_ratio'] - sum_retained = param_dict['sum_retained'] - fro_retained = param_dict['fro_retained'] - if not np.isnan(fro_retained): - fro_list.append(float(fro_retained)) + if verbose: + max_ratio = param_dict['max_ratio'] + sum_retained = param_dict['sum_retained'] + fro_retained = param_dict['fro_retained'] + if not np.isnan(fro_retained): + fro_list.append(float(fro_retained)) - verbose_str+=f"{block_down_name:75} | " - verbose_str+=f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" + verbose_str += f"{block_down_name:75} | " + verbose_str += f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" - if verbose and dynamic_method: - verbose_str+=f", dynamic | dim: {param_dict['new_rank']}, alpha: {param_dict['new_alpha']}\n" - else: - verbose_str+=f"\n" + if verbose and dynamic_method: + verbose_str += f", dynamic | dim: {param_dict['new_rank']}, alpha: {param_dict['new_alpha']}\n" + else: + verbose_str += "\n" - new_alpha = param_dict['new_alpha'] - o_lora_sd[block_down_name + "." + "lora_down.weight"] = param_dict["lora_down"].to(save_dtype).contiguous() - o_lora_sd[block_up_name + "." + "lora_up.weight"] = param_dict["lora_up"].to(save_dtype).contiguous() - o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict['new_alpha']).to(save_dtype) + new_alpha = param_dict['new_alpha'] + o_lora_sd[block_down_name + "." + "lora_down.weight"] = param_dict["lora_down"].to(save_dtype).contiguous() + o_lora_sd[block_up_name + "." + "lora_up.weight"] = param_dict["lora_up"].to(save_dtype).contiguous() + o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict['new_alpha']).to(save_dtype) - block_down_name = None - block_up_name = None - lora_down_weight = None - lora_up_weight = None - weights_loaded = False - del param_dict + block_down_name = None + block_up_name = None + lora_down_weight = None + lora_up_weight = None + weights_loaded = False + del param_dict - if verbose: - print(verbose_str) + if verbose: + print(verbose_str) - print(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") - print("resizing complete") - return o_lora_sd, network_dim, new_alpha + print(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") + print("resizing complete") + return o_lora_sd, network_dim, new_alpha def resize(args): - if args.save_to is None or not (args.save_to.endswith('.ckpt') or args.save_to.endswith('.pt') or args.save_to.endswith('.pth') or args.save_to.endswith('.safetensors')): - raise Exception("The --save_to argument must be specified and must be a .ckpt , .pt, .pth or .safetensors file.") + if ( + args.save_to is None or + not (args.save_to.endswith('.ckpt') or + args.save_to.endswith('.pt') or + args.save_to.endswith('.pth') or + args.save_to.endswith('.safetensors')) + ): + raise Exception("The --save_to argument must be specified and must be a .ckpt , .pt, .pth or .safetensors file.") - - def str_to_dtype(p): - if p == 'float': - return torch.float - if p == 'fp16': - return torch.float16 - if p == 'bf16': - return torch.bfloat16 - return None + def str_to_dtype(p): + if p == 'float': + return torch.float + if p == 'fp16': + return torch.float16 + if p == 'bf16': + return torch.bfloat16 + return None - if args.dynamic_method and not args.dynamic_param: - raise Exception("If using dynamic_method, then dynamic_param is required") + if args.dynamic_method and not args.dynamic_param: + raise Exception("If using dynamic_method, then dynamic_param is required") - merge_dtype = str_to_dtype('float') # matmul method above only seems to work in float32 - save_dtype = str_to_dtype(args.save_precision) - if save_dtype is None: - save_dtype = merge_dtype + merge_dtype = str_to_dtype('float') # matmul method above only seems to work in float32 + save_dtype = str_to_dtype(args.save_precision) + if save_dtype is None: + save_dtype = merge_dtype - print("loading Model...") - lora_sd, metadata = load_state_dict(args.model, merge_dtype) + print("loading Model...") + lora_sd, metadata = load_state_dict(args.model, merge_dtype) - print("Resizing Lora...") - state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) + print("Resizing Lora...") + state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) - # update metadata - if metadata is None: - metadata = {} + # update metadata + if metadata is None: + metadata = {} - comment = metadata.get("ss_training_comment", "") + comment = metadata.get("ss_training_comment", "") - if not args.dynamic_method: - metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}; {comment}" - metadata["ss_network_dim"] = str(args.new_rank) - metadata["ss_network_alpha"] = str(new_alpha) - else: - metadata["ss_training_comment"] = f"Dynamic resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" - metadata["ss_network_dim"] = 'Dynamic' - metadata["ss_network_alpha"] = 'Dynamic' + if not args.dynamic_method: + metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}; {comment}" + metadata["ss_network_dim"] = str(args.new_rank) + metadata["ss_network_alpha"] = str(new_alpha) + else: + metadata["ss_training_comment"] = f"Dynamic resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" + metadata["ss_network_dim"] = 'Dynamic' + metadata["ss_network_alpha"] = 'Dynamic' - model_hash, legacy_hash = train_util.precalculate_safetensors_hashes(state_dict, metadata) - metadata["sshs_model_hash"] = model_hash - metadata["sshs_legacy_hash"] = legacy_hash + model_hash, legacy_hash = train_util.precalculate_safetensors_hashes(state_dict, metadata) + metadata["sshs_model_hash"] = model_hash + metadata["sshs_legacy_hash"] = legacy_hash - print(f"saving model to: {args.save_to}") - save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) + print(f"saving model to: {args.save_to}") + save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) def setup_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser() - parser.add_argument("--save_precision", type=str, default=None, - choices=[None, "float", "fp16", "bf16"], help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat") - parser.add_argument("--new_rank", type=int, default=4, - help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") - parser.add_argument("--save_to", type=str, default=None, - help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors") - parser.add_argument("--model", type=str, default=None, - help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors") - parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う") - parser.add_argument("--verbose", action="store_true", - help="Display verbose resizing information / rank変更時の詳細情報を出力する") - parser.add_argument("--dynamic_method", type=str, default=None, choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], - help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank") - parser.add_argument("--dynamic_param", type=float, default=None, - help="Specify target for dynamic reduction") - - return parser + parser.add_argument("--save_precision", type=str, default=None, + choices=[None, "float", "fp16", "bf16"], help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat") + parser.add_argument("--new_rank", type=int, default=4, + help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") + parser.add_argument("--save_to", type=str, default=None, + help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors") + parser.add_argument("--model", type=str, default=None, + help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors") + parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う") + parser.add_argument("--verbose", action="store_true", + help="Display verbose resizing information / rank変更時の詳細情報を出力する") + parser.add_argument("--dynamic_method", type=str, default=None, choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], + help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank") + parser.add_argument("--dynamic_param", type=float, default=None, + help="Specify target for dynamic reduction") + + return parser if __name__ == '__main__': - parser = setup_parser() + parser = setup_parser() - args = parser.parse_args() - resize(args) + args = parser.parse_args() + resize(args) From 1492bcbfa2ba0aa4de26d829b7db1397348f1785 Mon Sep 17 00:00:00 2001 From: mgz <49577754+mgz-dev@users.noreply.github.com> Date: Sat, 3 Feb 2024 23:18:55 -0600 Subject: [PATCH 02/10] add --new_conv_rank option update script to also take a separate conv rank value --- networks/resize_lora.py | 44 +++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/networks/resize_lora.py b/networks/resize_lora.py index 3c866f1e..c6086268 100644 --- a/networks/resize_lora.py +++ b/networks/resize_lora.py @@ -2,11 +2,12 @@ # This code is based off the extract_lora_from_models.py file which is based on https://github.com/cloneofsimo/lora/blob/develop/lora_diffusion/cli_svd.py # Thanks to cloneofsimo +import os import argparse import torch -from safetensors.torch import load_file, save_file, safe_open +from safetensors.torch import load_file, save_file from tqdm import tqdm -from library import train_util, model_util +from library import train_util import numpy as np MIN_SV = 1e-6 @@ -14,32 +15,29 @@ MIN_SV = 1e-6 # Model save and load functions def load_state_dict(file_name, dtype): - if model_util.is_safetensors(file_name): + if os.path.splitext(file_name)[1] == ".safetensors": sd = load_file(file_name) - with safe_open(file_name, framework="pt") as f: - metadata = f.metadata() + metadata = train_util.load_metadata_from_safetensors(file_name) else: - sd = torch.load(file_name, map_location='cpu') - metadata = None + sd = torch.load(file_name, map_location="cpu") + metadata = {} for key in list(sd.keys()): - if type(sd[key]) == torch.Tensor: + if isinstance(sd[key], torch.Tensor): sd[key] = sd[key].to(dtype) return sd, metadata - -def save_to_file(file_name, model, state_dict, dtype, metadata): +def save_to_file(file_name, state_dict, dtype, metadata): if dtype is not None: for key in list(state_dict.keys()): - if type(state_dict[key]) == torch.Tensor: + if isinstance(state_dict[key], torch.Tensor): state_dict[key] = state_dict[key].to(dtype) - if model_util.is_safetensors(file_name): - save_file(model, file_name, metadata) + if os.path.splitext(file_name)[1] == ".safetensors": + save_file(state_dict, file_name, metadata=metadata) else: - torch.save(model, file_name) - + torch.save(state_dict, file_name) # Indexing functions @@ -54,8 +52,8 @@ def index_sv_cumulative(S, target): def index_sv_fro(S, target): S_squared = S.pow(2) - s_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0)/s_fro_sq + S_fro_sq = float(torch.sum(S_squared)) + sum_S_squared = torch.cumsum(S_squared, dim=0)/S_fro_sq index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 index = max(1, min(index, len(S)-1)) @@ -184,7 +182,7 @@ def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1): return param_dict -def resize_lora_model(lora_sd, new_rank, save_dtype, device, dynamic_method, dynamic_param, verbose): +def resize_lora_model(lora_sd, new_rank, new_conv_rank, save_dtype, device, dynamic_method, dynamic_param, verbose): network_alpha = None network_dim = None verbose_str = "\n" @@ -240,7 +238,7 @@ def resize_lora_model(lora_sd, new_rank, save_dtype, device, dynamic_method, dyn if conv2d: full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) - param_dict = extract_conv(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + param_dict = extract_conv(full_weight_matrix, new_conv_rank, dynamic_method, dynamic_param, device, scale) else: full_weight_matrix = merge_linear(lora_down_weight, lora_up_weight, device) param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) @@ -290,6 +288,8 @@ def resize(args): ): raise Exception("The --save_to argument must be specified and must be a .ckpt , .pt, .pth or .safetensors file.") + args.new_conv_rank = args.new_conv_rank if args.new_conv_rank is not None else args.new_rank + def str_to_dtype(p): if p == 'float': return torch.float @@ -311,7 +311,7 @@ def resize(args): lora_sd, metadata = load_state_dict(args.model, merge_dtype) print("Resizing Lora...") - state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) + state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, args.new_conv_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) # update metadata if metadata is None: @@ -333,7 +333,7 @@ def resize(args): metadata["sshs_legacy_hash"] = legacy_hash print(f"saving model to: {args.save_to}") - save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) + save_to_file(args.save_to, state_dict, save_dtype, metadata) def setup_parser() -> argparse.ArgumentParser: @@ -343,6 +343,8 @@ def setup_parser() -> argparse.ArgumentParser: choices=[None, "float", "fp16", "bf16"], help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat") parser.add_argument("--new_rank", type=int, default=4, help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") + parser.add_argument("--new_conv_rank", type=int, default=None, + help="Specify rank of output LoRA for Conv2d 3x3, None for same as new_rank / 出力するConv2D 3x3 LoRAのrank (dim)、Noneでnew_rankと同じ") parser.add_argument("--save_to", type=str, default=None, help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors") parser.add_argument("--model", type=str, default=None, From a6f1ed2e140eb4d4d37c0bb0502a7c0fd0621f5f Mon Sep 17 00:00:00 2001 From: tamlog06 Date: Sun, 18 Feb 2024 13:20:47 +0000 Subject: [PATCH 03/10] fix dylora create_modules error --- networks/dylora.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/networks/dylora.py b/networks/dylora.py index e5a55d19..64e39eaf 100644 --- a/networks/dylora.py +++ b/networks/dylora.py @@ -12,7 +12,9 @@ import math import os import random -from typing import List, Tuple, Union +from typing import Dict, List, Optional, Tuple, Type, Union +from diffusers import AutoencoderKL +from transformers import CLIPTextModel import torch from torch import nn @@ -165,7 +167,15 @@ class DyLoRAModule(torch.nn.Module): super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) -def create_network(multiplier, network_dim, network_alpha, vae, text_encoder, unet, **kwargs): +def create_network( + multiplier: float, + network_dim: Optional[int], + network_alpha: Optional[float], + vae: AutoencoderKL, + text_encoder: Union[CLIPTextModel, List[CLIPTextModel]], + unet, + **kwargs, +): if network_dim is None: network_dim = 4 # default if network_alpha is None: @@ -182,6 +192,7 @@ def create_network(multiplier, network_dim, network_alpha, vae, text_encoder, un conv_alpha = 1.0 else: conv_alpha = float(conv_alpha) + if unit is not None: unit = int(unit) else: @@ -306,8 +317,22 @@ class DyLoRANetwork(torch.nn.Module): lora = module_class(lora_name, child_module, self.multiplier, dim, alpha, unit) loras.append(lora) return loras + + text_encoders = text_encoder if type(text_encoder) == list else [text_encoder] + + self.text_encoder_loras = [] + for i, text_encoder in enumerate(text_encoders): + if len(text_encoders) > 1: + index = i + 1 + print(f"create LoRA for Text Encoder {index}") + else: + index = None + print(f"create LoRA for Text Encoder") + + text_encoder_loras = create_modules(False, text_encoder, DyLoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE) + self.text_encoder_loras.extend(text_encoder_loras) - self.text_encoder_loras = create_modules(False, text_encoder, DyLoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE) + # self.text_encoder_loras = create_modules(False, text_encoder, DyLoRANetwork.TEXT_ENCODER_TARGET_REPLACE_MODULE) print(f"create LoRA for Text Encoder: {len(self.text_encoder_loras)} modules.") # extend U-Net target modules if conv2d 3x3 is enabled, or load from weights From f4132018c568db2a822f1adf2c55e2615128a485 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 19:25:31 +0900 Subject: [PATCH 04/10] fix to work with cpu_count() == 1 closes #1134 --- fine_tune.py | 4 ++-- sdxl_train.py | 4 ++-- sdxl_train_control_net_lllite.py | 4 ++-- sdxl_train_control_net_lllite_old.py | 4 ++-- tools/cache_latents.py | 4 ++-- tools/cache_text_encoder_outputs.py | 4 ++-- train_controlnet.py | 4 ++-- train_db.py | 4 ++-- train_network.py | 4 ++-- train_textual_inversion.py | 4 ++-- train_textual_inversion_XTI.py | 4 ++-- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/fine_tune.py b/fine_tune.py index 8df896b4..875a9195 100644 --- a/fine_tune.py +++ b/fine_tune.py @@ -211,8 +211,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params=trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, batch_size=1, diff --git a/sdxl_train.py b/sdxl_train.py index aa161e8a..e0df263d 100644 --- a/sdxl_train.py +++ b/sdxl_train.py @@ -354,8 +354,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params=params_to_optimize) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, batch_size=1, diff --git a/sdxl_train_control_net_lllite.py b/sdxl_train_control_net_lllite.py index b11999bd..1e5f9234 100644 --- a/sdxl_train_control_net_lllite.py +++ b/sdxl_train_control_net_lllite.py @@ -242,8 +242,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, diff --git a/sdxl_train_control_net_lllite_old.py b/sdxl_train_control_net_lllite_old.py index 89a1bc8e..dac56eed 100644 --- a/sdxl_train_control_net_lllite_old.py +++ b/sdxl_train_control_net_lllite_old.py @@ -210,8 +210,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, diff --git a/tools/cache_latents.py b/tools/cache_latents.py index e25506e4..347db27f 100644 --- a/tools/cache_latents.py +++ b/tools/cache_latents.py @@ -116,8 +116,8 @@ def cache_to_disk(args: argparse.Namespace) -> None: # dataloaderを準備する train_dataset_group.set_caching_mode("latents") - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, diff --git a/tools/cache_text_encoder_outputs.py b/tools/cache_text_encoder_outputs.py index 46bffc4e..5f1d6d20 100644 --- a/tools/cache_text_encoder_outputs.py +++ b/tools/cache_text_encoder_outputs.py @@ -121,8 +121,8 @@ def cache_to_disk(args: argparse.Namespace) -> None: # dataloaderを準備する train_dataset_group.set_caching_mode("text") - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, diff --git a/train_controlnet.py b/train_controlnet.py index 8963a5d6..dc73a91c 100644 --- a/train_controlnet.py +++ b/train_controlnet.py @@ -239,8 +239,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, diff --git a/train_db.py b/train_db.py index c89caaf2..8d36097a 100644 --- a/train_db.py +++ b/train_db.py @@ -180,8 +180,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, batch_size=1, diff --git a/train_network.py b/train_network.py index af15560c..e0fa6945 100644 --- a/train_network.py +++ b/train_network.py @@ -349,8 +349,8 @@ class NetworkTrainer: optimizer_name, optimizer_args, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, diff --git a/train_textual_inversion.py b/train_textual_inversion.py index a78a37b2..df1d8485 100644 --- a/train_textual_inversion.py +++ b/train_textual_inversion.py @@ -385,8 +385,8 @@ class TextualInversionTrainer: _, _, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, batch_size=1, diff --git a/train_textual_inversion_XTI.py b/train_textual_inversion_XTI.py index 3f915597..695fad2a 100644 --- a/train_textual_inversion_XTI.py +++ b/train_textual_inversion_XTI.py @@ -305,8 +305,8 @@ def train(args): _, _, optimizer = train_util.get_optimizer(args, trainable_params) # dataloaderを準備する - # DataLoaderのプロセス数:0はメインプロセスになる - n_workers = min(args.max_data_loader_n_workers, os.cpu_count() - 1) # cpu_count-1 ただし最大で指定された数まで + # DataLoaderのプロセス数:0 は persistent_workers が使えないので注意 + n_workers = min(args.max_data_loader_n_workers, os.cpu_count()) # cpu_count or max_data_loader_n_workers train_dataloader = torch.utils.data.DataLoader( train_dataset_group, batch_size=1, From 24092e6f2153c69a8647e0856ec733dd9e9f6ec3 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 19:51:51 +0900 Subject: [PATCH 05/10] update einops to 0.7.0 #1122 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a399d8bd..279de350 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ diffusers[torch]==0.25.0 ftfy==6.1.1 # albumentations==1.3.0 opencv-python==4.7.0.68 -einops==0.6.1 +einops==0.7.0 pytorch-lightning==1.9.0 # bitsandbytes==0.39.1 tensorboard==2.10.1 From fb9110bac1814fbfe0dc031efece96ad03dac7f1 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 20:00:57 +0900 Subject: [PATCH 06/10] format by black --- networks/resize_lora.py | 428 ++++++++++++++++++++++------------------ 1 file changed, 231 insertions(+), 197 deletions(-) diff --git a/networks/resize_lora.py b/networks/resize_lora.py index c5932a89..37eb3caa 100644 --- a/networks/resize_lora.py +++ b/networks/resize_lora.py @@ -9,77 +9,81 @@ from tqdm import tqdm from library import train_util, model_util import numpy as np from library.utils import setup_logging + setup_logging() import logging + logger = logging.getLogger(__name__) MIN_SV = 1e-6 # Model save and load functions + def load_state_dict(file_name, dtype): - if model_util.is_safetensors(file_name): - sd = load_file(file_name) - with safe_open(file_name, framework="pt") as f: - metadata = f.metadata() - else: - sd = torch.load(file_name, map_location='cpu') - metadata = None + if model_util.is_safetensors(file_name): + sd = load_file(file_name) + with safe_open(file_name, framework="pt") as f: + metadata = f.metadata() + else: + sd = torch.load(file_name, map_location="cpu") + metadata = None - for key in list(sd.keys()): - if type(sd[key]) == torch.Tensor: - sd[key] = sd[key].to(dtype) + for key in list(sd.keys()): + if type(sd[key]) == torch.Tensor: + sd[key] = sd[key].to(dtype) - return sd, metadata + return sd, metadata def save_to_file(file_name, model, state_dict, dtype, metadata): - if dtype is not None: - for key in list(state_dict.keys()): - if type(state_dict[key]) == torch.Tensor: - state_dict[key] = state_dict[key].to(dtype) + if dtype is not None: + for key in list(state_dict.keys()): + if type(state_dict[key]) == torch.Tensor: + state_dict[key] = state_dict[key].to(dtype) - if model_util.is_safetensors(file_name): - save_file(model, file_name, metadata) - else: - torch.save(model, file_name) + if model_util.is_safetensors(file_name): + save_file(model, file_name, metadata) + else: + torch.save(model, file_name) # Indexing functions -def index_sv_cumulative(S, target): - original_sum = float(torch.sum(S)) - cumulative_sums = torch.cumsum(S, dim=0)/original_sum - index = int(torch.searchsorted(cumulative_sums, target)) + 1 - index = max(1, min(index, len(S)-1)) - return index +def index_sv_cumulative(S, target): + original_sum = float(torch.sum(S)) + cumulative_sums = torch.cumsum(S, dim=0) / original_sum + index = int(torch.searchsorted(cumulative_sums, target)) + 1 + index = max(1, min(index, len(S) - 1)) + + return index def index_sv_fro(S, target): - S_squared = S.pow(2) - s_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0)/s_fro_sq - index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 - index = max(1, min(index, len(S)-1)) + S_squared = S.pow(2) + s_fro_sq = float(torch.sum(S_squared)) + sum_S_squared = torch.cumsum(S_squared, dim=0) / s_fro_sq + index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 + index = max(1, min(index, len(S) - 1)) - return index + return index def index_sv_ratio(S, target): - max_sv = S[0] - min_sv = max_sv/target - index = int(torch.sum(S > min_sv).item()) - index = max(1, min(index, len(S)-1)) + max_sv = S[0] + min_sv = max_sv / target + index = int(torch.sum(S > min_sv).item()) + index = max(1, min(index, len(S) - 1)) - return index + return index # Modified from Kohaku-blueleaf's extract/merge functions def extract_conv(weight, lora_rank, dynamic_method, dynamic_param, device, scale=1): out_size, in_size, kernel_size, _ = weight.size() U, S, Vh = torch.linalg.svd(weight.reshape(out_size, -1).to(device)) - + param_dict = rank_resize(S, lora_rank, dynamic_method, dynamic_param, scale) lora_rank = param_dict["new_rank"] @@ -96,17 +100,17 @@ def extract_conv(weight, lora_rank, dynamic_method, dynamic_param, device, scale def extract_linear(weight, lora_rank, dynamic_method, dynamic_param, device, scale=1): out_size, in_size = weight.size() - + U, S, Vh = torch.linalg.svd(weight.to(device)) - + param_dict = rank_resize(S, lora_rank, dynamic_method, dynamic_param, scale) lora_rank = param_dict["new_rank"] - + U = U[:, :lora_rank] S = S[:lora_rank] U = U @ torch.diag(S) Vh = Vh[:lora_rank, :] - + param_dict["lora_down"] = Vh.reshape(lora_rank, in_size).cpu() param_dict["lora_up"] = U.reshape(out_size, lora_rank).cpu() del U, S, Vh, weight @@ -117,7 +121,7 @@ def merge_conv(lora_down, lora_up, device): in_rank, in_size, kernel_size, k_ = lora_down.shape out_size, out_rank, _, _ = lora_up.shape assert in_rank == out_rank and kernel_size == k_, f"rank {in_rank} {out_rank} or kernel {kernel_size} {k_} mismatch" - + lora_down = lora_down.to(device) lora_up = lora_up.to(device) @@ -131,236 +135,266 @@ def merge_linear(lora_down, lora_up, device): in_rank, in_size = lora_down.shape out_size, out_rank = lora_up.shape assert in_rank == out_rank, f"rank {in_rank} {out_rank} mismatch" - + lora_down = lora_down.to(device) lora_up = lora_up.to(device) - + weight = lora_up @ lora_down del lora_up, lora_down return weight - + # Calculate new rank + def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1): param_dict = {} - if dynamic_method=="sv_ratio": + if dynamic_method == "sv_ratio": # Calculate new dim and alpha based off ratio new_rank = index_sv_ratio(S, dynamic_param) + 1 - new_alpha = float(scale*new_rank) + new_alpha = float(scale * new_rank) - elif dynamic_method=="sv_cumulative": + elif dynamic_method == "sv_cumulative": # Calculate new dim and alpha based off cumulative sum new_rank = index_sv_cumulative(S, dynamic_param) + 1 - new_alpha = float(scale*new_rank) + new_alpha = float(scale * new_rank) - elif dynamic_method=="sv_fro": + elif dynamic_method == "sv_fro": # Calculate new dim and alpha based off sqrt sum of squares new_rank = index_sv_fro(S, dynamic_param) + 1 - new_alpha = float(scale*new_rank) + new_alpha = float(scale * new_rank) else: new_rank = rank - new_alpha = float(scale*new_rank) + new_alpha = float(scale * new_rank) - - if S[0] <= MIN_SV: # Zero matrix, set dim to 1 + if S[0] <= MIN_SV: # Zero matrix, set dim to 1 new_rank = 1 - new_alpha = float(scale*new_rank) - elif new_rank > rank: # cap max rank at rank + new_alpha = float(scale * new_rank) + elif new_rank > rank: # cap max rank at rank new_rank = rank - new_alpha = float(scale*new_rank) - + new_alpha = float(scale * new_rank) # Calculate resize info s_sum = torch.sum(torch.abs(S)) s_rank = torch.sum(torch.abs(S[:new_rank])) - + S_squared = S.pow(2) s_fro = torch.sqrt(torch.sum(S_squared)) s_red_fro = torch.sqrt(torch.sum(S_squared[:new_rank])) - fro_percent = float(s_red_fro/s_fro) + fro_percent = float(s_red_fro / s_fro) param_dict["new_rank"] = new_rank param_dict["new_alpha"] = new_alpha - param_dict["sum_retained"] = (s_rank)/s_sum + param_dict["sum_retained"] = (s_rank) / s_sum param_dict["fro_retained"] = fro_percent - param_dict["max_ratio"] = S[0]/S[new_rank - 1] + param_dict["max_ratio"] = S[0] / S[new_rank - 1] return param_dict def resize_lora_model(lora_sd, new_rank, save_dtype, device, dynamic_method, dynamic_param, verbose): - network_alpha = None - network_dim = None - verbose_str = "\n" - fro_list = [] + network_alpha = None + network_dim = None + verbose_str = "\n" + fro_list = [] - # Extract loaded lora dim and alpha - for key, value in lora_sd.items(): - if network_alpha is None and 'alpha' in key: - network_alpha = value - if network_dim is None and 'lora_down' in key and len(value.size()) == 2: - network_dim = value.size()[0] - if network_alpha is not None and network_dim is not None: - break - if network_alpha is None: - network_alpha = network_dim + # Extract loaded lora dim and alpha + for key, value in lora_sd.items(): + if network_alpha is None and "alpha" in key: + network_alpha = value + if network_dim is None and "lora_down" in key and len(value.size()) == 2: + network_dim = value.size()[0] + if network_alpha is not None and network_dim is not None: + break + if network_alpha is None: + network_alpha = network_dim - scale = network_alpha/network_dim + scale = network_alpha / network_dim - if dynamic_method: - logger.info(f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}") + if dynamic_method: + logger.info( + f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}" + ) - lora_down_weight = None - lora_up_weight = None + lora_down_weight = None + lora_up_weight = None - o_lora_sd = lora_sd.copy() - block_down_name = None - block_up_name = None + o_lora_sd = lora_sd.copy() + block_down_name = None + block_up_name = None - with torch.no_grad(): - for key, value in tqdm(lora_sd.items()): - weight_name = None - if 'lora_down' in key: - block_down_name = key.rsplit('.lora_down', 1)[0] - weight_name = key.rsplit(".", 1)[-1] - lora_down_weight = value - else: - continue + with torch.no_grad(): + for key, value in tqdm(lora_sd.items()): + weight_name = None + if "lora_down" in key: + block_down_name = key.rsplit(".lora_down", 1)[0] + weight_name = key.rsplit(".", 1)[-1] + lora_down_weight = value + else: + continue - # find corresponding lora_up and alpha - block_up_name = block_down_name - lora_up_weight = lora_sd.get(block_up_name + '.lora_up.' + weight_name, None) - lora_alpha = lora_sd.get(block_down_name + '.alpha', None) + # find corresponding lora_up and alpha + block_up_name = block_down_name + lora_up_weight = lora_sd.get(block_up_name + ".lora_up." + weight_name, None) + lora_alpha = lora_sd.get(block_down_name + ".alpha", None) - weights_loaded = (lora_down_weight is not None and lora_up_weight is not None) + weights_loaded = lora_down_weight is not None and lora_up_weight is not None - if weights_loaded: + if weights_loaded: - conv2d = (len(lora_down_weight.size()) == 4) - if lora_alpha is None: - scale = 1.0 - else: - scale = lora_alpha/lora_down_weight.size()[0] + conv2d = len(lora_down_weight.size()) == 4 + if lora_alpha is None: + scale = 1.0 + else: + scale = lora_alpha / lora_down_weight.size()[0] - if conv2d: - full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) - param_dict = extract_conv(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) - else: - full_weight_matrix = merge_linear(lora_down_weight, lora_up_weight, device) - param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + if conv2d: + full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) + param_dict = extract_conv(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) + else: + full_weight_matrix = merge_linear(lora_down_weight, lora_up_weight, device) + param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) - if verbose: - max_ratio = param_dict['max_ratio'] - sum_retained = param_dict['sum_retained'] - fro_retained = param_dict['fro_retained'] - if not np.isnan(fro_retained): - fro_list.append(float(fro_retained)) + if verbose: + max_ratio = param_dict["max_ratio"] + sum_retained = param_dict["sum_retained"] + fro_retained = param_dict["fro_retained"] + if not np.isnan(fro_retained): + fro_list.append(float(fro_retained)) - verbose_str+=f"{block_down_name:75} | " - verbose_str+=f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" + verbose_str += f"{block_down_name:75} | " + verbose_str += ( + f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" + ) - if verbose and dynamic_method: - verbose_str+=f", dynamic | dim: {param_dict['new_rank']}, alpha: {param_dict['new_alpha']}\n" - else: - verbose_str+=f"\n" + if verbose and dynamic_method: + verbose_str += f", dynamic | dim: {param_dict['new_rank']}, alpha: {param_dict['new_alpha']}\n" + else: + verbose_str += f"\n" - new_alpha = param_dict['new_alpha'] - o_lora_sd[block_down_name + "." + "lora_down.weight"] = param_dict["lora_down"].to(save_dtype).contiguous() - o_lora_sd[block_up_name + "." + "lora_up.weight"] = param_dict["lora_up"].to(save_dtype).contiguous() - o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict['new_alpha']).to(save_dtype) + new_alpha = param_dict["new_alpha"] + o_lora_sd[block_down_name + "." + "lora_down.weight"] = param_dict["lora_down"].to(save_dtype).contiguous() + o_lora_sd[block_up_name + "." + "lora_up.weight"] = param_dict["lora_up"].to(save_dtype).contiguous() + o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict["new_alpha"]).to(save_dtype) - block_down_name = None - block_up_name = None - lora_down_weight = None - lora_up_weight = None - weights_loaded = False - del param_dict + block_down_name = None + block_up_name = None + lora_down_weight = None + lora_up_weight = None + weights_loaded = False + del param_dict - if verbose: - logger.info(verbose_str) + if verbose: + logger.info(verbose_str) - logger.info(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") - logger.info("resizing complete") - return o_lora_sd, network_dim, new_alpha + logger.info(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") + logger.info("resizing complete") + return o_lora_sd, network_dim, new_alpha def resize(args): - if args.save_to is None or not (args.save_to.endswith('.ckpt') or args.save_to.endswith('.pt') or args.save_to.endswith('.pth') or args.save_to.endswith('.safetensors')): - raise Exception("The --save_to argument must be specified and must be a .ckpt , .pt, .pth or .safetensors file.") + if args.save_to is None or not ( + args.save_to.endswith(".ckpt") + or args.save_to.endswith(".pt") + or args.save_to.endswith(".pth") + or args.save_to.endswith(".safetensors") + ): + raise Exception("The --save_to argument must be specified and must be a .ckpt , .pt, .pth or .safetensors file.") - - def str_to_dtype(p): - if p == 'float': - return torch.float - if p == 'fp16': - return torch.float16 - if p == 'bf16': - return torch.bfloat16 - return None + def str_to_dtype(p): + if p == "float": + return torch.float + if p == "fp16": + return torch.float16 + if p == "bf16": + return torch.bfloat16 + return None - if args.dynamic_method and not args.dynamic_param: - raise Exception("If using dynamic_method, then dynamic_param is required") + if args.dynamic_method and not args.dynamic_param: + raise Exception("If using dynamic_method, then dynamic_param is required") - merge_dtype = str_to_dtype('float') # matmul method above only seems to work in float32 - save_dtype = str_to_dtype(args.save_precision) - if save_dtype is None: - save_dtype = merge_dtype + merge_dtype = str_to_dtype("float") # matmul method above only seems to work in float32 + save_dtype = str_to_dtype(args.save_precision) + if save_dtype is None: + save_dtype = merge_dtype - logger.info("loading Model...") - lora_sd, metadata = load_state_dict(args.model, merge_dtype) + logger.info("loading Model...") + lora_sd, metadata = load_state_dict(args.model, merge_dtype) - logger.info("Resizing Lora...") - state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) + logger.info("Resizing Lora...") + state_dict, old_dim, new_alpha = resize_lora_model( + lora_sd, args.new_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose + ) - # update metadata - if metadata is None: - metadata = {} + # update metadata + if metadata is None: + metadata = {} - comment = metadata.get("ss_training_comment", "") + comment = metadata.get("ss_training_comment", "") - if not args.dynamic_method: - metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}; {comment}" - metadata["ss_network_dim"] = str(args.new_rank) - metadata["ss_network_alpha"] = str(new_alpha) - else: - metadata["ss_training_comment"] = f"Dynamic resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" - metadata["ss_network_dim"] = 'Dynamic' - metadata["ss_network_alpha"] = 'Dynamic' + if not args.dynamic_method: + metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}; {comment}" + metadata["ss_network_dim"] = str(args.new_rank) + metadata["ss_network_alpha"] = str(new_alpha) + else: + metadata["ss_training_comment"] = ( + f"Dynamic resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" + ) + metadata["ss_network_dim"] = "Dynamic" + metadata["ss_network_alpha"] = "Dynamic" - model_hash, legacy_hash = train_util.precalculate_safetensors_hashes(state_dict, metadata) - metadata["sshs_model_hash"] = model_hash - metadata["sshs_legacy_hash"] = legacy_hash + model_hash, legacy_hash = train_util.precalculate_safetensors_hashes(state_dict, metadata) + metadata["sshs_model_hash"] = model_hash + metadata["sshs_legacy_hash"] = legacy_hash - logger.info(f"saving model to: {args.save_to}") - save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) + logger.info(f"saving model to: {args.save_to}") + save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) def setup_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser() - parser.add_argument("--save_precision", type=str, default=None, - choices=[None, "float", "fp16", "bf16"], help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat") - parser.add_argument("--new_rank", type=int, default=4, - help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") - parser.add_argument("--save_to", type=str, default=None, - help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors") - parser.add_argument("--model", type=str, default=None, - help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors") - parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う") - parser.add_argument("--verbose", action="store_true", - help="Display verbose resizing information / rank変更時の詳細情報を出力する") - parser.add_argument("--dynamic_method", type=str, default=None, choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], - help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank") - parser.add_argument("--dynamic_param", type=float, default=None, - help="Specify target for dynamic reduction") - - return parser + parser.add_argument( + "--save_precision", + type=str, + default=None, + choices=[None, "float", "fp16", "bf16"], + help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat", + ) + parser.add_argument("--new_rank", type=int, default=4, help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") + parser.add_argument( + "--save_to", + type=str, + default=None, + help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors", + ) + parser.add_argument( + "--model", + type=str, + default=None, + help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors", + ) + parser.add_argument( + "--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う" + ) + parser.add_argument( + "--verbose", action="store_true", help="Display verbose resizing information / rank変更時の詳細情報を出力する" + ) + parser.add_argument( + "--dynamic_method", + type=str, + default=None, + choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], + help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank", + ) + parser.add_argument("--dynamic_param", type=float, default=None, help="Specify target for dynamic reduction") + + return parser -if __name__ == '__main__': - parser = setup_parser() +if __name__ == "__main__": + parser = setup_parser() - args = parser.parse_args() - resize(args) + args = parser.parse_args() + resize(args) From 52b379998944a9feac072110b05f77c406a2ec1d Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 20:49:41 +0900 Subject: [PATCH 07/10] fix format, add new conv rank to metadata comment --- networks/resize_lora.py | 168 ++++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 67 deletions(-) diff --git a/networks/resize_lora.py b/networks/resize_lora.py index 5bf8b3c3..d697baa4 100644 --- a/networks/resize_lora.py +++ b/networks/resize_lora.py @@ -5,10 +5,12 @@ import os import argparse import torch -from safetensors.torch import load_file, save_file +from safetensors.torch import load_file, save_file, safe_open from tqdm import tqdm -from library import train_util import numpy as np + +from library import train_util +from library import model_util from library.utils import setup_logging setup_logging() @@ -36,16 +38,18 @@ def load_state_dict(file_name, dtype): return sd, metadata -def save_to_file(file_name, model, state_dict, dtype, metadata): + +def save_to_file(file_name, state_dict, dtype, metadata): if dtype is not None: for key in list(state_dict.keys()): if type(state_dict[key]) == torch.Tensor: state_dict[key] = state_dict[key].to(dtype) if model_util.is_safetensors(file_name): - save_file(model, file_name, metadata) + save_file(state_dict, file_name, metadata) else: - torch.save(model, file_name) + torch.save(state_dict, file_name) + # Indexing functions @@ -62,18 +66,18 @@ def index_sv_cumulative(S, target): def index_sv_fro(S, target): S_squared = S.pow(2) S_fro_sq = float(torch.sum(S_squared)) - sum_S_squared = torch.cumsum(S_squared, dim=0)/S_fro_sq + sum_S_squared = torch.cumsum(S_squared, dim=0) / S_fro_sq index = int(torch.searchsorted(sum_S_squared, target**2)) + 1 - index = max(1, min(index, len(S)-1)) + index = max(1, min(index, len(S) - 1)) return index def index_sv_ratio(S, target): max_sv = S[0] - min_sv = max_sv/target + min_sv = max_sv / target index = int(torch.sum(S > min_sv).item()) - index = max(1, min(index, len(S)-1)) + index = max(1, min(index, len(S) - 1)) return index @@ -169,10 +173,10 @@ def rank_resize(S, rank, dynamic_method, dynamic_param, scale=1): if S[0] <= MIN_SV: # Zero matrix, set dim to 1 new_rank = 1 - new_alpha = float(scale*new_rank) + new_alpha = float(scale * new_rank) elif new_rank > rank: # cap max rank at rank new_rank = rank - new_alpha = float(scale*new_rank) + new_alpha = float(scale * new_rank) # Calculate resize info s_sum = torch.sum(torch.abs(S)) @@ -200,19 +204,21 @@ def resize_lora_model(lora_sd, new_rank, new_conv_rank, save_dtype, device, dyna # Extract loaded lora dim and alpha for key, value in lora_sd.items(): - if network_alpha is None and 'alpha' in key: + if network_alpha is None and "alpha" in key: network_alpha = value - if network_dim is None and 'lora_down' in key and len(value.size()) == 2: + if network_dim is None and "lora_down" in key and len(value.size()) == 2: network_dim = value.size()[0] if network_alpha is not None and network_dim is not None: break if network_alpha is None: network_alpha = network_dim - scale = network_alpha/network_dim + scale = network_alpha / network_dim if dynamic_method: - logger.info(f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}") + logger.info( + f"Dynamically determining new alphas and dims based off {dynamic_method}: {dynamic_param}, max rank is {new_rank}" + ) lora_down_weight = None lora_up_weight = None @@ -224,8 +230,8 @@ def resize_lora_model(lora_sd, new_rank, new_conv_rank, save_dtype, device, dyna with torch.no_grad(): for key, value in tqdm(lora_sd.items()): weight_name = None - if 'lora_down' in key: - block_down_name = key.rsplit('.lora_down', 1)[0] + if "lora_down" in key: + block_down_name = key.rsplit(".lora_down", 1)[0] weight_name = key.rsplit(".", 1)[-1] lora_down_weight = value else: @@ -233,18 +239,18 @@ def resize_lora_model(lora_sd, new_rank, new_conv_rank, save_dtype, device, dyna # find corresponding lora_up and alpha block_up_name = block_down_name - lora_up_weight = lora_sd.get(block_up_name + '.lora_up.' + weight_name, None) - lora_alpha = lora_sd.get(block_down_name + '.alpha', None) + lora_up_weight = lora_sd.get(block_up_name + ".lora_up." + weight_name, None) + lora_alpha = lora_sd.get(block_down_name + ".alpha", None) - weights_loaded = (lora_down_weight is not None and lora_up_weight is not None) + weights_loaded = lora_down_weight is not None and lora_up_weight is not None if weights_loaded: - conv2d = (len(lora_down_weight.size()) == 4) + conv2d = len(lora_down_weight.size()) == 4 if lora_alpha is None: scale = 1.0 else: - scale = lora_alpha/lora_down_weight.size()[0] + scale = lora_alpha / lora_down_weight.size()[0] if conv2d: full_weight_matrix = merge_conv(lora_down_weight, lora_up_weight, device) @@ -254,24 +260,26 @@ def resize_lora_model(lora_sd, new_rank, new_conv_rank, save_dtype, device, dyna param_dict = extract_linear(full_weight_matrix, new_rank, dynamic_method, dynamic_param, device, scale) if verbose: - max_ratio = param_dict['max_ratio'] - sum_retained = param_dict['sum_retained'] - fro_retained = param_dict['fro_retained'] + max_ratio = param_dict["max_ratio"] + sum_retained = param_dict["sum_retained"] + fro_retained = param_dict["fro_retained"] if not np.isnan(fro_retained): fro_list.append(float(fro_retained)) verbose_str += f"{block_down_name:75} | " - verbose_str += f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" + verbose_str += ( + f"sum(S) retained: {sum_retained:.1%}, fro retained: {fro_retained:.1%}, max(S) ratio: {max_ratio:0.1f}" + ) if verbose and dynamic_method: verbose_str += f", dynamic | dim: {param_dict['new_rank']}, alpha: {param_dict['new_alpha']}\n" else: verbose_str += "\n" - new_alpha = param_dict['new_alpha'] + new_alpha = param_dict["new_alpha"] o_lora_sd[block_down_name + "." + "lora_down.weight"] = param_dict["lora_down"].to(save_dtype).contiguous() o_lora_sd[block_up_name + "." + "lora_up.weight"] = param_dict["lora_up"].to(save_dtype).contiguous() - o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict['new_alpha']).to(save_dtype) + o_lora_sd[block_up_name + "." "alpha"] = torch.tensor(param_dict["new_alpha"]).to(save_dtype) block_down_name = None block_up_name = None @@ -281,38 +289,36 @@ def resize_lora_model(lora_sd, new_rank, new_conv_rank, save_dtype, device, dyna del param_dict if verbose: - logger.info(verbose_str) - - logger.info(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") + print(verbose_str) + print(f"Average Frobenius norm retention: {np.mean(fro_list):.2%} | std: {np.std(fro_list):0.3f}") logger.info("resizing complete") return o_lora_sd, network_dim, new_alpha def resize(args): - if ( - args.save_to is None or - not (args.save_to.endswith('.ckpt') or - args.save_to.endswith('.pt') or - args.save_to.endswith('.pth') or - args.save_to.endswith('.safetensors')) - ): + if args.save_to is None or not ( + args.save_to.endswith(".ckpt") + or args.save_to.endswith(".pt") + or args.save_to.endswith(".pth") + or args.save_to.endswith(".safetensors") + ): raise Exception("The --save_to argument must be specified and must be a .ckpt , .pt, .pth or .safetensors file.") args.new_conv_rank = args.new_conv_rank if args.new_conv_rank is not None else args.new_rank def str_to_dtype(p): - if p == 'float': + if p == "float": return torch.float - if p == 'fp16': + if p == "fp16": return torch.float16 - if p == 'bf16': + if p == "bf16": return torch.bfloat16 return None if args.dynamic_method and not args.dynamic_param: raise Exception("If using dynamic_method, then dynamic_param is required") - merge_dtype = str_to_dtype('float') # matmul method above only seems to work in float32 + merge_dtype = str_to_dtype("float") # matmul method above only seems to work in float32 save_dtype = str_to_dtype(args.save_precision) if save_dtype is None: save_dtype = merge_dtype @@ -321,7 +327,9 @@ def resize(args): lora_sd, metadata = load_state_dict(args.model, merge_dtype) logger.info("Resizing Lora...") - state_dict, old_dim, new_alpha = resize_lora_model(lora_sd, args.new_rank, args.new_conv_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose) + state_dict, old_dim, new_alpha = resize_lora_model( + lora_sd, args.new_rank, args.new_conv_rank, save_dtype, args.device, args.dynamic_method, args.dynamic_param, args.verbose + ) # update metadata if metadata is None: @@ -330,47 +338,73 @@ def resize(args): comment = metadata.get("ss_training_comment", "") if not args.dynamic_method: - metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}; {comment}" + conv_desc = "" if args.new_rank == args.new_conv_rank else f" (conv: {args.new_conv_rank})" + metadata["ss_training_comment"] = f"dimension is resized from {old_dim} to {args.new_rank}{conv_desc}; {comment}" metadata["ss_network_dim"] = str(args.new_rank) metadata["ss_network_alpha"] = str(new_alpha) else: - metadata["ss_training_comment"] = f"Dynamic resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" - metadata["ss_network_dim"] = 'Dynamic' - metadata["ss_network_alpha"] = 'Dynamic' + metadata["ss_training_comment"] = ( + f"Dynamic resize with {args.dynamic_method}: {args.dynamic_param} from {old_dim}; {comment}" + ) + metadata["ss_network_dim"] = "Dynamic" + metadata["ss_network_alpha"] = "Dynamic" model_hash, legacy_hash = train_util.precalculate_safetensors_hashes(state_dict, metadata) metadata["sshs_model_hash"] = model_hash metadata["sshs_legacy_hash"] = legacy_hash logger.info(f"saving model to: {args.save_to}") - save_to_file(args.save_to, state_dict, state_dict, save_dtype, metadata) + save_to_file(args.save_to, state_dict, save_dtype, metadata) def setup_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() - parser.add_argument("--save_precision", type=str, default=None, - choices=[None, "float", "fp16", "bf16"], help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat") - parser.add_argument("--new_rank", type=int, default=4, - help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") - parser.add_argument("--new_conv_rank", type=int, default=None, - help="Specify rank of output LoRA for Conv2d 3x3, None for same as new_rank / 出力するConv2D 3x3 LoRAのrank (dim)、Noneでnew_rankと同じ") - parser.add_argument("--save_to", type=str, default=None, - help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors") - parser.add_argument("--model", type=str, default=None, - help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors") - parser.add_argument("--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う") - parser.add_argument("--verbose", action="store_true", - help="Display verbose resizing information / rank変更時の詳細情報を出力する") - parser.add_argument("--dynamic_method", type=str, default=None, choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], - help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank") - parser.add_argument("--dynamic_param", type=float, default=None, - help="Specify target for dynamic reduction") + parser.add_argument( + "--save_precision", + type=str, + default=None, + choices=[None, "float", "fp16", "bf16"], + help="precision in saving, float if omitted / 保存時の精度、未指定時はfloat", + ) + parser.add_argument("--new_rank", type=int, default=4, help="Specify rank of output LoRA / 出力するLoRAのrank (dim)") + parser.add_argument( + "--new_conv_rank", + type=int, + default=None, + help="Specify rank of output LoRA for Conv2d 3x3, None for same as new_rank / 出力するConv2D 3x3 LoRAのrank (dim)、Noneでnew_rankと同じ", + ) + parser.add_argument( + "--save_to", + type=str, + default=None, + help="destination file name: ckpt or safetensors file / 保存先のファイル名、ckptまたはsafetensors", + ) + parser.add_argument( + "--model", + type=str, + default=None, + help="LoRA model to resize at to new rank: ckpt or safetensors file / 読み込むLoRAモデル、ckptまたはsafetensors", + ) + parser.add_argument( + "--device", type=str, default=None, help="device to use, cuda for GPU / 計算を行うデバイス、cuda でGPUを使う" + ) + parser.add_argument( + "--verbose", action="store_true", help="Display verbose resizing information / rank変更時の詳細情報を出力する" + ) + parser.add_argument( + "--dynamic_method", + type=str, + default=None, + choices=[None, "sv_ratio", "sv_fro", "sv_cumulative"], + help="Specify dynamic resizing method, --new_rank is used as a hard limit for max rank", + ) + parser.add_argument("--dynamic_param", type=float, default=None, help="Specify target for dynamic reduction") return parser - -if __name__ == '__main__': + +if __name__ == "__main__": parser = setup_parser() args = parser.parse_args() From 8b7c14246ae4e48375ffde13e4fdce2c8cd29b17 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 20:50:00 +0900 Subject: [PATCH 08/10] some log output to print --- networks/check_lora_weights.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/networks/check_lora_weights.py b/networks/check_lora_weights.py index 6ec60a89..794659c9 100644 --- a/networks/check_lora_weights.py +++ b/networks/check_lora_weights.py @@ -20,16 +20,16 @@ def main(file): for key in keys: if "lora_up" in key or "lora_down" in key: values.append((key, sd[key])) - logger.info(f"number of LoRA modules: {len(values)}") + print(f"number of LoRA modules: {len(values)}") if args.show_all_keys: for key in [k for k in keys if k not in values]: values.append((key, sd[key])) - logger.info(f"number of all modules: {len(values)}") + print(f"number of all modules: {len(values)}") for key, value in values: value = value.to(torch.float32) - logger.info(f"{key},{str(tuple(value.size())).replace(', ', '-')},{torch.mean(torch.abs(value))},{torch.min(torch.abs(value))}") + print(f"{key},{str(tuple(value.size())).replace(', ', '-')},{torch.mean(torch.abs(value))},{torch.min(torch.abs(value))}") def setup_parser() -> argparse.ArgumentParser: From 81e8af651942c44e7ef2f0068b497de2d12cf0fa Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 20:51:26 +0900 Subject: [PATCH 09/10] fix ipex init --- gen_img.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gen_img.py b/gen_img.py index a24220a0..daf88d2a 100644 --- a/gen_img.py +++ b/gen_img.py @@ -20,7 +20,7 @@ import diffusers import numpy as np import torch -from library.ipex_interop import init_ipex +from library.device_utils import init_ipex, clean_memory, get_preferred_device init_ipex() @@ -338,7 +338,7 @@ class PipelineLike: self.clip_vision_model: CLIPVisionModelWithProjection = None self.clip_vision_processor: CLIPImageProcessor = None self.clip_vision_strength = 0.0 - + # Textual Inversion self.token_replacements_list = [] for _ in range(len(self.text_encoders)): From a21218bdd5d451f431c35577f4cccf8c60862ef8 Mon Sep 17 00:00:00 2001 From: Kohya S Date: Sat, 24 Feb 2024 21:09:59 +0900 Subject: [PATCH 10/10] update readme --- README.md | 45 +++++---------------------------------------- 1 file changed, 5 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 4a53c154..e635e5ae 100644 --- a/README.md +++ b/README.md @@ -249,7 +249,7 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum ## Change History -### Working in progress +### Feb 24, 2024 / 2024/2/24: v0.8.4 - The log output has been improved. PR [#905](https://github.com/kohya-ss/sd-scripts/pull/905) Thanks to shirayu! - The log is formatted by default. The `rich` library is required. Please see [Upgrade](#upgrade) and update the library. @@ -260,10 +260,12 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `--console_log_file` option can be used to output the log to a file. The default is `None` (output to the console). - The sample image generation during multi-GPU training is now done with multiple GPUs. PR [#1061](https://github.com/kohya-ss/sd-scripts/pull/1061) Thanks to DKnight54! - The support for mps devices is improved. PR [#1054](https://github.com/kohya-ss/sd-scripts/pull/1054) Thanks to akx! If mps device exists instead of CUDA, the mps device is used automatically. +- The `--new_conv_rank` option to specify the new rank of Conv2d is added to `networks/resize_lora.py`. PR [#1102](https://github.com/kohya-ss/sd-scripts/pull/1102) Thanks to mgz-dev! - An option `--highvram` to disable the optimization for environments with little VRAM is added to the training scripts. If you specify it when there is enough VRAM, the operation will be faster. - Currently, only the cache part of latents is optimized. - The IPEX support is improved. PR [#1086](https://github.com/kohya-ss/sd-scripts/pull/1086) Thanks to Disty0! - Fixed a bug that `svd_merge_lora.py` crashes in some cases. PR [#1087](https://github.com/kohya-ss/sd-scripts/pull/1087) Thanks to mgz-dev! +- DyLoRA is fixed to work with SDXL. PR [#1126](https://github.com/kohya-ss/sd-scripts/pull/1126) Thanks to tamlog06! - The common image generation script `gen_img.py` for SD 1/2 and SDXL is added. The basic functions are the same as the scripts for SD 1/2 and SDXL, but some new features are added. - External scripts to generate prompts can be supported. It can be called with `--from_module` option. (The documentation will be added later) - The normalization method after prompt weighting can be specified with `--emb_normalize_mode` option. `original` is the original method, `abs` is the normalization with the average of the absolute values, `none` is no normalization. @@ -278,10 +280,12 @@ ControlNet-LLLite, a novel method for ControlNet with SDXL, is added. See [docum - `--console_log_file` でログファイルを出力できます。デフォルトは `None`(コンソールに出力) です。 - 複数 GPU 学習時に学習中のサンプル画像生成を複数 GPU で行うようになりました。 PR [#1061](https://github.com/kohya-ss/sd-scripts/pull/1061) DKnight54 氏に感謝します。 - mps デバイスのサポートが改善されました。 PR [#1054](https://github.com/kohya-ss/sd-scripts/pull/1054) akx 氏に感謝します。CUDA ではなく mps が存在する場合には自動的に mps デバイスを使用します。 +- `networks/resize_lora.py` に Conv2d の新しいランクを指定するオプション `--new_conv_rank` が追加されました。 PR [#1102](https://github.com/kohya-ss/sd-scripts/pull/1102) mgz-dev 氏に感謝します。 - 学習スクリプトに VRAMが少ない環境向け最適化を無効にするオプション `--highvram` を追加しました。VRAM に余裕がある場合に指定すると動作が高速化されます。 - 現在は latents のキャッシュ部分のみ高速化されます。 - IPEX サポートが改善されました。 PR [#1086](https://github.com/kohya-ss/sd-scripts/pull/1086) Disty0 氏に感謝します。 - `svd_merge_lora.py` が場合によってエラーになる不具合が修正されました。 PR [#1087](https://github.com/kohya-ss/sd-scripts/pull/1087) mgz-dev 氏に感謝します。 +- DyLoRA が SDXL で動くよう修正されました。PR [#1126](https://github.com/kohya-ss/sd-scripts/pull/1126) tamlog06 氏に感謝します。 - SD 1/2 および SDXL 共通の生成スクリプト `gen_img.py` を追加しました。基本的な機能は SD 1/2、SDXL 向けスクリプトと同じですが、いくつかの新機能が追加されています。 - プロンプトを動的に生成する外部スクリプトをサポートしました。 `--from_module` で呼び出せます。(ドキュメントはのちほど追加します) - プロンプト重みづけ後の正規化方法を `--emb_normalize_mode` で指定できます。`original` は元の方法、`abs` は絶対値の平均値で正規化、`none` は正規化を行いません。 @@ -358,45 +362,6 @@ network_multiplier = -1.0 ``` -### Jan 17, 2024 / 2024/1/17: v0.8.1 - -- Fixed a bug that the VRAM usage without Text Encoder training is larger than before in training scripts for LoRA etc (`train_network.py`, `sdxl_train_network.py`). - - Text Encoders were not moved to CPU. -- Fixed typos. Thanks to akx! [PR #1053](https://github.com/kohya-ss/sd-scripts/pull/1053) - -- LoRA 等の学習スクリプト(`train_network.py`、`sdxl_train_network.py`)で、Text Encoder を学習しない場合の VRAM 使用量が以前に比べて大きくなっていた不具合を修正しました。 - - Text Encoder が GPU に保持されたままになっていました。 -- 誤字が修正されました。 [PR #1053](https://github.com/kohya-ss/sd-scripts/pull/1053) akx 氏に感謝します。 - -### Jan 15, 2024 / 2024/1/15: v0.8.0 - -- Diffusers, Accelerate, Transformers and other related libraries have been updated. Please update the libraries with [Upgrade](#upgrade). - - Some model files (Text Encoder without position_id) based on the latest Transformers can be loaded. -- `torch.compile` is supported (experimental). PR [#1024](https://github.com/kohya-ss/sd-scripts/pull/1024) Thanks to p1atdev! - - This feature works only on Linux or WSL. - - Please specify `--torch_compile` option in each training script. - - You can select the backend with `--dynamo_backend` option. The default is `"inductor"`. `inductor` or `eager` seems to work. - - Please use `--sdpa` option instead of `--xformers` option. - - PyTorch 2.1 or later is recommended. - - Please see [PR](https://github.com/kohya-ss/sd-scripts/pull/1024) for details. -- The session name for wandb can be specified with `--wandb_run_name` option. PR [#1032](https://github.com/kohya-ss/sd-scripts/pull/1032) Thanks to hopl1t! -- IPEX library is updated. PR [#1030](https://github.com/kohya-ss/sd-scripts/pull/1030) Thanks to Disty0! -- Fixed a bug that Diffusers format model cannot be saved. - -- Diffusers、Accelerate、Transformers 等の関連ライブラリを更新しました。[Upgrade](#upgrade) を参照し更新をお願いします。 - - 最新の Transformers を前提とした一部のモデルファイル(Text Encoder が position_id を持たないもの)が読み込めるようになりました。 -- `torch.compile` がサポートされしました(実験的)。 PR [#1024](https://github.com/kohya-ss/sd-scripts/pull/1024) p1atdev 氏に感謝します。 - - Linux または WSL でのみ動作します。 - - 各学習スクリプトで `--torch_compile` オプションを指定してください。 - - `--dynamo_backend` オプションで使用される backend を選択できます。デフォルトは `"inductor"` です。 `inductor` または `eager` が動作するようです。 - - `--xformers` オプションとは互換性がありません。 代わりに `--sdpa` オプションを使用してください。 - - PyTorch 2.1以降を推奨します。 - - 詳細は [PR](https://github.com/kohya-ss/sd-scripts/pull/1024) をご覧ください。 -- wandb 保存時のセッション名が各学習スクリプトの `--wandb_run_name` オプションで指定できるようになりました。 PR [#1032](https://github.com/kohya-ss/sd-scripts/pull/1032) hopl1t 氏に感謝します。 -- IPEX ライブラリが更新されました。[PR #1030](https://github.com/kohya-ss/sd-scripts/pull/1030) Disty0 氏に感謝します。 -- Diffusers 形式でのモデル保存ができなくなっていた不具合を修正しました。 - - Please read [Releases](https://github.com/kohya-ss/sd-scripts/releases) for recent updates. 最近の更新情報は [Release](https://github.com/kohya-ss/sd-scripts/releases) をご覧ください。