Mirror of https://github.com/kohya-ss/sd-scripts.git, synced 2026-04-09 06:45:09 +00:00
fix tokenizer 2 is not same as open clip tokenizer
@@ -114,7 +114,7 @@ def load_tokenizers(args: argparse.Namespace):
     original_paths = [TOKENIZER1_PATH, TOKENIZER2_PATH]
     tokeniers = []
-    for original_path in original_paths:
+    for i, original_path in enumerate(original_paths):
         tokenizer: CLIPTokenizer = None
         if args.tokenizer_cache_dir:
             local_tokenizer_path = os.path.join(args.tokenizer_cache_dir, original_path.replace("/", "_"))
@@ -129,6 +129,9 @@ def load_tokenizers(args: argparse.Namespace):
             print(f"save Tokenizer to cache: {local_tokenizer_path}")
             tokenizer.save_pretrained(local_tokenizer_path)
 
+        if i == 1:
+            tokenizer.pad_token_id = 0  # fix pad token id to make same as open clip tokenizer
+
         tokeniers.append(tokenizer)
 
     if hasattr(args, "max_token_length") and args.max_token_length is not None:
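Background for the change (not part of the commit): SDXL's second text encoder was trained with the open_clip tokenizer, which zero-pads every sequence after the EOS token, while the Hugging Face CLIPTokenizer loaded here pads with its configured pad token, so the two tokenizers produce different padded ids for the same prompt. The sketch below illustrates the mismatch and the override; it assumes the transformers and open_clip_torch packages are installed, and uses laion/CLIP-ViT-bigG-14-laion2B-39B-b160k only as an assumed stand-in for whatever TOKENIZER2_PATH points to in this repo.

# Minimal sketch, not part of the commit: compare open_clip padding with the HF tokenizer.
import open_clip
from transformers import CLIPTokenizer

prompt = "a photo of a cat"

# open_clip fills everything after the EOS token with 0
oc_ids = open_clip.tokenize([prompt], context_length=77)[0]
print(oc_ids[-1].item())  # 0

# The HF tokenizer pads with its configured pad token id instead, which did not
# match 0 here; overriding pad_token_id (the commit's fix for tokenizer 2, i == 1)
# makes the padded ids line up with open_clip's.
hf_tok = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k")  # assumed hub id
hf_tok.pad_token_id = 0
hf_ids = hf_tok(prompt, padding="max_length", max_length=77, return_tensors="pt").input_ids[0]
print(hf_ids[-1].item())  # 0 -- matches open_clip after the override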