mirror of
https://github.com/kohya-ss/sd-scripts.git
synced 2026-04-08 22:35:09 +00:00
Fix tokenizer 2 not being the same as the OpenCLIP tokenizer
This commit is contained in:
@@ -114,7 +114,7 @@ def load_tokenizers(args: argparse.Namespace):
|
||||
|
||||
original_paths = [TOKENIZER1_PATH, TOKENIZER2_PATH]
|
||||
tokeniers = []
|
||||
for original_path in original_paths:
|
||||
for i, original_path in enumerate(original_paths):
|
||||
tokenizer: CLIPTokenizer = None
|
||||
if args.tokenizer_cache_dir:
|
||||
local_tokenizer_path = os.path.join(args.tokenizer_cache_dir, original_path.replace("/", "_"))
|
||||
@@ -129,6 +129,9 @@ def load_tokenizers(args: argparse.Namespace):
|
||||
print(f"save Tokenizer to cache: {local_tokenizer_path}")
|
||||
tokenizer.save_pretrained(local_tokenizer_path)
|
||||
|
||||
if i == 1:
|
||||
tokenizer.pad_token_id = 0 # fix pad token id to make same as open clip tokenizer
|
||||
|
||||
tokeniers.append(tokenizer)
|
||||
|
||||
if hasattr(args, "max_token_length") and args.max_token_length is not None:
|
||||
|
||||
Reference in New Issue
Block a user