feat: SD1.x/2.x と SDXL 向けの LECO 学習スクリプトを追加 (#2285)

* Add LECO training script and associated tests

- Implemented `sdxl_train_leco.py` for training with LECO prompts, including argument parsing, model setup, training loop, and weight saving functionality.
- Created unit tests for `load_prompt_settings` in `test_leco_train_util.py` to validate loading of prompt configurations in both original and slider formats.
- Added basic syntax tests for `train_leco.py` and `sdxl_train_leco.py` to ensure modules are importable.

* fix: use getattr for safe attribute access in argument verification

* feat: add CUDA device compatibility validation and corresponding tests

* Revert "feat: add CUDA device compatibility validation and corresponding tests"

This reverts commit 6d3e51431b.

* feat: update predict_noise_xl to use vector embedding from add_time_ids

* feat: implement checkpointing in predict_noise and predict_noise_xl functions

* feat: remove unused submodules and update .gitignore to exclude .codex-tmp

---------

Co-authored-by: Kohya S. <52813779+kohya-ss@users.noreply.github.com>
This commit is contained in:
Umisetokikaze
2026-03-28 19:25:16 +09:00
committed by GitHub
parent feb38356ea
commit 4ea6032c66
10 changed files with 1574 additions and 12 deletions

View File

@@ -0,0 +1,114 @@
from pathlib import Path
import torch
from library.leco_train_util import load_prompt_settings
def test_load_prompt_settings_with_original_format(tmp_path: Path):
    """Check that an original-format prompt file loads as a single prompt.

    The fixture is a YAML list with one entry that sets only ``target``,
    ``guidance_scale`` and ``resolution``. The assertions pin the values the
    loaded prompt is expected to expose for those and the remaining fields.

    Args:
        tmp_path: pytest-provided temporary directory for the fixture file.
    """
    prompt_file = tmp_path / "prompts.yaml"
    # ``guidance_scale`` and ``resolution`` must be indented under the
    # ``- target`` entry so they are keys of the same list item; written at
    # column 0 they follow a root-level sequence and the document is not
    # valid YAML.
    prompt_file.write_text(
        """
- target: "van gogh"
  guidance_scale: 1.5
  resolution: 512
""".strip(),
        encoding="utf-8",
    )

    prompts = load_prompt_settings(prompt_file)

    assert len(prompts) == 1
    assert prompts[0].target == "van gogh"
    assert prompts[0].positive == "van gogh"
    assert prompts[0].unconditional == ""
    assert prompts[0].neutral == ""
    assert prompts[0].action == "erase"
    assert prompts[0].guidance_scale == 1.5
def test_load_prompt_settings_with_slider_targets(tmp_path: Path):
    """Check that a slider-format prompt file is expanded into four prompts.

    The assertions pin, for each of the four loaded prompts, the action and
    the sign of the multiplier, plus the prompt texts, weight and resolution
    where the test inspects them.

    Args:
        tmp_path: pytest-provided temporary directory for the fixture file.
    """
    # NOTE(review): the YAML text is kept exactly as found. As written, every
    # key after ``target_class`` sits at the top level of the document rather
    # than inside the ``targets`` list item -- confirm against the schema
    # that ``load_prompt_settings`` expects.
    slider_yaml = """
targets:
- target_class: ""
positive: "high detail"
negative: "low detail"
multiplier: 1.25
weight: 0.5
guidance_scale: 2.0
resolution: 768
neutral: ""
""".strip()
    prompt_file = tmp_path / "slider.yaml"
    prompt_file.write_text(slider_yaml, encoding="utf-8")

    prompts = load_prompt_settings(prompt_file)
    assert len(prompts) == 4
    first, second, third, fourth = prompts[0], prompts[1], prompts[2], prompts[3]

    # First prompt: erase, with the "negative" text as the positive prompt.
    assert first.target == ""
    assert first.positive == "low detail"
    assert first.unconditional == "high detail"
    assert first.action == "erase"
    assert first.multiplier == 1.25
    assert first.weight == 0.5
    assert first.get_resolution() == (768, 768)

    # Second prompt: enhance, with the prompt texts swapped back.
    assert second.positive == "high detail"
    assert second.unconditional == "low detail"
    assert second.action == "enhance"
    assert second.multiplier == 1.25

    # Third and fourth prompts repeat the actions with a negated multiplier.
    assert third.action == "erase"
    assert third.multiplier == -1.25
    assert fourth.action == "enhance"
    assert fourth.multiplier == -1.25
def test_predict_noise_xl_uses_vector_embedding_from_add_time_ids():
    """Check the conditioning vector that ``predict_noise_xl`` gives the UNet.

    A recording UNet stub captures its call arguments. The test then asserts
    that ``context`` is the prompt's text embeddings object itself and that
    ``y`` equals the pooled embeddings concatenated (dim=1) with the size
    embeddings computed from the three column pairs of ``add_time_ids``.
    """
    from library import sdxl_train_util
    from library.leco_train_util import PromptEmbedsXL, predict_noise_xl

    class DummyScheduler:
        """Scheduler stub whose input scaling returns its input unchanged."""

        def scale_model_input(self, latent_model_input, timestep):
            return latent_model_input

    class DummyUNet:
        """UNet stub that stores its call arguments and returns zeros."""

        def __call__(self, x, timesteps, context, y):
            self.x, self.timesteps = x, timesteps
            self.context, self.y = context, y
            return torch.zeros_like(x)

    latents = torch.randn(1, 4, 8, 8)
    prompt_embeds = PromptEmbedsXL(
        text_embeds=torch.randn(2, 77, 2048),
        pooled_embeds=torch.randn(2, 1280),
    )
    pooled = prompt_embeds.pooled_embeds
    time_id_row = [1024, 1024, 0, 0, 1024, 1024]
    add_time_ids = torch.tensor([time_id_row, list(time_id_row)], dtype=pooled.dtype)

    unet = DummyUNet()
    scheduler = DummyScheduler()
    noise_pred = predict_noise_xl(unet, scheduler, torch.tensor(10), latents, prompt_embeds, add_time_ids)

    # Rebuild the expected size embeddings from the same three column pairs.
    column_pairs = (add_time_ids[:, :2], add_time_ids[:, 2:4], add_time_ids[:, 4:6])
    expected_size_embeddings = sdxl_train_util.get_size_embeddings(*column_pairs, latents.device)
    expected_size_embeddings = expected_size_embeddings.to(pooled.dtype)

    assert noise_pred.shape == latents.shape
    assert unet.context is prompt_embeds.text_embeds
    assert torch.equal(unet.y, torch.cat([prompt_embeds.pooled_embeds, expected_size_embeddings], dim=1))

View File

@@ -0,0 +1,16 @@
import sdxl_train_leco
from library import deepspeed_utils, sdxl_train_util, train_util
def test_syntax():
    """Smoke test: ``sdxl_train_leco`` was imported and is bound to an object."""
    module = sdxl_train_leco
    assert module is not None
def test_setup_parser_supports_shared_training_validation():
    """Parse a minimal command line and pass it through the shared validators.

    Only ``--prompts_file`` is supplied. The parsed namespace must be accepted
    by the generic and SDXL argument checks, leave ``min_snr_gamma`` unset,
    and produce no DeepSpeed plugin.
    """
    parser = sdxl_train_leco.setup_parser()
    cli_args = ["--prompts_file", "slider.yaml"]
    args = parser.parse_args(cli_args)

    # These calls are expected to return without raising.
    train_util.verify_training_args(args)
    sdxl_train_util.verify_sdxl_training_args(args, support_text_encoder_caching=False)

    assert args.min_snr_gamma is None
    plugin = deepspeed_utils.prepare_deepspeed_plugin(args)
    assert plugin is None

15
tests/test_train_leco.py Normal file
View File

@@ -0,0 +1,15 @@
import train_leco
from library import deepspeed_utils, train_util
def test_syntax():
    """Smoke test: ``train_leco`` was imported and is bound to an object."""
    module = train_leco
    assert module is not None
def test_setup_parser_supports_shared_training_validation():
    """Parse a minimal command line and pass it through the shared validator.

    Only ``--prompts_file`` is supplied. The parsed namespace must be accepted
    by the generic argument check, leave ``min_snr_gamma`` unset, and produce
    no DeepSpeed plugin.
    """
    parser = train_leco.setup_parser()
    cli_args = ["--prompts_file", "slider.yaml"]
    args = parser.parse_args(cli_args)

    # This call is expected to return without raising.
    train_util.verify_training_args(args)

    assert args.min_snr_gamma is None
    plugin = deepspeed_utils.prepare_deepspeed_plugin(args)
    assert plugin is None