Add non_blocking to loading and moving tensors

This commit is contained in:
rockerBOO
2025-10-10 14:50:27 -04:00
parent 5e366acda4
commit d4081b2e66
9 changed files with 215 additions and 113 deletions

View File

@@ -53,7 +53,7 @@ def swap_weight_devices_cuda(device: torch.device, layer_to_cpu: nn.Module, laye
# print(
# f"Module {module_to_cuda_name} not found in CPU model or shape mismatch, so not swapping and moving to device"
# )
- module_to_cuda.weight.data = module_to_cuda.weight.data.to(device)
+ module_to_cuda.weight.data = module_to_cuda.weight.data.to(device, non_blocking=True)
torch.cuda.current_stream().synchronize() # this prevents the illegal loss value