Commit 232df68: more fixes for controlnet
committed Nov 20, 2024
1 parent: 8075776

File tree: 3 files changed, +12 -11 lines changed
  .gitignore
  evaluate.py
  sd3_infer.py


.gitignore (+2 -1)

@@ -167,4 +167,5 @@ cython_debug/
 #.idea/
 
 .vscode/
-*.out.*
+*.out.*
+*.pt

evaluate.py (+6 -5)

@@ -51,7 +51,6 @@ def load_pickles(folder_path):
     # Filter and sort the files
     pickle_files = sorted(
         [file for file in files if file.startswith("data_") and file.endswith(".pkl")],
-        key=lambda x: int(x.split("_")[1].split(".")[0]),
     )
 
     data_list = []
@@ -124,7 +123,7 @@ def main(
         controlnet_ckpt,
         model_folder,
         text_encoder_device,
-        load_tokenizers=True,
+        load_tokenizers=False,
     )
 
     print(f"Saving images to {out_dir}")
@@ -150,8 +149,8 @@ def _get_precomputed_cond(sample):
     # torch.save(neg_cond[0], os.path.join(out_dir, "neg_cond_0.pt"))
     # torch.save(neg_cond[1], os.path.join(out_dir, "neg_cond_1.pt"))
     neg_cond = (
-        torch.load(os.path.join("outputs", "neg_cond_0.pt")),
-        torch.load(os.path.join("outputs", "neg_cond_1.pt")),
+        torch.load("neg_cond_0.pt"),
+        torch.load("neg_cond_1.pt"),
     )
 
     for i, sample in tqdm(enumerate(dataset)):
@@ -161,7 +160,9 @@ def _get_precomputed_cond(sample):
         else:
             latent = inferencer.get_empty_latent(1, width, height, seed, "cpu")
             latent = latent.cuda()
-        controlnet_cond = inferencer.vae_encode_tensor(sample["vae_f8_ch16.cond.sft.latent"])
+        controlnet_cond = inferencer.vae_encode_tensor(
+            sample["vae_f8_ch16.cond.sft.latent"]
+        )
         conditioning = _get_precomputed_cond(sample)
         seed_num = 42
         sampled_latent = inferencer.do_sampling(
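Note on the first hunk: without the key argument, sorted() orders the pickle filenames lexicographically rather than by their numeric suffix, so "data_10.pkl" now sorts before "data_2.pkl". A minimal standalone illustration (the filenames are made up):

    # Lexicographic order, as in the updated load_pickles (no key argument):
    files = ["data_2.pkl", "data_10.pkl", "data_1.pkl"]
    print(sorted(files))
    # -> ['data_1.pkl', 'data_10.pkl', 'data_2.pkl']

    # Numeric order, as the removed key produced:
    print(sorted(files, key=lambda x: int(x.split("_")[1].split(".")[0])))
    # -> ['data_1.pkl', 'data_2.pkl', 'data_10.pkl']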

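Note on the third hunk: the torch.load calls now expect neg_cond_0.pt and neg_cond_1.pt in the current working directory rather than under outputs/. A minimal sketch of the precompute-then-load round trip implied by the commented-out torch.save lines; neg_cond is assumed to be the 2-tuple of tensors that evaluate.py builds earlier:

    import os
    import torch

    def save_neg_cond(neg_cond, out_dir="."):
        # One-off save of the two negative-conditioning tensors
        # (mirrors the commented-out torch.save lines in the diff).
        torch.save(neg_cond[0], os.path.join(out_dir, "neg_cond_0.pt"))
        torch.save(neg_cond[1], os.path.join(out_dir, "neg_cond_1.pt"))

    def load_neg_cond(out_dir="."):
        # Later runs reload the tuple instead of recomputing it.
        return (
            torch.load(os.path.join(out_dir, "neg_cond_0.pt")),
            torch.load(os.path.join(out_dir, "neg_cond_1.pt")),
        )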
sd3_infer.py (+4 -5)

@@ -382,8 +382,7 @@ def vae_encode(self, image, controlnet_cond: bool = False) -> torch.Tensor:
         image_np = np.moveaxis(image_np, 2, 0)
         batch_images = np.expand_dims(image_np, axis=0).repeat(1, axis=0)
         image_torch = torch.from_numpy(batch_images).cuda()
-        if not controlnet_cond:
-            image_torch = 2.0 * image_torch - 1.0
+        image_torch = 2.0 * image_torch - 1.0
         image_torch = image_torch.cuda()
         self.vae.model = self.vae.model.cuda()
         latent = self.vae.model.encode(image_torch).cpu()
@@ -400,9 +399,9 @@ def vae_encode_pkl(self, pkl_location: str) -> torch.Tensor:
         return latent
 
     def vae_encode_tensor(self, tensor: torch.Tensor) -> torch.Tensor:
-        latent, _ = DiagonalGaussianRegularizer()(tensor)
-        latent = SD3LatentFormat().process_in(latent)
-        return latent
+        tensor, _ = DiagonalGaussianRegularizer()(tensor)
+        tensor = SD3LatentFormat().process_in(tensor)
+        return tensor
 
     def vae_decode(self, latent) -> Image.Image:
         self.print("Decoding latent to image...")
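For context, a rough usage sketch of the two encode paths this commit touches, as they are driven from evaluate.py. It is only an illustration: the helper name is hypothetical, and it assumes vae_encode accepts a PIL image and that the dataset sample carries the "vae_f8_ch16.cond.sft.latent" key used in evaluate.py:

    import torch
    from PIL import Image

    def encode_controlnet_cond(inferencer, sample=None, image_path=None) -> torch.Tensor:
        # Hypothetical helper; `inferencer` stands for the SD3 inferencer object
        # that evaluate.py constructs.
        if sample is not None:
            # Precomputed route (what evaluate.py now uses): the sample already
            # holds a raw VAE posterior tensor; vae_encode_tensor samples it via
            # DiagonalGaussianRegularizer and applies SD3LatentFormat().process_in.
            return inferencer.vae_encode_tensor(sample["vae_f8_ch16.cond.sft.latent"])
        # Image route: after this commit vae_encode always rescales the pixels
        # from [0, 1] to [-1, 1] before the VAE, even when controlnet_cond=True.
        image = Image.open(image_path).convert("RGB")
        return inferencer.vae_encode(image, controlnet_cond=True)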
