@@ -13,7 +13,7 @@
 
 # Debugging notes - for sd1.5, the original apply_model method is called through modules.sd_hijack_utils and is ldm.models.diffusion.ddpm.LatentDiffusion
 # For sdxl - OpenAIWrapper will be called, which will call the underlying diffusion_model
-
+# When controlnet is enabled, the underlying model is not available to use, therefore we skip it
 
 def find_noise_for_image(p, cond, uncond, cfg_scale, steps):
     x = p.init_latent
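The two call paths named in the debugging notes show up later in this diff, in `noise_from_model`: sd1.5 goes through `shared.sd_model.apply_model`, while sdxl calls `shared.sd_model.model` (the OpenAIWrapper) directly with `num_classes` temporarily cleared. A condensed sketch of that dispatch, not self-contained: it assumes `x_in`, `c_in`, `t`, and `cond_in` are prepared as in the function further down.

```python
from modules import shared  # webui module; x_in, c_in, t, cond_in assumed prepared

if shared.sd_model.is_sdxl:
    # OpenAIWrapper path: call the wrapper directly with the crossattn conditioning
    eps = shared.sd_model.model(x_in * c_in, t, {"crossattn": cond_in["c_crossattn"][0]})
else:
    # sd1.5 path: apply_model is hijacked via modules.sd_hijack_utils and ends up
    # in ldm.models.diffusion.ddpm.LatentDiffusion
    eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)
```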
@@ -78,11 +78,11 @@ def find_noise_for_image(p, cond, uncond, cfg_scale, steps):
     return x / x.std()
 
 
-Cached = namedtuple("Cached", ["noise", "cfg_scale", "steps", "latent", "original_prompt", "original_negative_prompt", "sigma_adjustment"])
+Cached = namedtuple("Cached", ["noise", "cfg_scale", "steps", "latent", "original_prompt", "original_negative_prompt", "sigma_adjustment", "second_order_correction", "noise_sigma_intensity"])
 
 
 # Based on changes suggested by briansemrau in https://github.com/AUTOMATIC1111/stable-diffusion-webui/issues/736
-def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps):
+def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps, correction_factor, sigma_intensity):
     x = p.init_latent
 
     s_in = x.new_ones([x.shape[0]])
@@ -98,11 +98,7 @@ def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps):
 
     for i in trange(1, len(sigmas)):
         shared.state.sampling_step += 1
-
-        x_in = torch.cat([x] * 2)
         sigma_in = torch.cat([sigmas[i - 1] * s_in] * 2)
-
-
         if shared.sd_model.is_sdxl:
             cond_tensor = cond['crossattn']
             uncond_tensor = uncond['crossattn']
@@ -113,46 +109,73 @@ def find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg_scale, steps):
             image_conditioning = torch.cat([p.image_conditioning] * 2)
             cond_in = {"c_concat": [image_conditioning], "c_crossattn": [cond_in]}
 
-        c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)[skip:]]
-
         if i == 1:
             t = dnw.sigma_to_t(torch.cat([sigmas[i] * s_in] * 2))
+            dt = (sigmas[i] - sigmas[i - 1]) / (2 * sigmas[i])
         else:
             t = dnw.sigma_to_t(sigma_in)
+            dt = (sigmas[i] - sigmas[i - 1]) / sigmas[i - 1]
+
+        noise = noise_from_model(x, t, dt, sigma_in, cond_in, cfg_scale, dnw, skip)
+
+        if correction_factor > 0:
+            recalculated_noise = noise_from_model(x + noise, t, dt, sigma_in, cond_in, cfg_scale, dnw, skip)
+            noise = recalculated_noise * correction_factor + noise * (1 - correction_factor)
+
+        x += noise
+
+        sd_samplers_common.store_latent(x)
+
+        # This shouldn't be necessary, but solved some VRAM issues
+        # del x_in, sigma_in, cond_in, c_out, c_in, t
+        # del eps, denoised_uncond, denoised_cond, denoised, dt
+
+    shared.state.nextjob()
+
+    return x / (x.std() * (1 - sigma_intensity) + sigmas[-1] * sigma_intensity)
 
+def noise_from_model(x, t, dt, sigma_in, cond_in, cfg_scale, dnw, skip):
 
+    if cfg_scale == 1:  # Case where denoised_uncond should not be calculated - 50% speedup, also good for sdxl in experiments
+        x_in = x
+        sigma_in = sigma_in[1:2]
+        c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)[skip:]]
+        cond_in = {"c_concat": [cond_in["c_concat"][0][1:2]], "c_crossattn": [cond_in["c_crossattn"][0][1:2]]}
         if shared.sd_model.is_sdxl:
             num_classes_hack = shared.sd_model.model.diffusion_model.num_classes
             shared.sd_model.model.diffusion_model.num_classes = None
+            print("\nDIMS")
+            print(x_in.shape, c_in.shape, t[1:2].shape, cond_in["c_crossattn"][0].shape)
             try:
-                eps = shared.sd_model.model(x_in * c_in, t, {"crossattn": cond_in["c_crossattn"][0]})
+                eps = shared.sd_model.model(x_in * c_in, t[1:2], {"crossattn": cond_in["c_crossattn"][0]})
             finally:
                 shared.sd_model.model.diffusion_model.num_classes = num_classes_hack
         else:
-            eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)
+            eps = shared.sd_model.apply_model(x_in * c_in, t[1:2], cond=cond_in)
 
-        denoised_uncond, denoised_cond = (x_in + eps * c_out).chunk(2)
+        return -eps * c_out * dt
+    else:
+        x_in = torch.cat([x] * 2)
 
-        denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale
+        c_out, c_in = [K.utils.append_dims(k, x_in.ndim) for k in dnw.get_scalings(sigma_in)[skip:]]
 
-        if i == 1:
-            d = (x - denoised) / (2 * sigmas[i])
+        if shared.sd_model.is_sdxl:
+            num_classes_hack = shared.sd_model.model.diffusion_model.num_classes
+            shared.sd_model.model.diffusion_model.num_classes = None
+            print("\nDIMS")
+            print(x_in.shape, c_in.shape, t.shape, cond_in["c_crossattn"][0].shape)
+            try:
+                eps = shared.sd_model.model(x_in * c_in, t, {"crossattn": cond_in["c_crossattn"][0]})
+            finally:
+                shared.sd_model.model.diffusion_model.num_classes = num_classes_hack
         else:
-            d = (x - denoised) / sigmas[i - 1]
-
-        dt = sigmas[i] - sigmas[i - 1]
-        x = x + d * dt
-
-        sd_samplers_common.store_latent(x)
-
-        # This shouldn't be necessary, but solved some VRAM issues
-        del x_in, sigma_in, cond_in, c_out, c_in, t,
-        del eps, denoised_uncond, denoised_cond, denoised, d, dt
+            eps = shared.sd_model.apply_model(x_in * c_in, t, cond=cond_in)
 
-    shared.state.nextjob()
+        denoised_uncond, denoised_cond = (eps * c_out).chunk(2)
 
-    return x / sigmas[-1]
+        denoised = denoised_uncond + (denoised_cond - denoised_uncond) * cfg_scale
 
+        return -denoised * dt
 
 class Script(scripts.Script):
     def __init__(self):
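The `correction_factor` blend in the hunk above is a Heun-style predictor-corrector: take an Euler increment, re-evaluate the model at the predicted point, and mix the two estimates (at `correction_factor = 0.5` this is exactly Heun's method; at 0 it falls back to plain Euler). A minimal self-contained sketch of the scheme, with a toy derivative standing in for `noise_from_model`:

```python
import torch

def corrected_step(x, increment_fn, correction_factor=0.5):
    """One predictor-corrector step as used above. `increment_fn` plays the
    role of noise_from_model and already includes the dt factor."""
    noise = increment_fn(x)                     # predictor: Euler increment
    if correction_factor > 0:
        recalculated = increment_fn(x + noise)  # corrector: increment at the prediction
        noise = recalculated * correction_factor + noise * (1 - correction_factor)
    return x + noise

# Toy usage: integrate dx/dt = -x over dt = 0.1; the exact factor is exp(-0.1) ≈ 0.9048.
dt = 0.1
x = torch.ones(3)
print(corrected_step(x, lambda v: -v * dt))     # ≈ 0.9050, vs 0.9 for plain Euler
```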
@@ -183,17 +206,20 @@ def ui(self, is_img2img):
         cfg = gr.Slider(label="Decode CFG scale", minimum=0.0, maximum=15.0, step=0.1, value=1.0, elem_id=self.elem_id("cfg"))
         randomness = gr.Slider(label="Randomness", minimum=0.0, maximum=1.0, step=0.01, value=0.0, elem_id=self.elem_id("randomness"))
         sigma_adjustment = gr.Checkbox(label="Sigma adjustment for finding noise for image", value=False, elem_id=self.elem_id("sigma_adjustment"))
+        second_order_correction = gr.Slider(label="Correct noise by running model again", minimum=0.0, maximum=1.0, step=0.01, value=0.5, elem_id=self.elem_id("second_order_correction"))
+        noise_sigma_intensity = gr.Slider(label="Weight scaling std vs sigma based", minimum=-1.0, maximum=2.0, step=0.01, value=0.5, elem_id=self.elem_id("noise_sigma_intensity"))
 
         return [
             info,
             override_sampler,
             override_prompt, original_prompt, original_negative_prompt,
             override_steps, st,
             override_strength,
-            cfg, randomness, sigma_adjustment,
+            cfg, randomness, sigma_adjustment, second_order_correction,
+            noise_sigma_intensity
         ]
 
-    def run(self, p, _, override_sampler, override_prompt, original_prompt, original_negative_prompt, override_steps, st, override_strength, cfg, randomness, sigma_adjustment):
+    def run(self, p, _, override_sampler, override_prompt, original_prompt, original_negative_prompt, override_steps, st, override_strength, cfg, randomness, sigma_adjustment, second_order_correction, noise_sigma_intensity):
         # Override
         if override_sampler:
             p.sampler_name = "Euler"
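The new `noise_sigma_intensity` slider feeds the final normalization in `find_noise_for_image_sigma_adjustment`, `x / (x.std() * (1 - sigma_intensity) + sigmas[-1] * sigma_intensity)`: at 0 it reproduces std-based scaling (like `x / x.std()` in `find_noise_for_image`), at 1 the old `x / sigmas[-1]`, and the slider's range of -1.0 to 2.0 allows extrapolating past either. A small illustration with placeholder values (the latent and sigma below are made up, not webui data):

```python
import torch

def scale_recovered_noise(x, max_sigma, sigma_intensity):
    # Interpolate the divisor between the tensor's own std (sigma_intensity=0)
    # and the schedule's largest sigma (sigma_intensity=1).
    return x / (x.std() * (1 - sigma_intensity) + max_sigma * sigma_intensity)

x = torch.randn(1, 4, 64, 64) * 3.0   # placeholder latent with std ~3
max_sigma = 14.6                       # illustrative stand-in for sigmas[-1]
for w in (0.0, 0.5, 1.0):
    print(w, scale_recovered_noise(x, max_sigma, w).std().item())
```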
@@ -211,7 +237,9 @@ def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subs
             same_params = self.cache is not None and self.cache.cfg_scale == cfg and self.cache.steps == st \
                 and self.cache.original_prompt == original_prompt \
                 and self.cache.original_negative_prompt == original_negative_prompt \
-                and self.cache.sigma_adjustment == sigma_adjustment
+                and self.cache.sigma_adjustment == sigma_adjustment \
+                and self.cache.second_order_correction == second_order_correction \
+                and self.cache.noise_sigma_intensity == noise_sigma_intensity
             same_everything = same_params and self.cache.latent.shape == lat.shape and np.abs(self.cache.latent - lat).sum() < 100
 
             rand_noise = processing.create_random_tensors(p.init_latent.shape[1:], seeds=seeds, subseeds=subseeds, subseed_strength=p.subseed_strength, seed_resize_from_h=p.seed_resize_from_h, seed_resize_from_w=p.seed_resize_from_w, p=p)
@@ -231,10 +259,10 @@ def sample_extra(conditioning, unconditional_conditioning, seeds, subseeds, subs
                 cond = p.sd_model.get_learned_conditioning(p.batch_size * [original_prompt])
                 uncond = p.sd_model.get_learned_conditioning(p.batch_size * [original_negative_prompt])
                 if sigma_adjustment:
-                    rec_noise = find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg, st)
+                    rec_noise = find_noise_for_image_sigma_adjustment(p, cond, uncond, cfg, st, second_order_correction, noise_sigma_intensity)
                 else:
                     rec_noise = find_noise_for_image(p, cond, uncond, cfg, st)
-                self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment)
+                self.cache = Cached(rec_noise, cfg, st, lat, original_prompt, original_negative_prompt, sigma_adjustment, second_order_correction, noise_sigma_intensity)
 
             combined_noise = ((1 - randomness) * rec_noise + randomness * rand_noise) / ((randomness**2 + (1 - randomness)**2) ** 0.5)
 
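The `combined_noise` line mixes the recovered noise with fresh random noise using weights `(1 - randomness)` and `randomness`, then divides by `sqrt(randomness^2 + (1 - randomness)^2)`. For independent, roughly unit-variance tensors this restores unit variance: at `randomness = 0.5` the raw mix has std around `sqrt(0.5) ≈ 0.707` before the rescale. A quick numerical check under that independence assumption:

```python
import torch

randomness = 0.5
rec_noise = torch.randn(1_000_000)    # stand-in for the recovered noise
rand_noise = torch.randn(1_000_000)   # stand-in for the fresh random noise

raw_mix = (1 - randomness) * rec_noise + randomness * rand_noise
combined = raw_mix / ((randomness**2 + (1 - randomness)**2) ** 0.5)

print(raw_mix.std())   # ~0.707: plain weighted averaging shrinks the variance
print(combined.std())  # ~1.0: the divisor restores unit standard deviation
```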