@@ -181,7 +181,8 @@ def __init__(self) -> None:
181
181
"mlsd" : mlsd ,
182
182
"normal_map" : midas_normal ,
183
183
"openpose" : openpose ,
184
- # "openpose_hand": openpose_hand,
184
+ "openpose_hand" : openpose_hand ,
185
+ "clip_vision" : clip ,
185
186
"pidinet" : pidinet ,
186
187
"scribble" : simple_scribble ,
187
188
"fake_scribble" : fake_scribble ,
@@ -191,6 +192,7 @@ def __init__(self) -> None:
191
192
"hed" : unload_hed ,
192
193
"fake_scribble" : unload_hed ,
193
194
"mlsd" : unload_mlsd ,
195
+ "clip" : unload_clip ,
194
196
"depth" : unload_midas ,
195
197
"depth_leres" : unload_leres ,
196
198
"normal_map" : unload_midas ,
@@ -532,6 +534,38 @@ def parse_remote_call(self, p, params, idx):
532
534
533
535
return (enabled , module , model , weight , image , scribble_mode , \
534
536
resize_mode , rgbbgr_mode , lowvram , pres , pthr_a , pthr_b , guidance_start , guidance_end , guess_mode ), input_image
537
+
538
def detectmap_proc(self, detected_map, rgbbgr_mode, resize_mode, h, w, module=None):
    """Turn a preprocessor output into a control hint tensor and a loggable image.

    The previous signature did not accept ``detected_map`` at all, so the
    first statement read an unassigned local and raised ``UnboundLocalError``.
    The map is now the first positional parameter, matching how ``process``
    invokes this method (map first, then ``rgbbgr_mode``, ``resize_mode``,
    ``h``, ``w``).

    Args:
        detected_map: Preprocessor output. It is passed through ``HWC3`` and
            then ``torch.from_numpy`` with an ``'h w c'`` layout, so it is
            presumably a NumPy image array -- TODO confirm against the
            preprocessors.
        rgbbgr_mode: When truthy, the channel axis is reversed before the
            hint tensor is built.
        resize_mode: ``"Scale to Fit (Inner Fit)"`` and
            ``"Envelope (Outer Fit)"`` select resize + center-crop; any other
            value resizes straight to ``(h, w)``.
        h: Target height.
        w: Target width.
        module: Preprocessor name. ``"normal_map"`` forces the channel
            reversal regardless of ``rgbbgr_mode``. Optional because the
            existing five-argument call does not supply it; pass it by
            keyword to keep the normal-map handling.

    Returns:
        A ``(control, detected_map)`` tuple: ``control`` is a float tensor in
        ``c h w`` layout scaled by ``1 / 255`` on the controlnet device, and
        ``detected_map`` is a ``uint8`` NumPy array in ``h w c`` layout kept
        for logging.
    """
    detected_map = HWC3(detected_map)

    # The hint optionally has its channel order reversed; the logged map never does.
    if module == "normal_map" or rgbbgr_mode:
        hint_source = detected_map[:, :, ::-1]
    else:
        hint_source = detected_map
    # .copy() is required for the reversed view: from_numpy rejects negative strides.
    control = torch.from_numpy(hint_source.copy()).float().to(devices.get_device_for("controlnet")) / 255.0

    control = rearrange(control, 'h w c -> c h w')
    detected_map = rearrange(torch.from_numpy(detected_map), 'h w c -> c h w')

    # Pick one transform and apply it to both tensors so they stay aligned.
    if resize_mode == "Scale to Fit (Inner Fit)":
        transform = Compose([
            Resize(min(h, w), interpolation=InterpolationMode.BICUBIC),
            CenterCrop(size=(h, w)),
        ])
    elif resize_mode == "Envelope (Outer Fit)":
        transform = Compose([
            Resize(max(h, w), interpolation=InterpolationMode.BICUBIC),
            CenterCrop(size=(h, w)),
        ])
    else:
        transform = Resize((h, w), interpolation=InterpolationMode.BICUBIC)

    control = transform(control)
    detected_map = transform(detected_map)

    # for log use
    detected_map = rearrange(detected_map, 'c h w -> h w c').numpy().astype(np.uint8)
    return control, detected_map
535
569
536
570
def process (self , p , is_img2img = False , * args ):
537
571
"""
@@ -652,43 +686,28 @@ def process(self, p, is_img2img=False, *args):
652
686
preprocessor = self .preprocessor [module ]
653
687
h , w , bsz = p .height , p .width , p .batch_size
654
688
if pres > 64 :
655
- detected_map = preprocessor (input_image , res = pres , thr_a = pthr_a , thr_b = pthr_b )
689
+ detected_map , is_image = preprocessor (input_image , res = pres , thr_a = pthr_a , thr_b = pthr_b )
656
690
else :
657
- detected_map = preprocessor (input_image )
658
-
659
- detected_map = HWC3 (detected_map )
660
- if module == "normal_map" or rgbbgr_mode :
661
- control = torch .from_numpy (detected_map [:, :, ::- 1 ].copy ()).float ().to (devices .get_device_for ("controlnet" )) / 255.0
662
- else :
663
- control = torch .from_numpy (detected_map .copy ()).float ().to (devices .get_device_for ("controlnet" )) / 255.0
691
+ detected_map , is_image = preprocessor (input_image )
664
692
665
- control = rearrange (control , 'h w c -> c h w' )
666
- detected_map = rearrange (torch .from_numpy (detected_map ), 'h w c -> c h w' )
667
-
668
- if resize_mode == "Scale to Fit (Inner Fit)" :
669
- transform = Compose ([
670
- Resize (h if h < w else w , interpolation = InterpolationMode .BICUBIC ),
671
- CenterCrop (size = (h , w )),
672
- ])
673
- control = transform (control )
674
- detected_map = transform (detected_map )
675
- elif resize_mode == "Envelope (Outer Fit)" :
676
- transform = Compose ([
677
- Resize (h if h > w else w , interpolation = InterpolationMode .BICUBIC ),
678
- CenterCrop (size = (h , w ))
679
- ])
680
- control = transform (control )
681
- detected_map = transform (detected_map )
693
+ if is_image :
694
+ control , detected_map = self .detectmap_proc (detected_map , rgbbgr_mode , resize_mode , h , w )
695
+ detected_maps .append ((detected_map , module ))
682
696
else :
683
- control = Resize ((h ,w ), interpolation = InterpolationMode .BICUBIC )(control )
684
- detected_map = Resize ((h ,w ), interpolation = InterpolationMode .BICUBIC )(detected_map )
685
-
686
- # for log use
687
- detected_map = rearrange (detected_map , 'c h w -> h w c' ).numpy ().astype (np .uint8 )
688
- detected_maps .append ((detected_map , module ))
697
+ control = detected_map
689
698
690
- # hint_cond, guess_mode, weight, guidance_stopped, stop_guidance_percent, advanced_weighting
691
- forward_param = ControlParams (model_net , control , guess_mode , weight , False , guidance_start , guidance_end , None , isinstance (model_net , PlugableAdapter ))
699
+ forward_param = ControlParams (
700
+ control_model = model_net ,
701
+ hint_cond = control ,
702
+ guess_mode = guess_mode ,
703
+ weight = weight ,
704
+ guidance_stopped = False ,
705
+ start_guidance_percent = guidance_start ,
706
+ stop_guidance_percent = guidance_end ,
707
+ advanced_weighting = None ,
708
+ is_adapter = isinstance (model_net , PlugableAdapter ),
709
+ is_extra_cond = getattr (model_net , "target" , "" ) == "scripts.adapter.StyleAdapter"
710
+ )
692
711
forward_params .append (forward_param )
693
712
694
713
self .latest_network = UnetHook (lowvram = hook_lowvram )
0 commit comments