|
| 1 | +import base64 |
| 2 | +import io |
| 3 | +from diffusers.models import ControlNetModel |
| 4 | +from huggingface_hub import hf_hub_download |
| 5 | + |
| 6 | +import cv2 |
| 7 | +import torch |
| 8 | +import numpy as np |
| 9 | +import random |
| 10 | +from PIL import Image |
| 11 | +from insightface.app import FaceAnalysis |
| 12 | + |
| 13 | +from app.llms.instantid.pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps |
| 14 | + |
def _face_area(face):
    """Return the bounding-box area of a detected face (bbox is x1, y1, x2, y2)."""
    x1, y1, x2, y2 = face["bbox"]
    return (x2 - x1) * (y2 - y1)


def _face_region_mask(height, width, bbox):
    """Build an RGB PIL mask that is white inside ``bbox`` and black elsewhere."""
    mask = np.zeros((height, width, 3), dtype=np.uint8)
    x1, y1, x2, y2 = (int(v) for v in bbox)
    # A bbox may extend past the image edge; a negative index would wrap
    # around when slicing, so clamp to 0 (upper bounds are clamped by slicing).
    x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)
    mask[y1:y2, x1:x2] = 255
    return Image.fromarray(mask)


def instantid_worker(prompt, sharedmem):
    """Generate an InstantID image from a face photo and store it in ``sharedmem``.

    Args:
        prompt: Text prompt; a fixed quality suffix is appended to it.
        sharedmem: Mapping that holds the base64-encoded source image under
            ``"input_image"``. The result is written back under
            ``"output_image"`` as a base64-encoded JPEG (UTF-8 string).

    Raises:
        KeyError: If ``sharedmem`` has no ``"input_image"`` entry.
        IndexError: If no face is detected in the input image.
    """
    import logging

    # Decode the input first so a bad payload fails before any download or
    # model load is attempted.
    img_data = base64.b64decode(sharedmem["input_image"])
    # Normalise to 3-channel RGB: RGBA / grayscale / palette inputs would
    # otherwise break the RGB->BGR conversion below.
    face_image = Image.open(io.BytesIO(img_data)).convert("RGB")

    try:
        for filename in (
            "ControlNetModel/config.json",
            "ControlNetModel/diffusion_pytorch_model.safetensors",
            "ip-adapter.bin",
        ):
            hf_hub_download(repo_id="InstantX/InstantID", filename=filename, local_dir="./checkpoints")
    except Exception:
        # Best effort: the files may already be present in ./checkpoints.
        # If they are not, loading the checkpoints below fails loudly.
        logging.getLogger(__name__).warning(
            "InstantID checkpoint download failed; falling back to local files",
            exc_info=True,
        )

    prompt_default = ", (detailed) (intricate) (8k) (HDR) (cinematic lighting) (sharp focus)"
    prompt = prompt + prompt_default

    negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green"

    DEFAULT_CUDA = "cuda"

    face_analyzer = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    face_analyzer.prepare(ctx_id=0, det_size=(640, 640))

    # Detect the face before loading the diffusion pipeline so an image
    # without a face is rejected without paying for the model load.
    face_image_cv2 = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
    height, width, _ = face_image_cv2.shape

    faces = face_analyzer.get(face_image_cv2)
    if not faces:
        # IndexError is what the previous `sorted(...)[-1]` raised on an empty
        # result; the type is kept so existing callers behave the same.
        raise IndexError("no face detected in input image")
    # Use the largest face by bounding-box area (width * height).
    face_info = max(faces, key=_face_area)
    face_emb = face_info['embedding']
    face_kps = draw_kps(face_image, face_info['kps'])

    control_mask = _face_region_mask(height, width, face_info["bbox"])

    face_adapter = './checkpoints/ip-adapter.bin'
    controlnet_path = './checkpoints/ControlNetModel'

    controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)

    pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
        "wangqixun/YamerMIX_v8", controlnet=controlnet, torch_dtype=torch.float16
    )
    pipe.to(DEFAULT_CUDA)

    pipe.load_ip_adapter_instantid(face_adapter)

    # The LCM LoRA is loaded but immediately disabled, so it does not affect
    # this generation.
    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
    pipe.disable_lora()

    pipe.set_ip_adapter_scale(0.8)

    # Fresh random seed per call.
    generator = torch.Generator(device=DEFAULT_CUDA).manual_seed(random.randint(0, np.iinfo(np.int32).max))

    # NOTE(review): `guide_scale` looks like a typo for `guidance_scale`; how
    # the pipeline treats this keyword depends on its signature, which is not
    # visible here. Left unchanged because renaming it would alter the output.
    # NOTE(review): height/width are passed through from the input image;
    # confirm the pipeline accepts sizes that are not multiples of 8.
    image = pipe(
        prompt,
        image_embeds=face_emb,
        image=face_kps,
        control_mask=control_mask,
        num_inference_steps=50,
        controlnet_conditioning_scale=0.8,
        negative_prompt=negative_prompt,
        generator=generator,
        guide_scale=0,
        height=height,
        width=width,
    ).images[0]

    output_img_data = io.BytesIO()
    image.save(output_img_data, format="JPEG")
    image_base64 = base64.b64encode(output_img_data.getvalue()).decode('utf-8')

    sharedmem["output_image"] = image_base64
0 commit comments