|
| 1 | +# SPDX-FileCopyrightText: 2022-present deepset GmbH <[email protected]> |
| 2 | +# |
| 3 | +# SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
| 5 | +import os |
| 6 | +from typing import Any, Dict, List, Literal, Optional |
| 7 | + |
| 8 | +from openai import OpenAI |
| 9 | +from openai.types.image import Image |
| 10 | + |
| 11 | +from haystack import component, default_from_dict, default_to_dict, logging |
| 12 | +from haystack.utils import Secret, deserialize_secrets_inplace |
| 13 | + |
| 14 | +logger = logging.getLogger(__name__) |
| 15 | + |
| 16 | + |
@component
class DALLEImageGenerator:
    """
    Generates images using OpenAI's DALL-E model.

    For details on OpenAI API parameters, see
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/images/create).

    ### Usage example

    ```python
    from haystack.components.generators import DALLEImageGenerator
    image_generator = DALLEImageGenerator()
    image_generator.warm_up()
    response = image_generator.run("Show me a picture of a black cat.")
    print(response)
    ```
    """

    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        model: str = "dall-e-3",
        quality: Literal["standard", "hd"] = "standard",
        size: Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"] = "1024x1024",
        response_format: Literal["url", "b64_json"] = "url",
        api_key: Secret = Secret.from_env_var("OPENAI_API_KEY"),
        api_base_url: Optional[str] = None,
        organization: Optional[str] = None,
        timeout: Optional[float] = None,
        max_retries: Optional[int] = None,
    ):
        """
        Creates an instance of DALLEImageGenerator. Unless specified otherwise in `model`, uses OpenAI's dall-e-3.

        :param model: The model to use for image generation. Can be "dall-e-2" or "dall-e-3".
        :param quality: The quality of the generated image. Can be "standard" or "hd".
        :param size: The size of the generated images.
            Must be one of 256x256, 512x512, or 1024x1024 for dall-e-2.
            Must be one of 1024x1024, 1792x1024, or 1024x1792 for dall-e-3 models.
        :param response_format: The format of the response. Can be "url" or "b64_json".
        :param api_key: The OpenAI API key to connect to OpenAI.
        :param api_base_url: An optional base URL.
        :param organization: The Organization ID, defaults to `None`.
        :param timeout:
            Timeout for OpenAI Client calls. If not set (`None`), it is inferred from the `OPENAI_TIMEOUT`
            environment variable or set to 30.
        :param max_retries:
            Maximum retries to establish contact with OpenAI if it returns an internal error. If not set (`None`),
            it is inferred from the `OPENAI_MAX_RETRIES` environment variable or set to 5.
            An explicit `0` is honored.
        """
        self.model = model
        self.quality = quality
        self.size = size
        self.response_format = response_format
        self.api_key = api_key
        self.api_base_url = api_base_url
        self.organization = organization

        # Compare against None rather than relying on truthiness: an explicit 0 / 0.0 is a
        # deliberate choice by the caller and must not be replaced by the environment/default value.
        self.timeout = timeout if timeout is not None else float(os.environ.get("OPENAI_TIMEOUT", "30.0"))
        self.max_retries = max_retries if max_retries is not None else int(os.environ.get("OPENAI_MAX_RETRIES", "5"))

        # The client is created in `warm_up()`, not here.
        self.client: Optional[OpenAI] = None

    def warm_up(self) -> None:
        """
        Warm up the OpenAI client.

        Creates the client on the first call; later calls leave the existing client untouched.
        """
        if self.client is None:
            self.client = OpenAI(
                api_key=self.api_key.resolve_value(),
                organization=self.organization,
                base_url=self.api_base_url,
                timeout=self.timeout,
                max_retries=self.max_retries,
            )

    @component.output_types(images=List[str], revised_prompt=str)
    def run(
        self,
        prompt: str,
        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] = None,
        quality: Optional[Literal["standard", "hd"]] = None,
        response_format: Optional[Literal["url", "b64_json"]] = None,
    ):
        """
        Invokes the image generation inference based on the provided prompt and generation parameters.

        :param prompt: The prompt to generate the image.
        :param size: If provided, overrides the size provided during initialization.
        :param quality: If provided, overrides the quality provided during initialization.
        :param response_format: If provided, overrides the response format provided during initialization.

        :returns:
            A dictionary containing the generated list of images and the revised prompt.
            Depending on the `response_format` parameter, the list of images can be URLs or base64 encoded JSON strings.
            The revised prompt is the prompt that was used to generate the image, if there was any revision
            to the prompt made by OpenAI.
            If the response carries no image data, the list contains a single empty string and the revised
            prompt is an empty string.
        :raises RuntimeError:
            If the component was not warmed up with `warm_up()` before calling `run()`.
        """
        if self.client is None:
            raise RuntimeError(
                "The component DALLEImageGenerator wasn't warmed up. Run 'warm_up()' before calling 'run()'."
            )

        # Per-call arguments take precedence over the values given at initialization.
        size = size or self.size
        quality = quality or self.quality
        response_format = response_format or self.response_format
        response = self.client.images.generate(
            model=self.model, prompt=prompt, size=size, quality=quality, response_format=response_format, n=1
        )

        # Guard against a response without image entries instead of failing on `data[0]`;
        # use the same empty-string fallback as for an image without url/b64 payload.
        if not response.data:
            return {"images": [""], "revised_prompt": ""}

        image: Image = response.data[0]
        image_str = image.url or image.b64_json or ""
        return {"images": [image_str], "revised_prompt": image.revised_prompt or ""}

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            The serialized component as a dictionary.
        """
        # NOTE: `timeout` and `max_retries` are not part of the serialized parameters.
        return default_to_dict(  # type: ignore
            self,
            model=self.model,
            quality=self.quality,
            size=self.size,
            response_format=self.response_format,
            api_key=self.api_key.to_dict(),
            api_base_url=self.api_base_url,
            organization=self.organization,
        )

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DALLEImageGenerator":
        """
        Deserialize this component from a dictionary.

        :param data:
            The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        init_params = data.get("init_parameters", {})
        # Turn the serialized `api_key` entry back into a Secret before the instance is built.
        deserialize_secrets_inplace(init_params, keys=["api_key"])
        return default_from_dict(cls, data)  # type: ignore
0 commit comments