|
1 | 1 | # Copyright (c) Microsoft. All rights reserved.
|
2 | 2 |
|
3 |
| -from typing import Any |
| 3 | +from pathlib import Path |
| 4 | +from typing import IO, Any |
4 | 5 | from warnings import warn
|
5 | 6 |
|
| 7 | +from openai._types import NOT_GIVEN, FileTypes, NotGiven |
6 | 8 | from openai.types.images_response import ImagesResponse
|
7 | 9 |
|
8 | 10 | from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.open_ai_text_to_image_execution_settings import (
|
@@ -38,6 +40,7 @@ async def generate_image(
|
38 | 40 | Returns:
|
39 | 41 | bytes | str: Image bytes or image URL.
|
40 | 42 | """
|
| 43 | + warn("generate_image is deprecated. Use generate_images.", DeprecationWarning, stacklevel=2) |
41 | 44 | if not settings:
|
42 | 45 | settings = OpenAITextToImageExecutionSettings(**kwargs)
|
43 | 46 | if not isinstance(settings, OpenAITextToImageExecutionSettings):
|
@@ -70,6 +73,177 @@ async def generate_image(
|
70 | 73 |
|
71 | 74 | return response.data[0].url
|
72 | 75 |
|
async def generate_images(
    self,
    prompt: str,
    settings: PromptExecutionSettings | None = None,
    **kwargs: Any,
) -> list[str]:
    """Generate one or more images from text.

    Args:
        prompt: Description of the image(s) to generate. When non-empty it
            replaces any prompt already set on ``settings``.
        settings: Execution settings for the prompt. Settings of another
            type are converted to OpenAITextToImageExecutionSettings.
        kwargs: Additional arguments, check the openai images.generate
            documentation for the supported arguments. They are used only
            to build the settings when ``settings`` is not provided.

    Returns:
        list[str]: One entry per response item that carries data: its URL
        when present, otherwise its base64-encoded image. Items with
        neither are skipped.

    Raises:
        ServiceInvalidRequestError: If no prompt is available.
        ServiceResponseException: If the response is not an ImagesResponse,
            contains no data, or no item has a URL or base64 payload.

    Example:
        Generate three images:

        ```python
        from semantic_kernel.connectors.ai.open_ai import AzureTextToImage

        service = AzureTextToImage(
            service_id="image1",
            deployment_name="gpt-image-1",
            endpoint="https://your-endpoint.cognitiveservices.azure.com",
            api_key="your-api-key",
            api_version="2025-04-01-preview",
        )
        settings = service.get_prompt_execution_settings_class()(service_id="image1")
        settings.n = 3
        images = await service.generate_images("A cute cat wearing a whimsical striped hat", settings=settings)
        ```
    """
    if not settings:
        settings = OpenAITextToImageExecutionSettings(**kwargs)
    if not isinstance(settings, OpenAITextToImageExecutionSettings):
        settings = OpenAITextToImageExecutionSettings.from_prompt_execution_settings(settings)
    if prompt:
        settings.prompt = prompt

    if not settings.prompt:
        raise ServiceInvalidRequestError("Prompt is required.")

    # Fall back to the model configured on the service.
    if not settings.ai_model_id:
        settings.ai_model_id = self.ai_model_id

    response = await self._send_request(settings)

    # Raise explicitly rather than assert: assert statements are removed
    # when Python runs with -O, which would let a wrong response type through.
    if not isinstance(response, ImagesResponse) or not response.data or not isinstance(response.data, list):
        raise ServiceResponseException("Failed to generate image.")

    # Prefer the URL; fall back to the base64 payload; drop empty items.
    results: list[str] = [
        payload
        for image in response.data
        if (payload := getattr(image, "url", None) or getattr(image, "b64_json", None))
    ]
    if not results:
        raise ServiceResponseException("No valid image data found in response.")
    return results
| 144 | + |
async def edit_image(
    self,
    prompt: str,
    image_paths: list[str] | None = None,
    image_files: list[IO[bytes]] | None = None,
    mask_path: str | None = None,
    mask_file: IO[bytes] | None = None,
    settings: PromptExecutionSettings | None = None,
    **kwargs: Any,
) -> list[str]:
    """Edit images using the OpenAI image edit API.

    Args:
        prompt: Instructional prompt for image editing. When non-empty it
            replaces any prompt already set on ``settings``.
        image_paths: List of image file paths to edit. Mutually exclusive
            with ``image_files``; must not be empty.
        image_files: List of file-like objects (opened in binary mode) to
            edit. Mutually exclusive with ``image_paths``; must not be empty.
        mask_path: Optional mask image file path. Takes precedence over
            ``mask_file`` when both are given.
        mask_file: Optional mask image file-like object (opened in binary mode).
        settings: Optional execution settings. If not provided, will be
            constructed from kwargs.
        kwargs: Additional API parameters. Used only when ``settings`` is
            not provided.

    Returns:
        list[str]: One entry per response item that carries data: its
        base64-encoded image when present, otherwise its URL.

    Raises:
        ServiceInvalidRequestError: If no prompt is available, if not
            exactly one of ``image_paths`` / ``image_files`` is given, or
            if the given list is empty.
        ServiceResponseException: If the response contains no data or no
            item has a base64 payload or URL.

    Example:
        Edit images from file paths:

        ```python
        from semantic_kernel.connectors.ai.open_ai import AzureTextToImage

        service = AzureTextToImage(
            service_id="image1",
            deployment_name="gpt-image-1",
            endpoint="https://your-endpoint.cognitiveservices.azure.com",
            api_key="your-api-key",
            api_version="2025-04-01-preview",
        )
        file_paths = ["./new_images/img_1.png", "./new_images/img_2.png"]
        settings = service.get_prompt_execution_settings_class()(service_id="image1")
        settings.n = 2
        results = await service.edit_image(
            prompt="Make the cat wear a wizard hat",
            image_paths=file_paths,
            settings=settings,
        )
        ```

        Edit images from file object:

        ```python
        with open("./new_images/img_1.png", "rb") as f:
            results = await service.edit_image(
                prompt="Make the cat wear a wizard hat",
                image_files=[f],
            )
        ```
    """
    if not settings:
        settings = OpenAITextToImageExecutionSettings(**kwargs)
    if not isinstance(settings, OpenAITextToImageExecutionSettings):
        settings = OpenAITextToImageExecutionSettings.from_prompt_execution_settings(settings)
    # Only override when a prompt was actually supplied, so a prompt already
    # carried by ``settings`` is not wiped out (same rule as generate_images).
    if prompt:
        settings.prompt = prompt

    if not settings.prompt:
        raise ServiceInvalidRequestError("Prompt is required.")
    # Exactly one of the two image sources must be given.
    if (image_paths is None) == (image_files is None):
        raise ServiceInvalidRequestError("Provide either 'image_paths' or 'image_files', and only one.")

    images: list[FileTypes] = []
    if image_paths is not None:
        images = [Path(p) for p in image_paths]
    elif image_files is not None:
        images = list(image_files)
    # An empty list would otherwise be forwarded as a request with no image.
    if not images:
        raise ServiceInvalidRequestError("At least one image is required.")

    mask: FileTypes | NotGiven = NOT_GIVEN
    if mask_path is not None:
        mask = Path(mask_path)
    elif mask_file is not None:
        mask = mask_file

    # NOTE(review): unlike generate_images, no ai_model_id fallback is applied
    # here - confirm that _send_image_edit_request supplies the model.
    response: ImagesResponse = await self._send_image_edit_request(
        image=images,
        mask=mask,
        settings=settings,
    )

    if not response or not response.data or not isinstance(response.data, list):
        raise ServiceResponseException("Failed to edit image.")

    # Prefer the base64 payload; fall back to the URL; drop empty items.
    results: list[str] = [
        payload
        for img in response.data
        if (payload := getattr(img, "b64_json", None) or getattr(img, "url", None))
    ]
    if not results:
        raise ServiceResponseException("No valid image data found in response.")
    return results
| 246 | + |
def get_prompt_execution_settings_class(self) -> type[PromptExecutionSettings]:
    """Return the execution settings class used by this service.

    Returns:
        type[PromptExecutionSettings]: The OpenAITextToImageExecutionSettings class itself, not an instance.
    """
    settings_class = OpenAITextToImageExecutionSettings
    return settings_class
|
0 commit comments