community-efforts/image_preferences/01_synthetic_data_generation_images.py [23:296]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class InferenceEndpointsImageLLM(InferenceEndpointsLLM):
    """``InferenceEndpointsLLM`` variant that produces images instead of text.

    The prompt is forwarded to the async client's ``text_to_image`` call and
    the resulting image is returned as a base64-encoded JPEG string.
    """

    @validate_call
    async def agenerate(
        self,
        input: Dict[str, Any],
        negative_prompt: Optional[str] = None,
        height: Optional[float] = None,
        width: Optional[float] = None,
        num_inference_steps: Optional[float] = None,
        guidance_scale: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """Generate a single image for ``input["prompt"]``.

        Args:
            input: Mapping holding the text prompt under the ``"prompt"`` key.
            negative_prompt: Optional negative prompt forwarded unchanged.
            height: Optional image height; truncated to ``int`` when given.
            width: Optional image width; truncated to ``int`` when given.
            num_inference_steps: Optional step count; truncated to ``int``
                when given.
            guidance_scale: Optional guidance scale. Any explicit value,
                including ``0.0``, is forwarded; ``None`` leaves the choice
                to the endpoint.

        Returns:
            A one-element list ``[{"image": <base64 JPEG string>}]``.
        """
        prompt = input.get("prompt")
        image = await self._aclient.text_to_image(
            prompt,
            negative_prompt=negative_prompt,
            height=int(height) if height else None,
            width=int(width) if width else None,
            num_inference_steps=int(num_inference_steps)
            if num_inference_steps
            else None,
            # Compare against None rather than truthiness: an explicit 0.0 is
            # a value the caller chose and must not be silently replaced by
            # the endpoint default.
            guidance_scale=float(guidance_scale)
            if guidance_scale is not None
            else None,
            # A fresh random seed is drawn on every call.
            seed=random.randint(0, 1000000),
        )
        # The JPEG writer rejects modes such as RGBA or P; convert those to RGB
        # so saving does not raise. Modes it can already encode are untouched.
        if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        return [{"image": img_str}]


class ImageGeneration(Task):
    """Task that generates an image for every input ``prompt``.

    Each generated image is stored through ``save_artifact`` under the
    ``images`` artifact, and the ``image`` column of the output row is
    replaced by a dict holding the relative path of the stored file.
    """

    @property
    def inputs(self) -> List[str]:
        """Names of the input columns the task reads."""
        return ["prompt"]

    @property
    def outputs(self) -> List[str]:
        """Names of the output columns the task produces."""
        return ["image", "model_name"]

    def format_input(self, input: Dict[str, Any]) -> Dict[str, str]:
        """Reduce an input row to the prompt passed to the LLM."""
        return {"prompt": input["prompt"]}

    def format_output(
        self, output: Dict[str, Any], input: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Decode the base64 image string of ``output`` into a PIL image.

        Returns:
            A dict with the decoded image (``None`` when ``output`` carries
            no image string) and the name of the model in use.
        """
        image_str = output.get("image")
        image = None
        if image_str:
            image_bytes = base64.b64decode(image_str)
            image = Image.open(BytesIO(image_bytes))
        return {"image": image, "model_name": self.llm.model_name}

    def process(self, *args: StepInput) -> "StepOutput":
        """Generate images for a batch of rows and yield the output rows.

        Args:
            *args: The first positional argument is the batch of input rows;
                with no arguments the batch is empty.

        Yields:
            One list of rows, each being the input row merged with the
            formatted output and the model name.
        """
        inputs = args[0] if args else []
        formatted_inputs = self._format_inputs(inputs)

        outputs = self.llm.generate_outputs(
            inputs=formatted_inputs,
            num_generations=self.num_generations,
            **self.llm.get_generation_kwargs(),
        )

        task_outputs = []
        for row, row_outputs in zip(inputs, outputs):
            formatted_outputs = self._format_outputs(row_outputs, row)
            for index, formatted_output in enumerate(formatted_outputs):
                image = formatted_output.get("image")
                if image:
                    # The prompt hash is the file name. Generations after the
                    # first get an index suffix so several generations for one
                    # prompt do not overwrite each other; the first keeps the
                    # plain "<hash>.jpeg" name.
                    prompt_hash = hashlib.md5(row["prompt"].encode()).hexdigest()
                    if index == 0:
                        file_name = f"{prompt_hash}.jpeg"
                    else:
                        file_name = f"{prompt_hash}_{index}.jpeg"
                    self.save_artifact(
                        name="images",
                        # Bind the current image and file name as defaults so
                        # the callback does not depend on loop variables that
                        # are rebound or mutated afterwards.
                        write_function=lambda path, image=image, file_name=file_name: image.save(
                            path / file_name
                        ),
                        metadata={"type": "image", "library": "diffusers"},
                    )
                    formatted_output["image"] = {
                        "path": f"artifacts/{self.name}/images/{file_name}"
                    }

                task_output = {
                    **row,
                    **formatted_output,
                    "model_name": self.llm.model_name,
                }
                task_outputs.append(task_output)
        yield task_outputs


## Categories and subcategories for the image generation task.
# Maps each top-level category name to a list of subcategory strings.
# The same subcategory string may appear under several categories
# (e.g. "highly detailed", "vibrant").
# Reference:
# https://huggingface.co/spaces/google/sdxl/blob/main/app.py#L55
# NOTE(review): the "included" / "not included" markers below presumably record
# whether an entry is present in the referenced style list -- confirm.
categories = {
    # included
    "Cinematic": [
        # included
        "emotional",
        "harmonious",
        "vignette",
        "highly detailed",
        "high budget",
        "bokeh",
        "cinemascope",
        "moody",
        "epic",
        "gorgeous",
        "film grain",
        "grainy",
    ],
    # included
    "Photographic": [
        # included
        "film",
        "bokeh",
        "professional",
        "4k",
        "highly detailed",
        # not included (everything from here to the end of this list)
        "Landscape",
        "Portrait",
        "Macro",
        "Portra",
        "Gold",
        "ColorPlus",
        "Ektar",
        "Superia",
        "C200",
        "CineStill",
        "CineStill 50D",
        "CineStill 800T",
        "Tri-X",
        "HP5",
        "Delta",
        "T-Max",
        "Fomapan",
        "StreetPan",
        "Provia",
        "Ektachrome",
        "Velvia",
    ],
    # included
    "Anime": [
        # included
        "anime style",
        "key visual",
        "vibrant",
        "studio anime",
        "highly detailed",
    ],
    # included
    "Manga": [
        # included
        "vibrant",
        "high-energy",
        "detailed",
        "iconic",
        "Japanese comic style",
    ],
    # included
    "Digital art": [
        # included
        "digital artwork",
        "illustrative",
        "painterly",
        "matte painting",
        "highly detailed",
    ],
    # included
    "Pixel art": [
        # included
        "low-res",
        "blocky",
        "pixel art style",
        "8-bit graphics",
    ],
    # included
    "Fantasy art": [
        # included
        "magnificent",
        "celestial",
        "ethereal",
        "painterly",
        "epic",
        "majestic",
        "magical",
        "fantasy art",
        "cover art",
        "dreamy",
    ],
    # included
    "Neonpunk": [
        # included
        "cyberpunk",
        "vaporwave",
        "neon",
        "vibes",
        "vibrant",
        "stunningly beautiful",
        "crisp",
        "detailed",
        "sleek",
        "ultramodern",
        "magenta highlights",
        "dark purple shadows",
        "high contrast",
        "cinematic",
        "ultra detailed",
        "intricate",
        "professional",
    ],
    # included
    "3D Model": [
        # included
        "octane render",
        "highly detailed",
        "volumetric",
        "dramatic lighting",
    ],
    # not included
    "Painting": [
        "Oil",
        "Acrylic",
        "Watercolor",
        "Digital",
        "Mural",
        "Sketch",
        "Gouache",
        "Renaissance",
        "Baroque",
        "Romanticism",
        "Impressionism",
        "Expressionism",
        "Cubism",
        "Surrealism",
        "Pop Art",
        "Minimalism",
        "Realism",
        "Encaustic",
        "Tempera",
        "Fresco",
        "Ink Wash",
        "Spray Paint",
        "Mixed Media",
    ],
    # not included
    "Animation": [
        # not included
        "Animation",
        "Stop motion",
        "Claymation",
        "Pixel Art",
        "Vector",
        "Hand-drawn",
        "Cutout",
        "Whiteboard",
    ],
    # not included
    "Illustration": [
        # not included
        "Book",
        "Comics",
        "Editorial",
        "Advertising",
        "Technical",
        "Fantasy",
        "Scientific",
        "Fashion",
        "Storyboard",
        "Concept Art",
        "Manga",
        "Anime",
        "Digital",
        "Vector",
        "Design",
    ],
}
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


community-efforts/image_preferences/01_synthetic_data_generation_total.py [23:296]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
class InferenceEndpointsImageLLM(InferenceEndpointsLLM):
    """``InferenceEndpointsLLM`` variant that produces images instead of text.

    The prompt is forwarded to the async client's ``text_to_image`` call and
    the resulting image is returned as a base64-encoded JPEG string.
    """

    @validate_call
    async def agenerate(
        self,
        input: Dict[str, Any],
        negative_prompt: Optional[str] = None,
        height: Optional[float] = None,
        width: Optional[float] = None,
        num_inference_steps: Optional[float] = None,
        guidance_scale: Optional[float] = None,
    ) -> List[Dict[str, Any]]:
        """Generate a single image for ``input["prompt"]``.

        Args:
            input: Mapping holding the text prompt under the ``"prompt"`` key.
            negative_prompt: Optional negative prompt forwarded unchanged.
            height: Optional image height; truncated to ``int`` when given.
            width: Optional image width; truncated to ``int`` when given.
            num_inference_steps: Optional step count; truncated to ``int``
                when given.
            guidance_scale: Optional guidance scale. Any explicit value,
                including ``0.0``, is forwarded; ``None`` leaves the choice
                to the endpoint.

        Returns:
            A one-element list ``[{"image": <base64 JPEG string>}]``.
        """
        prompt = input.get("prompt")
        image = await self._aclient.text_to_image(
            prompt,
            negative_prompt=negative_prompt,
            height=int(height) if height else None,
            width=int(width) if width else None,
            num_inference_steps=int(num_inference_steps)
            if num_inference_steps
            else None,
            # Compare against None rather than truthiness: an explicit 0.0 is
            # a value the caller chose and must not be silently replaced by
            # the endpoint default.
            guidance_scale=float(guidance_scale)
            if guidance_scale is not None
            else None,
            # A fresh random seed is drawn on every call.
            seed=random.randint(0, 1000000),
        )
        # The JPEG writer rejects modes such as RGBA or P; convert those to RGB
        # so saving does not raise. Modes it can already encode are untouched.
        if image.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
            image = image.convert("RGB")
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        img_str = base64.b64encode(buffered.getvalue()).decode()
        return [{"image": img_str}]


class ImageGeneration(Task):
    """Task that generates an image for every input ``prompt``.

    Each generated image is stored through ``save_artifact`` under the
    ``images`` artifact, and the ``image`` column of the output row is
    replaced by a dict holding the relative path of the stored file.
    """

    @property
    def inputs(self) -> List[str]:
        """Names of the input columns the task reads."""
        return ["prompt"]

    @property
    def outputs(self) -> List[str]:
        """Names of the output columns the task produces."""
        return ["image", "model_name"]

    def format_input(self, input: Dict[str, Any]) -> Dict[str, str]:
        """Reduce an input row to the prompt passed to the LLM."""
        return {"prompt": input["prompt"]}

    def format_output(
        self, output: Dict[str, Any], input: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Decode the base64 image string of ``output`` into a PIL image.

        Returns:
            A dict with the decoded image (``None`` when ``output`` carries
            no image string) and the name of the model in use.
        """
        image_str = output.get("image")
        image = None
        if image_str:
            image_bytes = base64.b64decode(image_str)
            image = Image.open(BytesIO(image_bytes))
        return {"image": image, "model_name": self.llm.model_name}

    def process(self, *args: StepInput) -> "StepOutput":
        """Generate images for a batch of rows and yield the output rows.

        Args:
            *args: The first positional argument is the batch of input rows;
                with no arguments the batch is empty.

        Yields:
            One list of rows, each being the input row merged with the
            formatted output and the model name.
        """
        inputs = args[0] if args else []
        formatted_inputs = self._format_inputs(inputs)

        outputs = self.llm.generate_outputs(
            inputs=formatted_inputs,
            num_generations=self.num_generations,
            **self.llm.get_generation_kwargs(),
        )

        task_outputs = []
        for row, row_outputs in zip(inputs, outputs):
            formatted_outputs = self._format_outputs(row_outputs, row)
            for index, formatted_output in enumerate(formatted_outputs):
                image = formatted_output.get("image")
                if image:
                    # The prompt hash is the file name. Generations after the
                    # first get an index suffix so several generations for one
                    # prompt do not overwrite each other; the first keeps the
                    # plain "<hash>.jpeg" name.
                    prompt_hash = hashlib.md5(row["prompt"].encode()).hexdigest()
                    if index == 0:
                        file_name = f"{prompt_hash}.jpeg"
                    else:
                        file_name = f"{prompt_hash}_{index}.jpeg"
                    self.save_artifact(
                        name="images",
                        # Bind the current image and file name as defaults so
                        # the callback does not depend on loop variables that
                        # are rebound or mutated afterwards.
                        write_function=lambda path, image=image, file_name=file_name: image.save(
                            path / file_name
                        ),
                        metadata={"type": "image", "library": "diffusers"},
                    )
                    formatted_output["image"] = {
                        "path": f"artifacts/{self.name}/images/{file_name}"
                    }

                task_output = {
                    **row,
                    **formatted_output,
                    "model_name": self.llm.model_name,
                }
                task_outputs.append(task_output)
        yield task_outputs


## Categories and subcategories for the image generation task.
# Maps each top-level category name to a list of subcategory strings.
# The same subcategory string may appear under several categories
# (e.g. "highly detailed", "vibrant").
# Reference:
# https://huggingface.co/spaces/google/sdxl/blob/main/app.py#L55
# NOTE(review): the "included" / "not included" markers below presumably record
# whether an entry is present in the referenced style list -- confirm.
categories = {
    # included
    "Cinematic": [
        # included
        "emotional",
        "harmonious",
        "vignette",
        "highly detailed",
        "high budget",
        "bokeh",
        "cinemascope",
        "moody",
        "epic",
        "gorgeous",
        "film grain",
        "grainy",
    ],
    # included
    "Photographic": [
        # included
        "film",
        "bokeh",
        "professional",
        "4k",
        "highly detailed",
        # not included (everything from here to the end of this list)
        "Landscape",
        "Portrait",
        "Macro",
        "Portra",
        "Gold",
        "ColorPlus",
        "Ektar",
        "Superia",
        "C200",
        "CineStill",
        "CineStill 50D",
        "CineStill 800T",
        "Tri-X",
        "HP5",
        "Delta",
        "T-Max",
        "Fomapan",
        "StreetPan",
        "Provia",
        "Ektachrome",
        "Velvia",
    ],
    # included
    "Anime": [
        # included
        "anime style",
        "key visual",
        "vibrant",
        "studio anime",
        "highly detailed",
    ],
    # included
    "Manga": [
        # included
        "vibrant",
        "high-energy",
        "detailed",
        "iconic",
        "Japanese comic style",
    ],
    # included
    "Digital art": [
        # included
        "digital artwork",
        "illustrative",
        "painterly",
        "matte painting",
        "highly detailed",
    ],
    # included
    "Pixel art": [
        # included
        "low-res",
        "blocky",
        "pixel art style",
        "8-bit graphics",
    ],
    # included
    "Fantasy art": [
        # included
        "magnificent",
        "celestial",
        "ethereal",
        "painterly",
        "epic",
        "majestic",
        "magical",
        "fantasy art",
        "cover art",
        "dreamy",
    ],
    # included
    "Neonpunk": [
        # included
        "cyberpunk",
        "vaporwave",
        "neon",
        "vibes",
        "vibrant",
        "stunningly beautiful",
        "crisp",
        "detailed",
        "sleek",
        "ultramodern",
        "magenta highlights",
        "dark purple shadows",
        "high contrast",
        "cinematic",
        "ultra detailed",
        "intricate",
        "professional",
    ],
    # included
    "3D Model": [
        # included
        "octane render",
        "highly detailed",
        "volumetric",
        "dramatic lighting",
    ],
    # not included
    "Painting": [
        "Oil",
        "Acrylic",
        "Watercolor",
        "Digital",
        "Mural",
        "Sketch",
        "Gouache",
        "Renaissance",
        "Baroque",
        "Romanticism",
        "Impressionism",
        "Expressionism",
        "Cubism",
        "Surrealism",
        "Pop Art",
        "Minimalism",
        "Realism",
        "Encaustic",
        "Tempera",
        "Fresco",
        "Ink Wash",
        "Spray Paint",
        "Mixed Media",
    ],
    # not included
    "Animation": [
        # not included
        "Animation",
        "Stop motion",
        "Claymation",
        "Pixel Art",
        "Vector",
        "Hand-drawn",
        "Cutout",
        "Whiteboard",
    ],
    # not included
    "Illustration": [
        # not included
        "Book",
        "Comics",
        "Editorial",
        "Advertising",
        "Technical",
        "Fantasy",
        "Scientific",
        "Fashion",
        "Storyboard",
        "Concept Art",
        "Manga",
        "Anime",
        "Digital",
        "Vector",
        "Design",
    ],
}
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -