"""Gradio demo: caption an uploaded image with the BLIP base model on CPU."""

from typing import Optional

import gradio as gr
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Hugging Face checkpoint shared by the processor and the model.
MODEL_ID = "Salesforce/blip-image-captioning-base"

# Everything runs on CPU; no GPU is requested.
device = torch.device("cpu")

# Loaded once at import time so each request only pays for inference.
processor = BlipProcessor.from_pretrained(MODEL_ID)
model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)
model.to(device)

# Dedicated template for the explicit natural-language description style.
INSTRUCTION = (
    "Describe this NSFW image in highly explicit, natural language. "
    "Include specific details about the subject's pose, nudity, sexual context, facial expression, body parts, hairstyle, clothing (if any), background, lighting, and emotional tone. "
    "Use clear, uncensored language, and write it as a vivid and realistic scene description."
)


def generate_detailed_prompt(image: Optional[Image.Image]) -> str:
    """Return the BLIP-generated text for *image*, conditioned on INSTRUCTION.

    Args:
        image: The PIL image supplied by the Gradio ``Image`` input, or
            ``None`` when the user submits without uploading anything.

    Returns:
        The decoded model output with special tokens removed.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable
            message instead of a processor traceback.
    """
    if image is None:
        raise gr.Error("Please upload an image first.")

    inputs = processor(image, text=INSTRUCTION, return_tensors="pt").to(device)
    # NOTE(review): BLIP uses `text` as a caption prefix rather than as an
    # instruction, so the decoded string presumably starts with the prompt
    # itself -- confirm and strip the prefix if that is not wanted.
    output = model.generate(**inputs, max_new_tokens=150)
    return processor.decode(output[0], skip_special_tokens=True)


demo = gr.Interface(
    fn=generate_detailed_prompt,
    inputs=gr.Image(type="pil"),
    outputs="text",
    title="🔞 NSFW Image to Natural Prompt (BLIP)",
    description="Upload NSFW image to get a detailed, uncensored, natural-language prompt. Based on BLIP. CPU-only.",
    allow_flagging="never",
)

# Only start the web server when executed as a script, so importing this
# module (e.g. to reuse `generate_detailed_prompt`) does not block on launch.
if __name__ == "__main__":
    demo.launch()