Spaces:

Mauricio-100
/

agent-ai

Sleeping

App Files Community

Mauricio-100 committed on Oct 30, 2025

Commit

a697679

verified ·

1 Parent(s): 8ba6910

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -12

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
-from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch, gradio as gr
 MODEL_ID = "Gopu-poss/gopu-agent-2k-fdf"
@@ -16,33 +16,48 @@ app = FastAPI()
 class Input(BaseModel):
     input: str
 @app.post("/infer")
 def infer(data: Input):
-    inputs = tokenizer(data.input, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9)
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return {"generated_text": text}
-# Gradio multimodal
-def gopu_chat(prompt, image=None, audio=None, video=None):
-    # Pour l’instant on ne traite que le texte
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 demo = gr.Interface(
     fn=gopu_chat,
     inputs=[
-        gr.Textbox(label="Texte"),
-        gr.Image(label="Image", type="filepath"),
-        gr.Audio(label="Voix", type="filepath"),
-        gr.Video(label="Vidéo", type="filepath")
     ],
     outputs="text",
     title="GopuOS Agentic Endpoint",
-    description="Texte, image, voix, vidéo, recherche web (à venir)."
 )
 app = gr.mount_gradio_app(app, demo, path="/gradio")

 from fastapi import FastAPI
 from pydantic import BaseModel
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import torch, gradio as gr
 MODEL_ID = "Gopu-poss/gopu-agent-2k-fdf"
 class Input(BaseModel):
     input: str
+    system_prompt: str | None = None
 @app.post("/infer")
 def infer(data: Input):
+    # Concatène prompt système + input utilisateur
+    full_prompt = f"{data.system_prompt}\n{data.input}" if data.system_prompt else data.input
+    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9)
     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return {"generated_text": text}
+# --- Gradio multimodal ---
+# Ajoutons un champ "system prompt" + micro pour parler
+# Pipeline de reconnaissance vocale (tu peux choisir un modèle HF adapté)
+asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
+def gopu_chat(system_prompt, prompt, audio=None, image=None, video=None):
+    # Si audio fourni → transcrire et remplacer le prompt
+    if audio is not None:
+        transcription = asr(audio)["text"]
+        prompt = transcription
+    # Construire le prompt complet
+    full_prompt = f"{system_prompt}\n{prompt}" if system_prompt else prompt
+    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=200, temperature=0.7, top_p=0.9)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 demo = gr.Interface(
     fn=gopu_chat,
     inputs=[
+        gr.Textbox(label="Prompt système (contexte)", placeholder="Ex: Tu es GopuOS, un agent exclusif..."),
+        gr.Textbox(label="Texte utilisateur"),
+        gr.Audio(label="Voix (optionnel)", type="filepath"),
+        gr.Image(label="Image (optionnel)", type="filepath"),
+        gr.Video(label="Vidéo (optionnel)", type="filepath"),
     ],
     outputs="text",
     title="GopuOS Agentic Endpoint",
+    description="Ajoute un prompt système, parle avec ta voix, ou envoie texte/image/vidéo."
 )
 app = gr.mount_gradio_app(app, demo, path="/gradio")