import torch
import gradio as gr

from model import ECAPA_gender

SAMPLE_AUDIO = [
    ("Sample 1", "samples/00001.wav"),
    ("Sample 2", "samples/00002.wav"),
]

# Load the pretrained gender-classification model and move it to GPU if available.
model = ECAPA_gender.from_pretrained("Beijuka/voice-gender-classifier")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def predict_gender_confidence(audio_file):
    """Predict speaker gender and confidence for an uploaded or recorded clip."""
    if audio_file is None:
        return "No audio provided"
    try:
        # With type="filepath" Gradio passes a path string; fall back to .name
        # in case a file-like object is supplied instead.
        path = audio_file if isinstance(audio_file, str) else getattr(audio_file, "name", None)
        if not path:
            return "No audio path provided"

        audio = model.load_audio(path)
        audio = audio.to(device)

        with torch.no_grad():
            logits = model.forward(audio)

        # Convert logits to class probabilities and pick the most likely gender.
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
        pred_idx = logits.argmax(dim=1).item()
        gender_pred = model.pred2gender[pred_idx].capitalize()
        confidence = probs[pred_idx] * 100

        return f"{gender_pred} — {confidence:.1f}% confidence"
    except Exception as e:
        return f"Error: {e}"


with gr.Blocks(title="Voice Gender Classifier") as demo:
    gr.Markdown(
        """
        ## Voice Gender Classifier
        Upload or record a short audio clip to predict speaker gender.
        Try the built-in samples if you need test audio.
        """
    )

    audio_input = gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="Upload or record audio",
    )
    prediction = gr.Textbox(label="Prediction", interactive=False)

    gr.Examples(
        examples=[path for _, path in SAMPLE_AUDIO],
        inputs=audio_input,
        outputs=prediction,
        fn=predict_gender_confidence,
        label="Try sample audios",
    )

    # Run the prediction whenever a new clip is uploaded or recorded.
    audio_input.change(fn=predict_gender_confidence, inputs=audio_input, outputs=prediction)

demo.launch(share=True)