| import gradio as gr |
| from transformers import pipeline |
| import numpy as np |
|
|
# Eagerly load one ASR pipeline per supported language at import time, so the
# first transcription request does not pay the model initialization cost.
# NOTE(review): each `pipeline(...)` call downloads the model from the Hugging
# Face hub on first run — startup needs network access and is slow cold.
transcriber_hindi = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec-hindi")
transcriber_bang = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec_v1_bengali")
transcriber_odia = pipeline("automatic-speech-recognition", model="ai4bharat/indicwav2vec-odia")
|
|
|
|
def transcribe(audio, lang="hindi"):
    """Transcribe a recorded audio clip with the ASR model for *lang*.

    Args:
        audio: Tuple ``(sampling_rate, samples)`` as produced by
            ``gr.Audio`` — ``samples`` is a numpy array of raw PCM values.
        lang: One of ``"hindi"``, ``"bangali"`` or ``"odia"``.  The spelling
            "bangali" is deliberate: it must match the Radio choices wired
            up in the Gradio interface.

    Returns:
        The transcribed text, or ``None`` when *lang* is not recognized
        (preserves the original fall-through behaviour).
    """
    sr, y = audio
    y = y.astype(np.float32)

    # Down-mix multi-channel (e.g. stereo) recordings to mono; the
    # wav2vec pipelines expect a 1-D waveform.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalize to [-1, 1].  Guard against an all-zero (silent) clip,
    # which previously produced 0/0 -> NaN samples fed to the model.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    transcribers = {
        "hindi": transcriber_hindi,
        "bangali": transcriber_bang,
        "odia": transcriber_odia,
    }
    transcriber = transcribers.get(lang)
    if transcriber is None:
        return None
    return transcriber({"sampling_rate": sr, "raw": y})["text"]
|
|
|
|
# Gradio UI: microphone recording + language selector -> transcribed text.
# The Radio choices must stay in sync with the string comparisons inside
# transcribe() (including the "bangali" spelling).
# NOTE(review): `source="microphone"` is the Gradio 3.x keyword; Gradio 4+
# renamed it to `sources=["microphone"]` — confirm the installed version.
demo = gr.Interface(fn=transcribe, inputs = [gr.Audio(source="microphone"), gr.Radio(["hindi","bangali","odia"])] , outputs = "text")


# Start the local Gradio server (blocks until shut down).
demo.launch()