import os
import warnings
import json
from typing import List

import numpy as np
from sentence_transformers import SentenceTransformer
import gradio as gr

# Suppress warnings
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
warnings.filterwarnings("ignore")

# Load Model
MODEL_NAME = "odunola/sentence-transformers-bible-reference-final"
model = SentenceTransformer(MODEL_NAME)

THEMES = ["Trust and Guidance", "Restoration and Peace", "Wrath and Judgment",
          "Jesus", "Resurrection", "Love", "Faith", "Hope", "Power", "Joy",
          "Victory", "Creation", "Suffering", "Grace", "Kingdom", "Sin",
          "Spirit", "Trinity", "Eternity", "Humble", "Wisdom", "Mercy",
          "Heaven", "Throne", "Covenant"]

# Pre-compute unit-length theme embeddings once at startup so per-request
# work is just dot products.
THEME_VECS = model.encode(THEMES, convert_to_numpy=True)
THEME_VECS = THEME_VECS / np.linalg.norm(THEME_VECS, axis=1, keepdims=True)


def chunk_text(text):
    """Split *text* into overlapping word chunks.

    Chunks are 400 words long with a 200-word stride (50% overlap) so that
    no sentence is cut off at a hard boundary.

    Returns a non-empty list: empty/whitespace-only input yields ``[text]``
    so that downstream ``model.encode``/``np.mean`` never see an empty
    sequence (which would produce NaN).
    """
    words = text.split()
    if not words:
        # Guard: an empty chunk list would make np.mean return NaN below.
        return [text]
    return [" ".join(words[i: i + 400]) for i in range(0, len(words), 200)]


def get_normalized_vector(text):
    """Encode *text* chunk-wise and return the L2-normalized mean embedding."""
    chunks = chunk_text(text)
    embeddings = model.encode(chunks)
    avg_vec = np.mean(embeddings, axis=0)
    norm = np.linalg.norm(avg_vec)
    # Zero-norm guard, consistent with get_thematic_signature; avoids a
    # division-by-zero NaN vector for degenerate embeddings.
    return avg_vec / (norm if norm > 0 else 1.0)


def get_thematic_signature(doc_vec):
    """Project a unit document vector onto the theme axes.

    Negative similarities are clipped to zero (ReLU) — only positive theme
    alignment counts — then the score vector is re-normalized so signatures
    of different documents are comparable via dot product.
    """
    raw_scores = np.dot(doc_vec, THEME_VECS.T)
    relu_scores = np.maximum(0, raw_scores)
    norm = np.linalg.norm(relu_scores)
    return relu_scores / (norm if norm > 0 else 1.0)


def analyze_thematic_similarity(passage_text, songs_json):
    """Rank songs by thematic similarity to a Bible passage.

    Called by Gradio API.
    songs_json is a stringified list of {name, lyrics} from Node.js

    Args:
        passage_text: Raw Bible passage text.
        songs_json: JSON string encoding a list of {"name", "lyrics"} dicts.

    Returns:
        List of {"name", "score", "themes"} dicts sorted by score descending.

    Raises:
        gr.Error: wraps any processing failure (bad JSON, missing keys, ...)
            so the caller gets a structured API error.
    """
    try:
        songs = json.loads(songs_json)
        p_vec = get_normalized_vector(passage_text)
        p_sig = get_thematic_signature(p_vec)
        results = []
        for song in songs:
            # Process Song Lyrics
            l_vec = get_normalized_vector(song['lyrics'])
            direct_sim = float(np.dot(p_vec, l_vec))
            relevant_themes = []
            final_score = direct_sim
            # Original Threshold Logic: only compute the (more expensive)
            # thematic blend when there is at least weak direct similarity.
            if direct_sim >= 0.1:
                l_sig = get_thematic_signature(l_vec)
                thematic_sim = float(np.dot(p_sig, l_sig))
                contributions = p_sig * l_sig
                relevant_themes = [THEMES[i] for i, val in enumerate(contributions) if val > 0.05]
                # 60/40 weighted split between direct and thematic similarity
                final_score = (0.6 * direct_sim) + (0.4 * thematic_sim)
            results.append({
                "name": song['name'],
                "score": round(final_score, 4),
                "themes": relevant_themes
            })
        # Sort by score descending
        results.sort(key=lambda x: x['score'], reverse=True)
        return results
    except Exception as e:
        raise gr.Error(f"NLP Worker Error: {str(e)}")


demo = gr.Interface(
    fn=analyze_thematic_similarity,
    inputs=[
        gr.Textbox(label="Bible Passage Text"),
        gr.Textbox(label="Songs JSON (List of {name, lyrics})")
    ],
    outputs=gr.JSON(label="Ranked Matches"),
    api_name="predict",
    title="SelahSearch NLP Agent",
    description="NLP model worker for SelahSearch API"  # Alive agent providing thematic Bible-Song matching.
)

if __name__ == "__main__":
    demo.queue().launch()  # demo.queue() is vital for high RAM reliability