import os
import warnings
import json
from typing import List

import numpy as np
from sentence_transformers import SentenceTransformer
import gradio as gr

# Suppress warnings
os.environ['TRANSFORMERS_VERBOSITY'] = 'error'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
warnings.filterwarnings("ignore")

# Load Model
MODEL_NAME = "odunola/sentence-transformers-bible-reference-final"
model = SentenceTransformer(MODEL_NAME)

THEMES = ["Trust and Guidance", "Restoration and Peace", "Wrath and Judgment",
          "Jesus", "Resurrection", "Love", "Faith", "Hope", "Power", "Joy",
          "Victory", "Creation", "Suffering", "Grace", "Kingdom", "Sin",
          "Spirit", "Trinity", "Eternity", "Humble", "Wisdom", "Mercy",
          "Heaven", "Throne", "Covenant"]

# Pre-compute unit-length theme embeddings once at startup so per-request
# work is just dot products.
THEME_VECS = model.encode(THEMES, convert_to_numpy=True)
THEME_VECS = THEME_VECS / np.linalg.norm(THEME_VECS, axis=1, keepdims=True)


def chunk_text(text):
    """Split *text* into overlapping word chunks.

    Chunks are 400 words long with a 200-word stride (50% overlap) so that
    no sentence is cut off at a hard boundary.

    Returns a non-empty list: empty/whitespace-only input yields ``[text]``
    so that downstream ``model.encode``/``np.mean`` never see an empty
    sequence (which would produce NaN).
    """
    words = text.split()
    if not words:
        # Guard: an empty chunk list would make np.mean return NaN below.
        return [text]
    return [" ".join(words[i: i + 400]) for i in range(0, len(words), 200)]


def get_normalized_vector(text):
    """Encode *text* chunk-wise and return the L2-normalized mean embedding."""
    chunks = chunk_text(text)
    embeddings = model.encode(chunks)
    avg_vec = np.mean(embeddings, axis=0)
    norm = np.linalg.norm(avg_vec)
    # Zero-norm guard, consistent with get_thematic_signature; avoids a
    # division-by-zero NaN vector for degenerate embeddings.
    return avg_vec / (norm if norm > 0 else 1.0)


def get_thematic_signature(doc_vec):
    """Project a unit document vector onto the theme axes.

    Negative similarities are clipped to zero (ReLU) — only positive theme
    alignment counts — then the score vector is re-normalized so signatures
    of different documents are comparable via dot product.
    """
    raw_scores = np.dot(doc_vec, THEME_VECS.T)
    relu_scores = np.maximum(0, raw_scores)
    norm = np.linalg.norm(relu_scores)
    return relu_scores / (norm if norm > 0 else 1.0)


def analyze_thematic_similarity(passage_text, songs_json):
    """Rank songs by thematic similarity to a Bible passage.

    Called by Gradio API.
    songs_json is a stringified list of {name, lyrics} from Node.js

    Args:
        passage_text: Raw Bible passage text.
        songs_json: JSON string encoding a list of {"name", "lyrics"} dicts.

    Returns:
        List of {"name", "score", "themes"} dicts sorted by score descending.

    Raises:
        gr.Error: wraps any processing failure (bad JSON, missing keys, ...)
            so the caller gets a structured API error.
    """
    try:
        songs = json.loads(songs_json)
        p_vec = get_normalized_vector(passage_text)
        p_sig = get_thematic_signature(p_vec)
        results = []
        for song in songs:
            # Process Song Lyrics
            l_vec = get_normalized_vector(song['lyrics'])
            direct_sim = float(np.dot(p_vec, l_vec))
            relevant_themes = []
            final_score = direct_sim
            # Original Threshold Logic: only compute the (more expensive)
            # thematic blend when there is at least weak direct similarity.
            if direct_sim >= 0.1:
                l_sig = get_thematic_signature(l_vec)
                thematic_sim = float(np.dot(p_sig, l_sig))
                contributions = p_sig * l_sig
                relevant_themes = [THEMES[i] for i, val in enumerate(contributions) if val > 0.05]
                # 60/40 weighted split between direct and thematic similarity
                final_score = (0.6 * direct_sim) + (0.4 * thematic_sim)
            results.append({
                "name": song['name'],
                "score": round(final_score, 4),
                "themes": relevant_themes
            })
        # Sort by score descending
        results.sort(key=lambda x: x['score'], reverse=True)
        return results
    except Exception as e:
        raise gr.Error(f"NLP Worker Error: {str(e)}")


demo = gr.Interface(
    fn=analyze_thematic_similarity,
    inputs=[
        gr.Textbox(label="Bible Passage Text"),
        gr.Textbox(label="Songs JSON (List of {name, lyrics})")
    ],
    outputs=gr.JSON(label="Ranked Matches"),
    api_name="predict",
    title="SelahSearch NLP Agent",
    description="NLP model worker for SelahSearch API"  # Alive agent providing thematic Bible-Song matching.
)

if __name__ == "__main__":
    demo.queue().launch()  # demo.queue() is vital for high RAM reliability