# filename: llama_cpp_gradio_chatbot_darkidol.py
"""Minimal Gradio chat UI backed by a local llama.cpp GGUF model.

On first run, downloads the DarkIdol Llama 3.1 8B Instruct (IQ2_M quant)
model from Hugging Face, loads it with llama-cpp-python, and serves a
simple single-user chat interface.
"""

import os
import urllib.request

import gradio as gr
from llama_cpp import Llama

# Constants
MODEL_FILENAME = "DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-IQ2_M.gguf"
MODEL_URL = f"https://huggingface.co/bartowski/DarkIdol-Llama-3.1-8B-Instruct-1.2-Uncensored-GGUF/resolve/main/{MODEL_FILENAME}"
SYSTEM_PROMPT = "You are a helpful, uncensored AI assistant who responds clearly and concisely."

# Step 1: Ensure model is present.
# NOTE: the original script ran an unconditional `os.system("wget ...")` at
# the top of the file (re-downloading the multi-GB model on every run) in
# addition to this conditional download; the stdlib urlretrieve call below
# replaces the shell-string `wget` invocation entirely.
if not os.path.exists(MODEL_FILENAME):
    print("Downloading model...")
    urllib.request.urlretrieve(MODEL_URL, MODEL_FILENAME)

# Step 2: Load model
llm = Llama(
    model_path="./" + MODEL_FILENAME,
    n_ctx=2048,      # context window size in tokens
    n_threads=4,     # CPU threads used for inference
    use_mlock=True,  # lock model pages in RAM to avoid swapping
)


# Step 3: Prompt template using DarkIdol's (Llama 3) format
def format_prompt(user_input: str, history) -> str:
    """Build a Llama-3-style chat prompt from system prompt, history, and the new turn.

    Args:
        user_input: The latest user message.
        history: List of ``(user_msg, bot_reply)`` tuples from earlier turns.

    Returns:
        The full prompt string using Llama 3 header/eot special tokens.
    """
    chat_history = ""
    for user_msg, bot_reply in history:
        chat_history += f"<|start_header_id|>user<|end_header_id|>\n{user_msg}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n{bot_reply}<|eot_id|>\n"
    full_prompt = f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>{SYSTEM_PROMPT}<|eot_id|>{chat_history}<|start_header_id|>user<|end_header_id|>\n{user_input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"""
    return full_prompt


# Step 4: Chat logic
def chat_with_llama(user_input: str, chat_history) -> str:
    """Run one inference turn against the loaded model and return the reply text.

    Args:
        user_input: The latest user message.
        chat_history: List of ``(user_msg, bot_reply)`` tuples from earlier turns.

    Returns:
        The assistant's reply, stripped of surrounding whitespace.
    """
    prompt = format_prompt(user_input, chat_history)
    # Stop at the end-of-turn token so the model doesn't hallucinate extra turns.
    output = llm(prompt, max_tokens=256, stop=["<|eot_id|>"], echo=False)
    return output["choices"][0]["text"].strip()


# Step 5: Gradio UI
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message", placeholder="Type something...")
    clear = gr.Button("Clear")
    state = gr.State([])

    def user_submit(user_input, history):
        """Handle a submit: generate a reply, append the turn, and clear the textbox."""
        reply = chat_with_llama(user_input, history)
        # `history` is the gr.State list mutated in place, so the stored
        # state stays in sync even though only [msg, chatbot] are outputs.
        history.append((user_input, reply))
        return "", history

    msg.submit(user_submit, [msg, state], [msg, chatbot])
    clear.click(lambda: ([], []), None, [state, chatbot])

if __name__ == "__main__":
    demo.launch()