"""
ocs4dev.py — Fintech API Integration Assistant
================================================
A RAG-powered chatbot for payment API integrations:
Stripe, PayPal, MTN MoMo, Pesapal, Sentezo, Square, Adyen & more.

Uses a local FAISS vector store (no cloud DB required) and supports
multiple LLM providers: Local Qwen, OpenAI, Anthropic, Google Gemini.
"""

import os
import threading
import warnings
import gradio as gr
from dotenv import load_dotenv
from typing import List, Tuple, Generator
import torch

warnings.filterwarnings("ignore")

# LangChain core (LCEL — works in LangChain 1.x)
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Multi-provider LLM support
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI

# Local vector store (FAISS replaces Supabase)
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Local model inference
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)

# Load environment variables
load_dotenv(override=True)

# ─────────────────────────────────────────────────────────────────────────────
# Constants
# ─────────────────────────────────────────────────────────────────────────────
LOCAL_MODEL_ID = "Qwen/Qwen2.5-Coder-1.5B-Instruct"  # ~3 GB RAM (FP16) - Best for CPU/HF Spaces
FAISS_INDEX_DIR = "./faiss_index"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
TOP_K = 5

# Number of trailing history entries forwarded to the local model.
LOCAL_HISTORY_WINDOW = 8
# Upper bound on tokens generated per reply by the local model.
LOCAL_MAX_NEW_TOKENS = 16384

# ─────────────────────────────────────────────────────────────────────────────
# System Prompt — the soul of ocs4dev
# ─────────────────────────────────────────────────────────────────────────────
SYSTEM_PROMPT_CORE = """\
You are **ocs4dev** — a senior-level fintech integration engineer who specialises in
payment APIs and financial infrastructure: Stripe, PayPal, Square, Adyen, MTN MoMo,
Pesapal, Sentezo (Ssentezo), Airtel Money, Flutterwave, Paystack, Razorpay, Mollie,
and related platforms worldwide.

You are NOT a generic assistant. You are the developer's *pair-programming partner*
who has shipped production payment integrations across multiple continents and
payment rails — cards, mobile money, bank transfers, crypto on-ramps, and more.

═══════════════════════════════════════════════════════════════
 CORE RULES
═══════════════════════════════════════════════════════════════

1. **CODE FIRST** — Every response that involves "how to" MUST include working,
   copy-paste-ready code. Default to Python (requests / aiohttp), but switch to
   whatever language the developer is using if you can infer it from context.
   Always include:
   • Full imports
   • Proper error handling (try/except with specific exceptions)
   • Environment variable usage for secrets (never hardcode keys)
   • Comments explaining non-obvious logic
   • Both success and failure response handling

2. **ASK BEFORE YOU ASSUME** — When a developer's question is ambiguous or could
   lead to multiple valid approaches, ASK 1-3 targeted follow-up questions BEFORE
   diving into a full answer. Examples:
   • "Are you building this for a single merchant or a marketplace with sub-accounts?"
   • "Which environment — sandbox or production? The auth flow differs."
   • "Do you need this to be synchronous or are you handling callbacks/webhooks?"
   However, if the question is clear and specific, answer directly — don't ask
   unnecessary questions just to seem thorough.

3. **GO BEYOND THE DOCS** — If the retrieved documentation doesn't cover what the
   developer needs, don't just say "this isn't supported". Instead:
   • Explain clearly what the API does and doesn't support
   • Propose a concrete architecture/workaround to achieve the goal
   • Show code for the workaround (e.g., building a virtual account ledger on top
     of a single-wallet API, or combining multiple providers)
   • Flag the trade-offs and gotchas of the workaround

4. **THINK IN SYSTEMS** — When a question implies a bigger architectural decision,
   address the architecture:
   • Database schema snippets when relevant
   • Webhook/callback handling patterns
   • Idempotency and retry strategies
   • Reconciliation approaches
   • Security considerations (HMAC verification, IP whitelisting, PCI compliance, etc.)

5. **FINTECH CONTEXT** — You deeply understand:
   • Multiple payment rails: cards, mobile money, bank transfers, wallets, BNPL
   • Network timeouts and eventual consistency — design for both
   • Sandbox vs production environments often behave differently
   • Currency handling (zero-decimal currencies like UGX/JPY, 2-decimal like USD/EUR)
   • Regulatory requirements (PCI-DSS, KYC/AML, SCA/3DS, transaction limits)
   • Common failure modes: insufficient funds, expired tokens, declined cards,
     callback URL not reachable, rate limiting, idempotency conflicts

6. **FORMAT FOR DEVELOPERS** — Structure your responses for maximum scanability:
   • Use headers (##) to separate logical sections
   • Use fenced code blocks with language tags (```python, ```bash, ```json, ```sql)
   • Use tables for comparing options, endpoints, or error codes
   • Use bullet points for lists, numbered steps for sequences
   • Bold key terms, endpoint paths, and important warnings
   • Keep explanatory text concise — developers read code, not essays

7. **BE OPINIONATED** — Don't present 5 options without a recommendation.
   State your preferred approach and WHY, then mention alternatives briefly.
   Example: "I'd go with webhooks over polling here because most payment providers
   deliver callbacks reliably in production, and polling status endpoints adds
   unnecessary load and latency."

8. **WARN ABOUT PITFALLS** — Proactively mention common mistakes:
   • ⚠️ warnings for things that will break in production
   • 💡 tips for things that save debugging time
   • 🔒 security notes when handling payment data
"""


def build_system_prompt(context: str) -> str:
    """Build the full system prompt with retrieved documentation context.

    Args:
        context: Retrieved documentation text; an empty string means nothing
            was retrieved.

    Returns:
        ``SYSTEM_PROMPT_CORE`` followed by either the documentation section or
        a note telling the model to rely on general knowledge.
    """
    doc_section = (
        f"\n═══════════════════════════════════════════════════════════════\n"
        f" RETRIEVED DOCUMENTATION\n"
        f"═══════════════════════════════════════════════════════════════\n\n"
        f"{context}\n\n"
        f"Use the documentation above to ground your answers with specific endpoints, \n"
        f"headers, and request/response formats. If the docs don't fully cover the \n"
        f"developer's question, supplement with your deep knowledge of these APIs \n"
        f"and clearly distinguish between doc-sourced facts and your recommendations."
    ) if context else (
        "\nNo specific documentation was retrieved for this query. "
        "Rely on your broad knowledge of fintech APIs, but clearly state "
        "when you're working from general knowledge rather than specific docs."
    )
    return SYSTEM_PROMPT_CORE + doc_section


def build_local_system_prompt(context: str) -> str:
    """Shorter system prompt optimised for the local 1.5B model's context window."""
    return (
        "You are ocs4dev, a senior fintech integration engineer specializing in "
        "payment APIs (Stripe, PayPal, MTN MoMo, Pesapal, Sentezo, Square, Adyen, and more).\n\n"
        "Rules:\n"
        "- Always include working code examples with error handling\n"
        "- Ask follow-up questions when the intent is unclear\n"
        "- If an API doesn't support something, propose a workaround with code\n"
        "- Use Python by default, fenced code blocks with language tags\n"
        "- Be concise but complete — developers read code, not essays\n"
        "- Warn about common pitfalls with ⚠️\n\n"
        f"Retrieved docs:\n{context if context else 'No docs retrieved — use general knowledge.'}"
    )


# ─────────────────────────────────────────────────────────────────────────────
# Model configurations — March 2026
# ─────────────────────────────────────────────────────────────────────────────
MODEL_CONFIGS = {
    "openai": {
        "budget": "gpt-5-mini",
        "premium": "gpt-5.2",
    },
    "anthropic": {
        "budget": "claude-haiku-4-5",
        "premium": "claude-opus-4-6",
    },
    "google": {
        "budget": "gemini-2.0-flash",
        "premium": "gemini-2.5-pro",
    },
}

MODEL_DISPLAY = {
    "openai": {
        "budget": "GPT-5 mini",
        "premium": "GPT-5.2",
    },
    "anthropic": {
        "budget": "Claude Haiku 4.5",
        "premium": "Claude Opus 4.6",
    },
    "google": {
        "budget": "Gemini 2.0 Flash",
        "premium": "Gemini 2.5 Pro",
    },
}


# ─────────────────────────────────────────────────────────────────────────────
# Shared helpers
# ─────────────────────────────────────────────────────────────────────────────
def append_msg(history, role: str, content: str) -> list:
    """Return a copy of *history* with one ``{"role", "content"}`` entry appended.

    The input list is never mutated, so every streamed update is a fresh list.
    """
    return list(history or []) + [{"role": role, "content": content}]


def _format_docs(docs) -> str:
    """Join retrieved documents into one string, each tagged with its provider."""
    return "\n\n---\n\n".join(
        f"[{d.metadata.get('provider', 'Docs')}]\n{d.page_content}"
        for d in docs
    )


def _iter_history(history):
    """Yield ``(role, content)`` pairs for the usable entries of a chat history.

    Accepts dict entries (``{"role": ..., "content": ...}``), objects exposing
    ``role`` / ``content`` attributes, and legacy ``(user, assistant)`` pairs.
    Entries with an empty content or a role other than user/assistant are
    skipped.
    """
    for entry in history or []:
        if isinstance(entry, (tuple, list)) and len(entry) == 2:
            # Legacy pair format: one user turn followed by one assistant turn.
            user_text, assistant_text = entry
            if user_text:
                yield "user", str(user_text)
            if assistant_text:
                yield "assistant", str(assistant_text)
            continue

        if isinstance(entry, dict):
            role = entry.get("role", "user")
            content = entry.get("content") or ""
        else:
            role = getattr(entry, "role", "user")
            content = getattr(entry, "content", "") or ""

        content = str(content)
        if role in ("user", "assistant") and content:
            yield role, content


def _history_to_lc_messages(history) -> list:
    """Convert a chat history into LangChain Human/AI message objects."""
    converted = []
    for role, content in _iter_history(history):
        if role == "user":
            converted.append(HumanMessage(content=content))
        else:
            converted.append(AIMessage(content=content))
    return converted


def _chunk_text(chunk) -> str:
    """Extract the text carried by one streamed LLM chunk.

    ``chunk.content`` may be a plain string or a list of content blocks
    (strings or dicts carrying a ``"text"`` key); both shapes are flattened
    into a single string.
    """
    content = getattr(chunk, "content", chunk)
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for block in content:
            if isinstance(block, str):
                pieces.append(block)
            elif isinstance(block, dict) and block.get("type", "text") == "text":
                pieces.append(str(block.get("text", "")))
        return "".join(pieces)
    return "" if content is None else str(content)


# ─────────────────────────────────────────────────────────────────────────────
# Assistant Class
# ─────────────────────────────────────────────────────────────────────────────
class OCS4DevAssistant:
    """Chat backend: FAISS retrieval plus a local or API-hosted LLM."""

    def __init__(self):
        self._setup_environment()
        self._setup_vector_store()

        # Local model is lazy-loaded on first use — keeps startup fast
        self._local_tokenizer = None
        self._local_model = None
        self._model_loaded = False
        self._model_loading = False
        self._model_lock = threading.Lock()

    # ── Environment ──────────────────────────────────────────────────────────
    def _setup_environment(self):
        """Read the provider API keys from the environment (empty if unset)."""
        self.openai_key = os.getenv("OPENAI_API_KEY", "")
        self.anthropic_key = os.getenv("ANTHROPIC_API_KEY", "")
        self.google_key = os.getenv("GOOGLE_API_KEY", "")

    def _resolve_api_key(
        self, provider: str, openai_key: str, anthropic_key: str, google_key: str
    ) -> str:
        """Pick the key for *provider*: the UI-supplied value, else the env value.

        Returns an empty string for an unknown provider or when no key is set.
        """
        key_map = {
            "openai": openai_key or self.openai_key,
            "anthropic": anthropic_key or self.anthropic_key,
            "google": google_key or self.google_key,
        }
        return key_map.get(provider, "")

    # ── Vector Store ─────────────────────────────────────────────────────────
    def _setup_vector_store(self):
        """Load local FAISS index built by build_index.py

        Sets ``self.vector_store`` to the loaded store, or to ``None`` when the
        index directory is missing or loading fails.
        """
        # Always define both attributes so later code can test them safely.
        self.embeddings = None
        self.vector_store = None

        if not os.path.exists(FAISS_INDEX_DIR):
            print(f"⚠️ FAISS index not found at '{FAISS_INDEX_DIR}'.")
            print(" Run python build_index.py to create it.")
            return

        try:
            print("🔄 Loading FAISS vector index...")
            self.embeddings = HuggingFaceEmbeddings(
                model_name=EMBEDDING_MODEL,
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
            )
            # SECURITY: this flag lets the loader unpickle the stored index.
            # Only point FAISS_INDEX_DIR at an index you built yourself.
            self.vector_store = FAISS.load_local(
                FAISS_INDEX_DIR,
                self.embeddings,
                allow_dangerous_deserialization=True,
            )
            print("✅ FAISS vector store loaded!")
        except Exception as e:
            print(f"❌ Failed to load FAISS index: {e}")
            self.vector_store = None

    # ── Chat handler used by gr.ChatInterface ────────────────────────────────
    def respond(
        self,
        message: str,
        history: list,
        provider: str = "local",
        tier: str = "budget",
        openai_key: str = "",
        anthropic_key: str = "",
        google_key: str = "",
    ) -> Generator[str, None, None]:
        """
        Chat handler for gr.ChatInterface — yield plain string chunks.
        ChatInterface manages history format automatically (no dict wrangling here).

        Each yielded value is the full reply accumulated so far. Nothing is
        yielded for an empty or whitespace-only message. Provider errors are
        reported as a yielded message rather than raised.
        """
        # Ensure values are strings (Gradio examples might pass None for some inputs)
        provider = provider or "local"
        tier = tier or "budget"

        if not message or not message.strip():
            return

        # ── Get FAISS retrieval context ───────────────────────────────────────
        context = self.get_context(message)
        # NOTE(review): the local path also receives the long prompt here, while
        # build_local_system_prompt() exists for the small model — confirm which
        # one is intended before switching.
        system_content = build_system_prompt(context)

        # ── Local model path ──────────────────────────────────────────────────
        if provider == "local":
            yield from self._local_respond(message, history, system_content)
            return

        # ── API model path ────────────────────────────────────────────────────
        api_key = self._resolve_api_key(provider, openai_key, anthropic_key, google_key)
        if not api_key:
            yield (
                f"❌ No API key for **{provider}**.\n\n"
                f"Open ⚙️ **Settings → API Keys** and enter your {provider.title()} key."
            )
            return

        try:
            llm = self._get_llm(provider, tier, api_key)

            # Build message list: system + history + new user message
            # Direct stream — avoids LCEL pipeline blocking Gradio's async loop
            messages = [SystemMessage(content=system_content)]
            messages.extend(_history_to_lc_messages(history))
            messages.append(HumanMessage(content=message))

            response = ""
            for chunk in llm.stream(messages):
                response += _chunk_text(chunk)
                yield response

        except Exception as e:
            yield f"❌ Error calling {provider} API: {str(e)}"

    # ── Local Model ──────────────────────────────────────────────────────────
    @staticmethod
    def _build_local_messages(system: str, message: str, history: list) -> list:
        """Assemble the chat-template message list for the local model.

        Only the last ``LOCAL_HISTORY_WINDOW`` history entries are included.
        """
        recent = list(history or [])[-LOCAL_HISTORY_WINDOW:]
        msgs = [{"role": "system", "content": system}]
        msgs.extend(
            {"role": role, "content": content}
            for role, content in _iter_history(recent)
        )
        msgs.append({"role": "user", "content": message})
        return msgs

    def _stream_local_generation(self, msgs: list) -> Generator[str, None, None]:
        """Run the local model on *msgs* and yield the accumulated reply text.

        Generation runs in a worker thread feeding a ``TextIteratorStreamer``.
        The model must already be loaded.

        Raises:
            Exception: whatever ``generate`` raised in the worker thread,
                re-raised here once the stream has been drained.
        """
        tokenizer = self._local_tokenizer
        model = self._local_model

        formatted = tokenizer.apply_chat_template(
            msgs, tokenize=False, add_generation_prompt=True
        )
        # Move the input tensors to wherever the model weights live.
        inputs = tokenizer(formatted, return_tensors="pt").to(model.device)

        streamer = TextIteratorStreamer(
            tokenizer,
            skip_prompt=True,
            skip_special_tokens=True,
        )
        gen_kwargs = dict(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            streamer=streamer,
            max_new_tokens=LOCAL_MAX_NEW_TOKENS,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

        failures = []

        def _run_generation():
            try:
                model.generate(**gen_kwargs)
            except Exception as exc:
                # Without this the consumer loop below would wait forever for
                # an end-of-stream signal that never arrives.
                failures.append(exc)
                streamer.end()

        worker = threading.Thread(target=_run_generation)
        worker.start()

        accumulated = ""
        for token in streamer:
            accumulated += token
            yield accumulated
        worker.join()

        if failures:
            raise failures[0]

    def _local_respond(
        self, message: str, history: list, system_content: str
    ) -> Generator[str, None, None]:
        """Stream response from the local Qwen model.

        Yields a help message instead when the model cannot be loaded, and an
        error message when generation fails.
        """
        if not self._load_local_model():
            yield (
                "⚠️ **Local model failed to load.**\n\n"
                "This usually means the model weights need to be downloaded first, "
                "or a quantization library (`auto-gptq`, `bitsandbytes`) is missing.\n\n"
                "Try selecting **Google / OpenAI / Anthropic** instead and entering an API key."
            )
            return

        msgs = self._build_local_messages(system_content, message, history)
        try:
            yield from self._stream_local_generation(msgs)
        except Exception as e:
            yield f"❌ Local model error: {str(e)}"

    def _load_local_model(self) -> bool:
        """Load the tokenizer and model for ``LOCAL_MODEL_ID`` on first use.

        The load runs while holding ``self._model_lock``, so a concurrent
        caller waits for it and then sees the loaded model.

        Returns:
            True when the model is available, False when loading failed.
        """
        if self._model_loaded:
            return True

        with self._model_lock:
            # Re-check: another thread may have finished loading while we waited.
            if self._model_loaded:
                return True

            self._model_loading = True
            print(f"\n🚀 Loading local model: {LOCAL_MODEL_ID}...")
            try:
                device = "cuda" if torch.cuda.is_available() else "cpu"
                print(f" Device: {device}")

                tokenizer = AutoTokenizer.from_pretrained(
                    LOCAL_MODEL_ID,
                    trust_remote_code=True,
                )
                model = AutoModelForCausalLM.from_pretrained(
                    LOCAL_MODEL_ID,
                    dtype=torch.float16 if device == "cuda" else torch.float32,
                    device_map="auto" if device == "cuda" else None,
                    trust_remote_code=True,
                    low_cpu_mem_usage=True,
                )
                if device == "cpu":
                    model = model.to("cpu")

                # Publish both objects only once both loaded successfully.
                self._local_tokenizer = tokenizer
                self._local_model = model
                self._model_loaded = True
                print("✅ Local model loaded!")
                return True
            except Exception as e:
                print(f"❌ Failed to load local model: {e}")
                return False
            finally:
                self._model_loading = False

    # ── Retrieval ─────────────────────────────────────────────────────────────
    def get_context(self, query: str) -> str:
        """Return relevant doc chunks from FAISS as a string.

        Returns an empty string when no vector store is loaded or the search
        raises.
        """
        if not self.vector_store:
            return ""
        try:
            docs = self.vector_store.similarity_search(query, k=TOP_K)
            return _format_docs(docs)
        except Exception as e:
            print(f"Retrieval error: {e}")
            return ""

    def get_retriever(self):
        """Return a LangChain retriever from the FAISS store (None if no store)."""
        if not self.vector_store:
            return None
        return self.vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": TOP_K},
        )

    # ── LLM Factory ──────────────────────────────────────────────────────────
    def _get_llm(self, provider: str, tier: str, api_key: str):
        """Return a streaming-capable LangChain LLM instance.

        Raises:
            ValueError: if *provider* or *tier* is not present in MODEL_CONFIGS.
        """
        tiers = MODEL_CONFIGS.get(provider)
        if tiers is None:
            raise ValueError(f"Unknown provider: {provider}")
        if tier not in tiers:
            raise ValueError(f"Unknown tier for {provider}: {tier}")

        model_id = tiers[tier]
        common = {"temperature": 0.3, "streaming": True}

        if provider == "openai":
            return ChatOpenAI(
                model=model_id,
                openai_api_key=api_key,
                **common,
            )
        elif provider == "anthropic":
            # NOTE(review): confirm 128000 does not exceed the output-token
            # limit of every Anthropic model listed in MODEL_CONFIGS.
            return ChatAnthropic(
                model=model_id,
                max_tokens=128000,
                anthropic_api_key=api_key,
                **common,
            )
        elif provider == "google":
            return ChatGoogleGenerativeAI(
                model=model_id,
                google_api_key=api_key,
                **common,
            )
        raise ValueError(f"Unknown provider: {provider}")

    # ── RAG Chain (LCEL) ──────────────────────────────────────────────────────
    def _build_rag_chain(self, llm):
        """
        Build a history-aware RAG chain using LCEL.
        Compatible with LangChain 1.x (old langchain.chains removed in v1).

        Returns None when no retriever is available. The chain expects a dict
        with ``input`` and ``chat_history`` keys and produces strings.
        """
        retriever = self.get_retriever()
        if not retriever:
            return None

        # Step 1 — reformulate the question given chat history
        condense_prompt = ChatPromptTemplate.from_messages([
            ("system",
             "Given the chat history and the latest user question, "
             "rewrite it as a clear standalone question about fintech API integration. "
             "Do NOT answer — only reformulate if needed."),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])
        condense_chain = condense_prompt | llm | StrOutputParser()

        def contextualized_retriever(inputs: dict):
            """Reformulate question if history exists, then retrieve."""
            if inputs.get("chat_history"):
                standalone = condense_chain.invoke(inputs)
            else:
                standalone = inputs["input"]
            return retriever.invoke(standalone)

        # Step 2 — answer using retrieved context
        qa_prompt = ChatPromptTemplate.from_messages([
            ("system", SYSTEM_PROMPT_CORE + """
═══════════════════════════════════════════════════════════════
 RETRIEVED DOCUMENTATION
═══════════════════════════════════════════════════════════════

{context}

Use the documentation above to ground your answers with specific endpoints,
headers, and request/response formats. If the docs don't fully cover the
developer's question, supplement with your deep knowledge of these APIs
and clearly distinguish between doc-sourced facts and your recommendations."""),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])

        # LCEL chain: assign context → prompt → llm → parse
        rag_chain = (
            RunnablePassthrough.assign(
                context=RunnableLambda(contextualized_retriever) | _format_docs
            )
            | qa_prompt
            | llm
            | StrOutputParser()
        )
        return rag_chain

    # ── Local Model Inference (legacy dispatcher path) ────────────────────────
    def _generate_local_stream(
        self, message: str, history: list
    ) -> Generator[str, None, None]:
        """Stream tokens from the local Qwen model.

        Retrieves its own context and uses the short local system prompt.
        Yields a "loading" notice instead when the model is not available.
        """
        if not self._load_local_model():
            yield "⏳ Local model is loading, please wait a moment then try again."
            return

        context = self.get_context(message)
        system = build_local_system_prompt(context)

        # Include last 8 entries of history (4 turns)
        msgs = self._build_local_messages(system, message, history)
        yield from self._stream_local_generation(msgs)

    # ── Main Chat Dispatcher ──────────────────────────────────────────────────
    def chat_stream(
        self,
        message: str,
        history: list,  # Gradio 6: [{"role": ..., "content": ...}]
        provider: str,
        tier: str,
        openai_key: str,
        anthropic_key: str,
        google_key: str,
    ) -> Generator:
        """
        Yields (updated_history, cleared_input) pairs for Gradio streaming.
        All providers use FAISS for retrieval context.

        New turns are appended as ``{"role", "content"}`` dicts. The incoming
        history list is never mutated. API errors are reported as an assistant
        message rather than raised.
        """
        if not message or not message.strip():
            yield history, ""
            return

        # History including the new user turn; assistant text is appended per update.
        pending = append_msg(history, "user", message)

        # ── Local model path ──────────────────────────────────────────────────
        if provider == "local":
            for partial in self._generate_local_stream(message, history):
                yield append_msg(pending, "assistant", partial), ""
            return

        # ── API model path ────────────────────────────────────────────────────
        api_key = self._resolve_api_key(provider, openai_key, anthropic_key, google_key)
        if not api_key:
            err_msg = (
                f"❌ No API key provided for **{provider}**.\n\n"
                f"Open the ⚙️ **Settings** panel and enter your {provider.title()} API key."
            )
            yield append_msg(pending, "assistant", err_msg), ""
            return

        try:
            llm = self._get_llm(provider, tier, api_key)
            rag_chain = self._build_rag_chain(llm)

            # Convert Gradio history → LangChain messages
            lc_history = _history_to_lc_messages(history)

            partial = ""
            if rag_chain:
                for chunk in rag_chain.stream(
                    {"input": message, "chat_history": lc_history}
                ):
                    # LCEL chain yields strings directly (not dicts)
                    if isinstance(chunk, str):
                        partial += chunk
                    elif isinstance(chunk, dict) and "answer" in chunk:
                        partial += chunk["answer"]
                    yield append_msg(pending, "assistant", partial), ""
            else:
                # Fallback: no FAISS — direct call with injected context
                context = self.get_context(message)
                prompt = (
                    f"You are ocs4dev, a fintech API expert.\n\n"
                    f"Context:\n{context}\n\nQuestion:\n{message}"
                )
                for chunk in llm.stream(prompt):
                    partial += _chunk_text(chunk)
                    yield append_msg(pending, "assistant", partial), ""

        except Exception as e:
            yield append_msg(pending, "assistant", f"❌ Error: {str(e)}"), ""


# ─────────────────────────────────────────────────────────────────────────────
# Gradio Interface
# ─────────────────────────────────────────────────────────────────────────────
def _tier_names(provider: str, with_labels: bool = False) -> str:
    """Format the budget/premium display names of *provider* from MODEL_DISPLAY."""
    names = MODEL_DISPLAY[provider]
    if with_labels:
        return f"{names['budget']} (budget) / {names['premium']} (premium)"
    return f"{names['budget']} / {names['premium']}"


def create_gradio_interface():
    """Build the Gradio Blocks app.

    Returns:
        The ``gr.Blocks`` interface, or None when the assistant fails to
        initialise.
    """
    print("🚀 Starting ocs4dev — Fintech API Integration Assistant")

    try:
        assistant = OCS4DevAssistant()
        print("✅ ocs4dev initialized!")
    except Exception as e:
        print(f"❌ Initialization failed: {e}")
        return None

    css = """
    /* ── Hide ALL scrollbars ── */
    *, *::before, *::after {
        scrollbar-width: none !important;
        -ms-overflow-style: none !important;
    }
    *::-webkit-scrollbar {
        display: none !important;
        width: 0 !important;
        height: 0 !important;
    }
    .footer { display: none !important; }

    /* Info / warning boxes */
    .info-box {
        background: var(--background-fill-secondary);
        border-left: 4px solid var(--color-accent);
        border-radius: 4px;
        padding: 10px 14px;
        margin: 8px 0;
        font-size: 13px;
    }
    .warn-box {
        background: var(--background-fill-secondary);
        border-left: 4px solid #f59e0b;
        border-radius: 4px;
        padding: 10px 14px;
        margin: 8px 0;
        font-size: 13px;
    }
    .status-ok { color: #22c55e !important; font-weight: 600; }
    .status-err { color: #ef4444 !important; font-weight: 600; }

    /* ── Submit / Stop buttons live inside the Textbox in Gradio 6.x ── */
    /* Ensure they are never clipped or collapsed */
    .textbox-wrap button, .textbox button {
        display: inline-flex !important;
        visibility: visible !important;
        opacity: 1 !important;
        flex-shrink: 0 !important;
    }
    """

    # The status text is shown inside gr.HTML, so it carries its own styling class.
    rag_status = (
        '<span class="status-ok">✅ FAISS index loaded</span>'
        if assistant.vector_store
        else '<span class="status-err">⚠️ FAISS index missing — run build_index.py</span>'
    )

    local_model_name = LOCAL_MODEL_ID.split("/")[-1]

    with gr.Blocks(
        title="ocs4dev — Fintech API Assistant",
        theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate", neutral_hue="slate"),
        css=css,
        fill_height=True,
    ) as interface:

        # ── Settings Sidebar ──────────────────────────────────────────────────
        with gr.Sidebar(open=True):
            gr.Markdown("## ⚙️ Settings")

            with gr.Group():
                gr.Markdown("### 🤖 Model Provider")
                provider = gr.Radio(
                    choices=["local", "openai", "anthropic", "google"],
                    value="local",
                    label="Provider",
                    info="Local = free (Qwen 1.5B, no key). API = cloud (needs key).",
                )
                tier = gr.Radio(
                    choices=["budget", "premium"],
                    value="budget",
                    label="Tier",
                    info="Budget: faster & cheaper | Premium: most capable",
                )
                # Raw newlines do not render in HTML, hence the explicit <br> tags.
                gr.HTML(
                    '<div class="info-box">'
                    "Available models:<br>"
                    f"🔵 Google: {_tier_names('google')}<br>"
                    f"🟢 OpenAI: {_tier_names('openai')}<br>"
                    f"🟠 Anthropic: {_tier_names('anthropic')}<br>"
                    f"⚫ Local: {local_model_name} (free, no key)"
                    "</div>"
                )

            with gr.Accordion("🔑 API Keys", open=True):
                gr.HTML(
                    '<div class="warn-box">⚠️ Security: Use dev/test keys only. '
                    "Never paste production keys into shared UIs.</div>"
                )
                openai_key = gr.Textbox(
                    placeholder="sk-...",
                    label="OpenAI key",
                    type="password",
                    info=_tier_names("openai", with_labels=True),
                )
                anthropic_key = gr.Textbox(
                    placeholder="sk-ant-...",
                    label="Anthropic key",
                    type="password",
                    info=_tier_names("anthropic", with_labels=True),
                )
                google_key = gr.Textbox(
                    placeholder="AIza...",
                    label="Google key",
                    type="password",
                    info=_tier_names("google", with_labels=True),
                )
                gr.Markdown(
                    "[Get OpenAI key](https://platform.openai.com/api-keys) · "
                    "[Get Anthropic key](https://console.anthropic.com/) · "
                    "[Get Google key](https://aistudio.google.com/apikey)"
                )

            with gr.Accordion("📚 Knowledge Base", open=False):
                gr.HTML(
                    '<div class="info-box">'
                    f"{rag_status}<br><br>"
                    "📱 MTN MoMo — Auth, Collections, Disbursements, Remittances<br>"
                    "💳 Pesapal — E-commerce API 3.0, POS, Recurring Payments<br>"
                    "💰 Sentezo — Wallet Deposits, Withdrawals, Bank Transfers<br><br>"
                    "All retrieval is local via FAISS (no cloud DB)."
                    "</div>"
                )

        # ── Chat Interface ─────────────────────────────────────────────────────
        gr.Markdown("# 🏦 ocs4dev — Fintech API Integration Assistant")
        gr.Markdown(
            "*Your AI-powered fintech integration partner. Stripe, PayPal, MTN MoMo, Pesapal, "
            "Sentezo, Square, Adyen & more. "
            "Select a model in ⚙️ Settings, then start chatting.*"
        )

        gr.ChatInterface(
            fn=assistant.respond,
            additional_inputs=[provider, tier, openai_key, anthropic_key, google_key],
            chatbot=gr.Chatbot(
                height=480,
                placeholder=(
                    "### 👋 Welcome to ocs4dev!\n"
                    "Ask me anything about payment API integration:\n"
                    "- Authentication, tokens & API keys\n"
                    "- Code examples with error handling\n"
                    "- Webhook / callback setup\n"
                    "- Subscriptions, payouts & refunds\n"
                    "- Error codes & debugging"
                ),
                label="ocs4dev",
                render_markdown=True,
                avatar_images=(
                    None,
                    "https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
                ),
            ),
            textbox=gr.Textbox(
                placeholder="Ask about Stripe payments, MTN MoMo auth, webhook setup, virtual accounts...",
                label="",
                lines=2,
                scale=7,
                submit_btn="Send ➤",
                stop_btn="Stop ■",
            ),
            examples=[
                ["How do I create a Stripe Checkout session with error handling?"],
                ["Show me MTN MoMo API authentication and access token flow"],
                ["How do I verify Stripe webhook signatures in Python?"],
                ["What are the required headers for Sentezo Wallet deposit API?"],
                ["How do I handle 3D Secure / SCA for card payments?"],
                ["Show me how to set up PayPal subscriptions with recurring billing"],
                ["How do I implement idempotent payment requests?"],
                ["Compare Stripe vs Pesapal for e-commerce in East Africa"],
                ["How do I build a virtual account system on top of a single-wallet API?"],
            ],
            fill_height=True,
        )

        gr.Markdown(
            "Built with ❤️ by Aaron · "
            "Qwen2.5-Coder · LangChain · FAISS · Gradio"
        )

    return interface


# ─────────────────────────────────────────────────────────────────────────────
# Entry point
# ─────────────────────────────────────────────────────────────────────────────
def main():
    """Create the interface and serve it on port 7860 (all network interfaces)."""
    interface = create_gradio_interface()
    if interface:
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            inbrowser=False,
            show_error=True,
            quiet=False,
            max_threads=10,
        )


if __name__ == "__main__":
    main()