abdelhaqueidali commited on
Commit
913bd94
·
verified ·
1 Parent(s): e6dff77

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -0
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import unicodedata
4
+ from typing import List, Dict
5
+ import html
6
+ import re
7
+
8
def normalize_text(text: str, language: str) -> str:
    """Fold *text* into a search-friendly, lower-cased form.

    Arabic: hamza-carrying alifs collapse to a bare alif and combining
    marks (harakat) are stripped. French: diacritics are stripped.
    Amazigh: emphatic yar maps to plain yar and the labialization sign
    is removed. Any other language value is only lower-cased.
    """
    if not text:
        return text

    if language == "Arabic":
        text = text.replace('أ', 'ا').replace('إ', 'ا').replace('آ', 'ا')
        text = _strip_combining_marks(text)
    elif language == "French":
        text = _strip_combining_marks(text)
    elif language == "Amazigh":
        text = text.replace('ⵕ', 'ⵔ').replace('ⵯ', '')

    return text.lower()


def _strip_combining_marks(text: str) -> str:
    """Drop Unicode combining marks (category M*) after NFKD decomposition."""
    return ''.join(c for c in unicodedata.normalize('NFKD', text)
                   if not unicodedata.category(c).startswith('M'))


def _run_priority_query(cursor, priority: int, condition_template: str,
                        per_column_params: List[str], search_columns: List[str],
                        seen_word_ids: set, priority_results: list) -> List[str]:
    """Run one search pass over all columns, tagging new rows with *priority*.

    Rows whose word_id was already collected by an earlier (higher-priority)
    pass are skipped so each dictionary entry appears at most once.
    Column names are only ever the hard-coded ones from search_dictionary,
    so formatting them into SQL is injection-safe; user input stays in the
    bound parameters. Returns the column names of the ``lexie`` table.
    """
    conditions = [condition_template.format(col=col) for col in search_columns]
    query = f"SELECT * FROM lexie WHERE {' OR '.join(conditions)}"
    params = [p for _ in search_columns for p in per_column_params]
    cursor.execute(query, params)
    column_names = [desc[0] for desc in cursor.description]
    word_id_idx = column_names.index('word_id') if 'word_id' in column_names else -1
    for row in cursor.fetchall():
        if word_id_idx == -1:
            continue  # no word_id column: cannot dedupe, mirror old behavior (skip)
        word_id = row[word_id_idx]
        if word_id not in seen_word_ids:
            seen_word_ids.add(word_id)
            priority_results.append((priority, row))
    return column_names


def _format_result_html(result_dict: Dict[str, object]) -> str:
    """Render one ``lexie`` row as an HTML card; empty/missing fields are omitted."""
    out = ["<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>"]
    # str() guards against non-text values (e.g. integer ids) reaching escape().
    if result_dict.get('source'):
        out.append(f"<div style='text-align: center; font-style: italic;'>{html.escape(str(result_dict['source']))}</div>")
    if result_dict.get('category'):
        out.append(f"<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>{html.escape(str(result_dict['category']))}</div>")

    sections = [
        ("Word", [
            ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
            ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
            ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
            ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
            ('fem_plural_construct', 'Feminine Plural Construct'),
        ]),
        ("Translations", [
            ('french', 'French'), ('arabic', 'Arabic'), ('mean_ar', 'Arabic Meaning'),
        ]),
        ("Expressions", [
            ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
            ('exp_ara', 'Arabic Expression'),
        ]),
    ]
    for title, fields in sections:
        out.append(f"<h3>{title}</h3><ul>")
        for field, label in fields:
            if result_dict.get(field):
                out.append(f"<li><strong>{label}:</strong> {html.escape(str(result_dict[field]))}</li>")
        out.append("</ul>")

    out.append("</div>")
    return ''.join(out)


def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    """Search the ``asawal_amqran.db`` dictionary and return results as HTML.

    The enabled search modes run as successive passes in priority order
    (exact > whole word > contains > starts-with > ends-with); a row is
    attributed to the first pass that finds it. Results are rendered as
    HTML cards, best matches first.

    Returns an HTML fragment; validation problems yield a short ``<p>``
    message instead of raising.

    Fixes vs. the previous revision:
    - the word-match pass paired each positive ``LIKE`` with a
      ``NOT LIKE '%term%'`` that contradicted it, making whole-word
      matches unsatisfiable;
    - ``cursor.description`` was read after ``conn.close()``;
    - the connection leaked on the early validation returns (validation
      now happens before connecting, and close is in ``finally``).
    """
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"

    search_columns = {
        "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                    "variante", "feminine", "fem_construct", "fem_plural",
                    "fem_plural_construct", "exp_zgh"],
        "Arabic": ["arabic", "exp_ara", "mean_ar"],
        "French": ["french", "exp_fra"]
    }.get(language, [])

    if not search_columns:
        return "<p>Please select a language</p>"

    if not any([exact_match, word_match, contains, starts_with, ends_with]):
        return "<p>Please select at least one search option</p>"

    normalized_search = normalize_text(search_term, language)

    # (priority, SQL condition template per column, bound params per column).
    # Lower priority number = better match.
    passes = []
    if exact_match:
        passes.append((1, "LOWER({col}) = ?", [normalized_search]))
    if word_match:
        # Whole-word match inside a cell: the cell equals the term, starts
        # with "term ", ends with " term", or contains " term ".
        passes.append((
            2,
            "(LOWER({col}) = ? OR LOWER({col}) LIKE ? OR LOWER({col}) LIKE ? OR LOWER({col}) LIKE ?)",
            [normalized_search,
             f"{normalized_search} %",
             f"% {normalized_search}",
             f"% {normalized_search} %"],
        ))
    if contains:
        passes.append((3, "LOWER({col}) LIKE ?", [f"%{normalized_search}%"]))
    if starts_with:
        passes.append((4, "LOWER({col}) LIKE ?", [f"{normalized_search}%"]))
    if ends_with:
        passes.append((5, "LOWER({col}) LIKE ?", [f"%{normalized_search}"]))

    priority_results: list = []
    seen_word_ids: set = set()
    column_names: List[str] = []

    conn = sqlite3.connect('asawal_amqran.db')
    try:
        cursor = conn.cursor()
        for priority, template, per_column_params in passes:
            # Column names are captured here, while the connection is open.
            column_names = _run_priority_query(
                cursor, priority, template, per_column_params,
                search_columns, seen_word_ids, priority_results)
    finally:
        conn.close()

    if not priority_results:
        return "<p>No results found</p>"

    # Stable sort keeps within-priority insertion order, as before.
    priority_results.sort(key=lambda item: item[0])

    body = ''.join(_format_result_html(dict(zip(column_names, row)))
                   for _, row in priority_results)
    return f"<div style='font-family: Arial, sans-serif;'>{body}</div>"
205
# Gradio UI: search controls in a narrow left column, HTML results on the right.
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search Term", placeholder="Enter search term...")
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                choices=["Amazigh", "Arabic", "French"],
                label="Select Language",
                value="Arabic",
            )

            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(label="Exact Match (whole cell)", value=True)
            word_match = gr.Checkbox(label="Exact Word Match (within cell)", value=True)
            contains = gr.Checkbox(label="Contains", value=True)
            starts_with = gr.Checkbox(label="Starts With", value=False)
            ends_with = gr.Checkbox(label="Ends With", value=False)

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    # Pressing Enter in the textbox and clicking the button run the same search.
    wiring = dict(
        fn=search_dictionary,
        inputs=[search_input, language, exact_match, word_match, contains, starts_with, ends_with],
        outputs=output,
    )
    search_input.submit(**wiring)
    search_button.click(**wiring)

demo.launch()