abdelhaqueidali commited on
Commit
913bd94
·
verified ·
1 Parent(s): e6dff77

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -0
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import sqlite3
3
+ import unicodedata
4
+ from typing import List, Dict
5
+ import html
6
+ import re
7
+
8
def normalize_text(text: str, language: str) -> str:
    """Fold *text* into a search-friendly, lower-cased form.

    Arabic: hamza-carrying alifs collapse to a bare alif and combining
    marks (harakat) are stripped. French: diacritics are stripped.
    Amazigh: emphatic yar maps to plain yar and the labialization sign
    is removed. Any other language value is only lower-cased.
    """
    if not text:
        return text

    if language == "Arabic":
        text = text.replace('أ', 'ا').replace('إ', 'ا').replace('آ', 'ا')
        text = _strip_combining_marks(text)
    elif language == "French":
        text = _strip_combining_marks(text)
    elif language == "Amazigh":
        text = text.replace('ⵕ', 'ⵔ').replace('ⵯ', '')

    return text.lower()


def _strip_combining_marks(text: str) -> str:
    """Drop Unicode combining marks (category M*) after NFKD decomposition."""
    return ''.join(c for c in unicodedata.normalize('NFKD', text)
                   if not unicodedata.category(c).startswith('M'))


def _run_priority_query(cursor, priority: int, condition_template: str,
                        per_column_params: List[str], search_columns: List[str],
                        seen_word_ids: set, priority_results: list) -> List[str]:
    """Run one search pass over all columns, tagging new rows with *priority*.

    Rows whose word_id was already collected by an earlier (higher-priority)
    pass are skipped so each dictionary entry appears at most once.
    Column names are only ever the hard-coded ones from search_dictionary,
    so formatting them into SQL is injection-safe; user input stays in the
    bound parameters. Returns the column names of the ``lexie`` table.
    """
    conditions = [condition_template.format(col=col) for col in search_columns]
    query = f"SELECT * FROM lexie WHERE {' OR '.join(conditions)}"
    params = [p for _ in search_columns for p in per_column_params]
    cursor.execute(query, params)
    column_names = [desc[0] for desc in cursor.description]
    word_id_idx = column_names.index('word_id') if 'word_id' in column_names else -1
    for row in cursor.fetchall():
        if word_id_idx == -1:
            continue  # no word_id column: cannot dedupe, mirror old behavior (skip)
        word_id = row[word_id_idx]
        if word_id not in seen_word_ids:
            seen_word_ids.add(word_id)
            priority_results.append((priority, row))
    return column_names


def _format_result_html(result_dict: Dict[str, object]) -> str:
    """Render one ``lexie`` row as an HTML card; empty/missing fields are omitted."""
    out = ["<div style='border: 1px solid #ccc; margin: 10px; padding: 15px; position: relative;'>"]
    # str() guards against non-text values (e.g. integer ids) reaching escape().
    if result_dict.get('source'):
        out.append(f"<div style='text-align: center; font-style: italic;'>{html.escape(str(result_dict['source']))}</div>")
    if result_dict.get('category'):
        out.append(f"<div style='position: absolute; top: 10px; right: 10px; font-weight: bold;'>{html.escape(str(result_dict['category']))}</div>")

    sections = [
        ("Word", [
            ('word', 'Word'), ('latin', 'Latin'), ('construct', 'Construct'),
            ('plural', 'Plural'), ('acc', 'Accusative'), ('accneg', 'Negative Accusative'),
            ('inacc', 'Inaccusative'), ('variante', 'Variant'), ('feminine', 'Feminine'),
            ('fem_construct', 'Feminine Construct'), ('fem_plural', 'Feminine Plural'),
            ('fem_plural_construct', 'Feminine Plural Construct'),
        ]),
        ("Translations", [
            ('french', 'French'), ('arabic', 'Arabic'), ('mean_ar', 'Arabic Meaning'),
        ]),
        ("Expressions", [
            ('exp_zgh', 'Amazigh Expression'), ('exp_fra', 'French Expression'),
            ('exp_ara', 'Arabic Expression'),
        ]),
    ]
    for title, fields in sections:
        out.append(f"<h3>{title}</h3><ul>")
        for field, label in fields:
            if result_dict.get(field):
                out.append(f"<li><strong>{label}:</strong> {html.escape(str(result_dict[field]))}</li>")
        out.append("</ul>")

    out.append("</div>")
    return ''.join(out)


def search_dictionary(search_term: str,
                      language: str,
                      exact_match: bool,
                      word_match: bool,
                      contains: bool,
                      starts_with: bool,
                      ends_with: bool) -> str:
    """Search the ``asawal_amqran.db`` dictionary and return results as HTML.

    The enabled search modes run as successive passes in priority order
    (exact > whole word > contains > starts-with > ends-with); a row is
    attributed to the first pass that finds it. Results are rendered as
    HTML cards, best matches first.

    Returns an HTML fragment; validation problems yield a short ``<p>``
    message instead of raising.

    Fixes vs. the previous revision:
    - the word-match pass paired each positive ``LIKE`` with a
      ``NOT LIKE '%term%'`` that contradicted it, making whole-word
      matches unsatisfiable;
    - ``cursor.description`` was read after ``conn.close()``;
    - the connection leaked on the early validation returns (validation
      now happens before connecting, and close is in ``finally``).
    """
    if not search_term or search_term.isspace():
        return "<p>Please enter a search term</p>"

    search_columns = {
        "Amazigh": ["word", "latin", "construct", "plural", "acc", "accneg", "inacc",
                    "variante", "feminine", "fem_construct", "fem_plural",
                    "fem_plural_construct", "exp_zgh"],
        "Arabic": ["arabic", "exp_ara", "mean_ar"],
        "French": ["french", "exp_fra"]
    }.get(language, [])

    if not search_columns:
        return "<p>Please select a language</p>"

    if not any([exact_match, word_match, contains, starts_with, ends_with]):
        return "<p>Please select at least one search option</p>"

    normalized_search = normalize_text(search_term, language)

    # (priority, SQL condition template per column, bound params per column).
    # Lower priority number = better match.
    passes = []
    if exact_match:
        passes.append((1, "LOWER({col}) = ?", [normalized_search]))
    if word_match:
        # Whole-word match inside a cell: the cell equals the term, starts
        # with "term ", ends with " term", or contains " term ".
        passes.append((
            2,
            "(LOWER({col}) = ? OR LOWER({col}) LIKE ? OR LOWER({col}) LIKE ? OR LOWER({col}) LIKE ?)",
            [normalized_search,
             f"{normalized_search} %",
             f"% {normalized_search}",
             f"% {normalized_search} %"],
        ))
    if contains:
        passes.append((3, "LOWER({col}) LIKE ?", [f"%{normalized_search}%"]))
    if starts_with:
        passes.append((4, "LOWER({col}) LIKE ?", [f"{normalized_search}%"]))
    if ends_with:
        passes.append((5, "LOWER({col}) LIKE ?", [f"%{normalized_search}"]))

    priority_results: list = []
    seen_word_ids: set = set()
    column_names: List[str] = []

    conn = sqlite3.connect('asawal_amqran.db')
    try:
        cursor = conn.cursor()
        for priority, template, per_column_params in passes:
            # Column names are captured here, while the connection is open.
            column_names = _run_priority_query(
                cursor, priority, template, per_column_params,
                search_columns, seen_word_ids, priority_results)
    finally:
        conn.close()

    if not priority_results:
        return "<p>No results found</p>"

    # Stable sort keeps within-priority insertion order, as before.
    priority_results.sort(key=lambda item: item[0])

    body = ''.join(_format_result_html(dict(zip(column_names, row)))
                   for _, row in priority_results)
    return f"<div style='font-family: Arial, sans-serif;'>{body}</div>"
205
# Gradio UI: search controls in a narrow left column, HTML results on the right.
with gr.Blocks(title="Dictionary Search") as demo:
    gr.Markdown("# Dictionary Search")

    with gr.Row():
        with gr.Column(scale=1):
            search_input = gr.Textbox(label="Search Term", placeholder="Enter search term...")
            search_button = gr.Button("Search")

            gr.Markdown("### Language Options")
            language = gr.Radio(
                choices=["Amazigh", "Arabic", "French"],
                label="Select Language",
                value="Arabic",
            )

            gr.Markdown("### Search Options")
            exact_match = gr.Checkbox(label="Exact Match (whole cell)", value=True)
            word_match = gr.Checkbox(label="Exact Word Match (within cell)", value=True)
            contains = gr.Checkbox(label="Contains", value=True)
            starts_with = gr.Checkbox(label="Starts With", value=False)
            ends_with = gr.Checkbox(label="Ends With", value=False)

        with gr.Column(scale=3):
            output = gr.HTML(label="Results")

    # Pressing Enter in the textbox and clicking the button run the same search.
    wiring = dict(
        fn=search_dictionary,
        inputs=[search_input, language, exact_match, word_match, contains, starts_with, ends_with],
        outputs=output,
    )
    search_input.submit(**wiring)
    search_button.click(**wiring)

demo.launch()