hfsearch

Sleeping

App Files Files Community

John6666 committed on Jan 4, 2025

Commit

68eb6f0

verified ·

1 Parent(s): 6383f6b

Upload 8 files

Browse files

Files changed (8) hide show

README.md +13 -12
app.py +116 -0
hfconstants.py +7 -0
hfsearch.py +514 -0
pre-requirements.txt +1 -0
requirements.txt +2 -0
subtags.json +0 -0
tags.json +0 -0

README.md CHANGED Viewed

@@ -1,12 +1,13 @@
----
-title: Hfsearch
-emoji: 🏃
-colorFrom: green
-colorTo: purple
-sdk: gradio
-sdk_version: 5.9.1
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Hugging Face🤗 Searcher
+emoji: 🤗🔍
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: 4.44.0
+app_file: app.py
+pinned: false
+license: mit
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import spaces
+import gradio as gr
+from hfsearch import (HFSearchResult, search, update_filter, update_df, get_labels, get_valid_labels,
+                      get_tags, get_subtag_categories, update_subtag_items, update_tags, update_subtags,
+                      search_ref_repos, DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES)
+from gradio_huggingfacehub_search import HuggingfaceHubSearch
+# Gradio front end for the Hugging Face search helpers in hfsearch.py.
+# Layout: three search tabs (normal, inference-enabled models, recommended repos) that all
+# write into one shared result table, followed by a client-side filter panel.
+CSS = """
+.title { align-items: center; text-align: center; }
+.info { align-items: center; text-align: center; }
+"""
+# hfsearch falls back to empty tag containers when tags.json / subtags.json cannot be loaded,
+# so the first element must not be indexed unconditionally.
+TAG_CHOICES = get_tags()
+SUBTAG_CATEGORIES = get_subtag_categories()
+with gr.Blocks(theme="NoCrypt/miku", fill_width=True, css=CSS) as demo:
+    gr.Markdown("# Search Hugging Face🤗", elem_classes="title")
+    with gr.Column():
+        # Per-session result holder passed through every event handler.
+        search_result = gr.State(value=HFSearchResult())
+        with gr.Tab("Normal Search"):
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model", "dataset", "space"])
+                with gr.Accordion("Advanced", open=False):
+                    with gr.Row(equal_height=True):
+                        filter_str = gr.Textbox(label="Filter", info="String(s) to filter repos", value="")
+                        search_str = gr.Textbox(label="Search", info="A string that will be contained in the returned repo ids", placeholder="bert", value="", lines=1)
+                        author = gr.Textbox(label="Author", info="The author (user or organization)", value="", lines=1)
+                    with gr.Column():
+                        tags = gr.Textbox(label="Tags", info="Tag(s) to filter repos", value="")
+                        with gr.Accordion("Tag input assistance", open=False):
+                            with gr.Row(equal_height=True):
+                                tag_item = gr.Dropdown(label="Item", choices=TAG_CHOICES, value=TAG_CHOICES[0] if TAG_CHOICES else None, allow_custom_value=True, scale=4)
+                                tag_btn = gr.Button("Add", scale=1)
+                            with gr.Row(equal_height=True):
+                                subtag_cat = gr.Dropdown(label="Category", choices=SUBTAG_CATEGORIES, value=SUBTAG_CATEGORIES[0] if SUBTAG_CATEGORIES else None, scale=2)
+                                subtag_item = gr.Dropdown(label="Item", choices=[""], value="", allow_custom_value=True, scale=2)
+                                subtag_btn = gr.Button("Add", scale=1)
+                    with gr.Column():
+                        gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="all")
+                        appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
+                    with gr.Tab("for Models"):
+                        with gr.Column():
+                            infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="all")
+                            gr.Markdown("[About the Inference API status (Warm, Cold, Frozen)](https://huggingface.co/docs/api-inference/supported-models)", elem_classes="info")
+                    #    with gr.Row(equal_height=True):
+                    #        model_task = gr.Textbox(label="Task", info="String(s) of tasks models were designed for", placeholder="fill-mask", value="")
+                    #        trained_dataset = gr.Textbox(label="Trained dataset", info="Trained dataset for a model", value="")
+                    with gr.Tab("for Datasets"):
+                        size_categories = gr.CheckboxGroup(label="Size categories", info="The size of the dataset", choices=DS_SIZE_CATEGORIES, value=[])
+                    #    task_categories = gr.Textbox(label="Task categories", info="Identify datasets by the designed task", value="")
+                    #    task_ids = gr.Textbox(label="Task IDs", info="Identify datasets by the specific task", value="")
+                    #    language_creators = gr.Textbox(label="Language creators", info="Identify datasets with how the data was curated", value="")
+                    #    language = gr.Textbox(label="Language", info="String(s) representing two-character language to filter datasets by", value="")
+                    #    multilinguality = gr.Textbox(label="Multilinguality", info="String(s) representing a filter for datasets that contain multiple languages", value="")
+                    with gr.Tab("for Spaces"):
+                        with gr.Row(equal_height=True):
+                            hardware = gr.CheckboxGroup(label="Specify hardware", choices=SPACE_HARDWARES, value=[])
+                            stage = gr.CheckboxGroup(label="Specify stage", choices=SPACE_STAGES, value=[])
+                    with gr.Row(equal_height=True):
+                        sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "trending_score"], value="likes")
+                        sort_method = gr.Radio(label="Sort method", choices=["ascending order", "descending order"], value="ascending order")
+                        limit = gr.Number(label="Limit", info="If 0, fetches all models", value=1000, step=1, minimum=0, maximum=10000000)
+                        fetch_detail = gr.CheckboxGroup(label="Fetch detail", choices=["Space Runtime"], value=["Space Runtime"])
+                    with gr.Row(equal_height=True):
+                        show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
+                run_button = gr.Button("Search", variant="primary")
+        with gr.Tab("Find Serverless Inference API enabled models"):
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    # Hidden: this tab always searches models only.
+                    infer_repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model"], visible=False)
+                    with gr.Column():
+                        infer_infer_status = gr.Radio(label="Inference status", choices=["warm", "cold", "frozen", "all"], value="warm")
+                        gr.Markdown("[About the Inference API status (Warm, Cold, Frozen)](https://huggingface.co/docs/api-inference/supported-models)", elem_classes="info")
+                    with gr.Column():
+                        infer_gated_status = gr.Radio(label="Gated status", choices=["gated", "non-gated", "all"], value="all")
+                        infer_appr_status = gr.CheckboxGroup(label="Approval method", choices=["auto", "manual"], value=["auto", "manual"])
+                infer_run_button = gr.Button("Search", variant="primary")
+        with gr.Tab("Find recommended repos"):
+            with gr.Group():
+                with gr.Row(equal_height=True):
+                    #rec_repo_id = gr.Textbox(label="Repo ID", info="Input your favorite repo", value="")
+                    # NOTE(review): "sumbit_on_select" is spelled as in the original call; presumably it
+                    # matches the component's own keyword name - confirm before "correcting" it.
+                    rec_repo_id = HuggingfaceHubSearch(label="Repo ID", placeholder="Input your favorite Repo ID", search_type=["model", "dataset", "space"],
+                                                       sumbit_on_select=False)
+                    rec_repo_types = gr.CheckboxGroup(label="Repo type", choices=["model", "dataset", "space"], value=["model", "dataset", "space"])
+                with gr.Row(equal_height=True):
+                    rec_sort = gr.Radio(label="Sort", choices=["last_modified", "likes", "downloads", "downloads_all_time", "trending_score"], value="likes")
+                    rec_limit = gr.Number(label="Limit", value=20, step=1, minimum=1, maximum=1000)
+                with gr.Accordion("Advanced", open=False):
+                    with gr.Row(equal_height=True):
+                        rec_show_labels = gr.CheckboxGroup(label="Show items", choices=get_labels(), value=get_valid_labels())
+            rec_run_button = gr.Button("Search", variant="primary")
+        with gr.Group():
+            # The filter widgets start hidden; the search handlers make them visible.
+            with gr.Accordion("Filter", open=False):
+                hide_labels = gr.CheckboxGroup(label="Hide items", choices=[], value=[], visible=False)
+                with gr.Row(equal_height=True):
+                    filter_item1 = gr.Dropdown(label="Filter item", choices=[""], value="", visible=False)
+                    filter1 = gr.Dropdown(label="Filter", choices=[""], value="", allow_custom_value=True, visible=False)
+                    filter_btn = gr.Button("Apply filter", variant="secondary", visible=False)
+            result_df = gr.DataFrame(label="Results", type="pandas", value=None, interactive=False)
+    # Normal search, then refresh the filter dropdowns once the search succeeded.
+    run_button.click(search, [repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_status, gated_status, appr_status,
+                              size_categories, limit, hardware, stage, fetch_detail, show_labels, search_result],
+                     [result_df, hide_labels, search_result])\
+    .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
+    # Inference search reuses the advanced inputs of the normal tab but its own status/gating widgets.
+    infer_run_button.click(search, [infer_repo_types, sort, sort_method, filter_str, search_str, author, tags, infer_infer_status, infer_gated_status, infer_appr_status,
+                                    size_categories, limit, hardware, stage, fetch_detail, show_labels, search_result],
+                           [result_df, hide_labels, search_result])\
+    .success(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False)
+    gr.on(triggers=[hide_labels.change, filter_btn.click], fn=update_df, inputs=[hide_labels, filter_item1, filter1, search_result],
+          outputs=[result_df, search_result], trigger_mode="once", queue=False, show_api=False)
+    filter_item1.change(update_filter, [filter_item1, search_result], [filter_item1, filter1, filter_btn, search_result], queue=False, show_api=False)
+    subtag_cat.change(update_subtag_items, [subtag_cat], [subtag_item], queue=False, show_api=False)
+    subtag_btn.click(update_subtags, [tags, subtag_cat, subtag_item], [tags], queue=False, show_api=False)
+    tag_btn.click(update_tags, [tags, tag_item], [tags], queue=False, show_api=False)
+    gr.on(triggers=[rec_run_button.click, rec_repo_id.submit], fn=search_ref_repos, inputs=[rec_repo_id, rec_repo_types, rec_sort, rec_show_labels, rec_limit, search_result],
+          outputs=[result_df, hide_labels, search_result])
+demo.queue().launch()

hfconstants.py ADDED Viewed

	@@ -0,0 +1,7 @@

+# Dataset size buckets offered as choices of the "Size categories" checkbox group.
+DS_SIZE_CATEGORIES = ["n<1K", "1K<n<10K", "10K<n<100K", "100K<n<1M", "1M<n<10M", "10M<n<100M",
+    "100M<n<1B", "1B<n<10B", "10B<n<100B", "100B<n<1T", "n>1T"]
+# Space hardware identifiers offered as choices of the "Specify hardware" checkbox group.
+SPACE_HARDWARES = ["cpu-basic", "zero-a10g", "cpu-upgrade", "t4-small", "l4x1", "a10g-large", "l40sx1", "a10g-small", "t4-medium", "cpu-xl", "a100-large"]
+# Space runtime stage names offered as choices of the "Specify stage" checkbox group.
+SPACE_STAGES = ["RUNNING", "SLEEPING", "RUNTIME_ERROR", "PAUSED", "BUILD_ERROR", "CONFIG_ERROR", "BUILDING", "APP_STARTING", "RUNNING_APP_STARTING"]

hfsearch.py ADDED Viewed

	@@ -0,0 +1,514 @@

+import spaces
+import gradio as gr
+from huggingface_hub import HfApi, ModelInfo, DatasetInfo, SpaceInfo
+from typing import Union
+import gc
+import pandas as pd
+import datetime
+import json
+import re
+from hfconstants import DS_SIZE_CATEGORIES, SPACE_HARDWARES, SPACE_STAGES
+# No-op function decorated with spaces.GPU; nothing else in this file calls it.
+# NOTE(review): presumably present only so the Space registers a GPU-decorated function - confirm.
+@spaces.GPU
+def dummy_gpu():
+    pass
+# Result table columns, keyed by column label. Each value is a positional list:
+#   [0] sort position of the column (lower comes first),
+#   [1] column datatype name (must be one of Labels.VALID_DTYPE),
+#   [2] whether the column is shown by default (see get_valid_labels),
+#   [3] optional column width; Labels.set falls back to "10%" when absent.
+RESULT_ITEMS = {
+    "Type": [1, "str", True],
+    "ID": [2, "markdown", True, "40%"],
+    "Status": [4, "markdown", True],
+    "Gated": [6, "str", True],
+    "Likes": [10, "number", True],
+    "DLs": [12, "number", True],
+    "AllDLs": [13, "number", False],
+    "Trending": [16, "number", True],
+    "LastMod.": [17, "str", True],
+    "Library": [20, "markdown", False],
+    "Pipeline": [21, "markdown", True],
+    "Hardware": [25, "str", False],
+    "Stage": [26, "str", False],
+    "NFAA": [40, "str", False],
+}
+# Load the tag vocabularies used by the tag-input helpers from the working directory.
+# If either file cannot be opened or parsed, BOTH fall back to empty containers and the
+# error is only printed, so callers must cope with an empty TAGS / SUBTAGS.
+try:
+    with open("tags.json", encoding="utf-8") as f:
+        TAGS = json.load(f)
+    with open("subtags.json", encoding="utf-8") as f:
+        SUBTAGS = json.load(f)
+except Exception as e:
+    TAGS = []
+    SUBTAGS = {}
+    print(e)
+def get_tags():
+    """Return at most the first 1000 entries of the loaded tag list."""
+    return TAGS[:1000]
+def get_subtag_categories():
+    """Return the sub-tag category names, i.e. the keys of ``SUBTAGS``, as a list."""
+    return [category for category in SUBTAGS]
+def update_subtag_items(category: str):
+    """Build a Gradio update listing the sub-tag items of *category*.
+
+    The choice list always starts with an empty entry, which is also the selected value;
+    an unknown category yields just that empty entry.
+    """
+    items = ["", *SUBTAGS.get(category, [])]
+    return gr.update(choices=items, value=items[0])
+def update_subtags(tags: str, category: str, item: str):
+    """Append ``category:item`` as a new line of the multi-line tag string.
+
+    Args:
+        tags: Current newline-separated tag string (may be empty).
+        category: Sub-tag category used as the prefix.
+        item: Sub-tag value; when empty nothing is appended.
+
+    Returns:
+        The updated tag string. With an empty *item* the input is returned unchanged
+        instead of gaining a trailing newline.
+    """
+    if not item:
+        return tags or ""
+    addtag = f"{category}:{item}"
+    return f"{tags}\n{addtag}" if tags else addtag
+def update_tags(tags: str, item: str):
+    """Append *item* as a new line of the multi-line tag string.
+
+    Args:
+        tags: Current newline-separated tag string (may be empty).
+        item: Tag to add; when empty nothing is appended.
+
+    Returns:
+        The updated tag string. With an empty *item* the input is returned unchanged
+        instead of gaining a trailing newline.
+    """
+    if not item:
+        return tags or ""
+    return f"{tags}\n{item}" if tags else item
+def get_repo_type(repo_id: str):
+    """Determine whether *repo_id* names a dataset, a space or a model.
+
+    Probe order is dataset, space, then the API's default repo type (reported as "model").
+    Returns None when none of the probes finds the repo.
+
+    Raises:
+        Exception: if any of the existence checks fails; the message contains the repo id.
+    """
+    try:
+        hub = HfApi()
+        if hub.repo_exists(repo_id=repo_id, repo_type="dataset"):
+            return "dataset"
+        if hub.repo_exists(repo_id=repo_id, repo_type="space"):
+            return "space"
+        if hub.repo_exists(repo_id=repo_id):
+            return "model"
+        return None
+    except Exception as e:
+        print(e)
+        raise Exception(f"Repo not found: {repo_id} {e}")
+def sort_dict(d: dict):
+    """Return a new dict holding the entries of *d* ordered by value, largest first."""
+    ordered = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
+    return {k: v for k, v in ordered}
+def get_repo_likers(repo_id: str, repo_type: str="model"):
+    """Return the usernames of the users who liked the given repo.
+
+    Raises:
+        Exception: wrapping any error raised while listing the likers (also printed).
+    """
+    try:
+        likers = HfApi().list_repo_likers(repo_id=repo_id, repo_type=repo_type)
+        return [liker.username for liker in likers]
+    except Exception as e:
+        print(e)
+        raise Exception(e)
+def get_liked_repos(users: list[str]):
+    """Tally the repos liked by *users*.
+
+    Args:
+        users: Usernames whose liked repos are fetched one by one.
+
+    Returns:
+        Three parallel lists ordered by descending count: repo ids, repo types
+        ("model", "dataset" or "space") and the number of given users who liked each repo.
+
+    Raises:
+        Exception: wrapping any error raised by the Hub API (also printed).
+    """
+    try:
+        api = HfApi()
+        likes_dict = {}
+        types_dict = {}
+        for user in users:
+            likes = api.list_liked_repos(user=user)
+            liked_by_type = (("model", likes.models), ("dataset", likes.datasets), ("space", likes.spaces))
+            for repo_type, repo_ids in liked_by_type:
+                for repo_id in repo_ids:
+                    # Start from 0 so that a single like counts as 1, not 2.
+                    likes_dict[repo_id] = likes_dict.get(repo_id, 0) + 1
+                    types_dict[repo_id] = repo_type
+        likes_dict = sort_dict(likes_dict)
+        likes_list = list(likes_dict.keys())
+        types_list = [types_dict[x] for x in likes_list]
+        counts_list = list(likes_dict.values())
+        return likes_list, types_list, counts_list
+    except Exception as e:
+        print(e)
+        raise Exception(e)
+def get_repo_collections(repo_id: str, repo_type: str="model", limit=10):
+    """Tally the items that share a collection with the given repo.
+
+    Args:
+        repo_id: Repo whose containing collections are looked up.
+        repo_type: "dataset", "space" or anything else for a model.
+        limit: Maximum number of collections requested (sorted by upvotes).
+
+    Returns:
+        Three parallel lists ordered by descending count: item ids, item types and the
+        number of fetched collections each item appears in. Paper items are skipped.
+
+    Raises:
+        Exception: wrapping any error raised by the Hub API (also printed).
+    """
+    try:
+        api = HfApi()
+        if repo_type == "dataset": item = f"datasets/{repo_id}"
+        elif repo_type == "space": item = f"spaces/{repo_id}"
+        else: item = f"models/{repo_id}"
+        cols_dict = {}
+        types_dict = {}
+        cols = api.list_collections(item=item, sort="upvotes", limit=limit)
+        for c in cols:
+            col = api.get_collection(collection_slug=c.slug)
+            for i in col.items:
+                if i.item_type == "paper": continue
+                item_id = i.item_id
+                # Start from 0 so that one appearance counts as 1, not 2.
+                cols_dict[item_id] = cols_dict.get(item_id, 0) + 1
+                types_dict[item_id] = i.item_type
+        cols_dict = sort_dict(cols_dict)
+        cols_list = list(cols_dict.keys())
+        types_list = [types_dict[x] for x in cols_list]
+        counts_list = list(cols_dict.values())
+        return cols_list, types_list, counts_list
+    except Exception as e:
+        print(e)
+        raise Exception(e)
+def get_users_collections(users: list[str], limit=10, max_users=6):
+    """Tally the items found in collections owned by the given users.
+
+    Args:
+        users: Usernames; only the first *max_users* are queried.
+        limit: Maximum number of collections requested per user (sorted by upvotes).
+        max_users: Cap on how many users are queried (default 6).
+
+    Returns:
+        Three parallel lists ordered by descending count: item ids, item types and the
+        number of fetched collections each item appears in. Paper items are skipped.
+
+    Raises:
+        Exception: wrapping any error raised by the Hub API (also printed).
+    """
+    try:
+        api = HfApi()
+        cols_dict = {}
+        types_dict = {}
+        for user in users[0:max_users]:
+            cols = api.list_collections(owner=user, sort="upvotes", limit=limit)
+            for c in cols:
+                col = api.get_collection(collection_slug=c.slug)
+                for i in col.items:
+                    if i.item_type == "paper": continue
+                    item_id = i.item_id
+                    # Start from 0 so that one appearance counts as 1, not 2.
+                    cols_dict[item_id] = cols_dict.get(item_id, 0) + 1
+                    types_dict[item_id] = i.item_type
+        cols_dict = sort_dict(cols_dict)
+        cols_list = list(cols_dict.keys())
+        types_list = [types_dict[x] for x in cols_list]
+        counts_list = list(cols_dict.values())
+        return cols_list, types_list, counts_list
+    except Exception as e:
+        print(e)
+        raise Exception(e)
+def get_ref_repos(repo_id: str):
+    """Rank repos related to *repo_id* via shared likers and shared collections.
+
+    Repos liked by up to the first 10 likers of *repo_id* score 2 per count; items that
+    share a collection with *repo_id* score 5 per count. *repo_id* itself is excluded.
+
+    Returns:
+        Three parallel lists ordered by descending score: repo ids, repo types, scores.
+
+    Raises:
+        Exception: if the repo does not exist under any repo type, or a Hub call fails.
+    """
+    repo_type = get_repo_type(repo_id)
+    # get_repo_type returns None for an unknown repo; fail here with a clear message
+    # instead of passing None on to the API calls below.
+    if repo_type is None: raise Exception(f"Repo not found: {repo_id}")
+    refs = {}
+    types = {}
+    likers = get_repo_likers(repo_id, repo_type)[0:10]
+    for i, t, c in zip(*get_liked_repos(likers)):
+        refs[i] = refs.get(i, 0) + c * 2
+        types[i] = t
+    for i, t, c in zip(*get_repo_collections(repo_id, repo_type)):
+        refs[i] = refs.get(i, 0) + c * 5
+        types[i] = t
+    refs = sort_dict(refs)
+    refs.pop(repo_id, None)
+    refs_list = list(refs.keys())
+    types_list = [types[x] for x in refs_list]
+    counts_list = list(refs.values())
+    return refs_list, types_list, counts_list
+def str_to_list(s: str):
+    """Split a newline-separated string into a list of stripped, non-empty entries.
+
+    Blank lines and whitespace-only lines are dropped, so an empty or blank input yields
+    an empty list rather than ``[""]``. Non-string input (e.g. None) also yields ``[]``.
+    """
+    if not isinstance(s, str):
+        return []
+    stripped = (part.strip() for part in s.split("\n"))
+    return [part for part in stripped if part]
+def is_valid_arg(s: str):
+    """Return True when ``str_to_list(s)`` produces at least one entry."""
+    entries = str_to_list(s)
+    return bool(entries)
+def get_labels():
+    """Return every column label defined in ``RESULT_ITEMS``, in definition order."""
+    return [label for label in RESULT_ITEMS]
+def get_valid_labels():
+    """Return the column labels whose default-visible flag (third field) is truthy."""
+    return [label for label, spec in RESULT_ITEMS.items() if spec[2]]
+def date_to_str(dt: datetime.datetime):
+    """Format *dt* as ``YYYY-MM-DD HH:MM``."""
+    return f"{dt:%Y-%m-%d %H:%M}"
+class Labels():
+    """Registry of the result columns seen so far, with their datatype, order and width.
+
+    Column metadata comes from ``RESULT_ITEMS``; a label is registered the first time a
+    value is stored for it.
+    """
+    VALID_DTYPE = ["str", "number", "bool", "date", "markdown"]
+    def __init__(self):
+        self.types = {}  # label -> datatype name
+        self.orders = {}  # label -> sort position of the column
+        self.widths = {}  # label -> column width string
+    def set(self, label: str):
+        """Register *label* using its ``RESULT_ITEMS`` entry.
+
+        Raises:
+            Exception: if the label is unknown or its datatype is not in VALID_DTYPE.
+        """
+        if label not in RESULT_ITEMS: raise Exception(f"Invalid item: {label}")
+        item = RESULT_ITEMS[label]
+        # Report the offending datatype itself (not the builtin ``type``).
+        if item[1] not in self.VALID_DTYPE: raise Exception(f"Invalid data type: {item[1]}")
+        self.types[label] = item[1]
+        self.orders[label] = item[0]
+        # The width is the optional fourth field; default to "10%".
+        self.widths[label] = item[3] if len(item) > 3 else "10%"
+    def get(self):
+        """Return ``(labels, datatypes)`` for the registered columns, sorted by column order."""
+        labels = sorted(self.types, key=lambda x: self.orders[x])
+        label_types = [self.types[s] for s in labels]
+        return labels, label_types
+    def get_widths(self):
+        """Return the column widths in the same order as the labels returned by ``get``."""
+        labels = self.get()[0]
+        return [self.widths[s] for s in labels]
+    def get_null_value(self, type: str):
+        """Return the placeholder used for a missing value of the given datatype."""
+        if type == "bool": return False
+        elif type == "number" or type == "date": return 0
+        else: return "None"
+# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api
+# https://huggingface.co/docs/huggingface_hub/package_reference/hf_api#huggingface_hub.ModelInfo
+class HFSearchResult():
+    """Collects Hub search results and renders them as Gradio component updates.
+
+    Each result is stored as a dict of column values (``item_list``) next to its raw
+    info object (``item_info_list``) and a per-row hide flag (``item_hide_flags``);
+    the three lists are kept parallel.
+    """
+    def __init__(self):
+        self.labels = Labels()
+        self.current_item = {}  # column values of the item being assembled
+        self.current_item_info = None  # raw info object of the item being assembled
+        self.item_list = []
+        self.item_info_list = []
+        self.item_hide_flags = []
+        self.hide_labels = []  # columns hidden by the user after the search
+        self.show_labels = []  # columns selected for display at search time
+        self.filter_items = None
+        self.filters = None
+        gc.collect()
+    def reset(self):
+        """Drop all stored results and settings by re-running ``__init__``."""
+        self.__init__()
+    def _set(self, data, label: str):
+        """Register *label* as a column and store *data* for the item being assembled."""
+        self.labels.set(label)
+        self.current_item[label] = data
+    def _next(self):
+        """Commit the item being assembled to the result lists and start a fresh one."""
+        self.item_list.append(self.current_item.copy())
+        self.current_item = {}
+        self.item_info_list.append(self.current_item_info)
+        self.current_item_info = None
+        self.item_hide_flags.append(False)
+    def add_item(self, i: Union[ModelInfo, DatasetInfo, SpaceInfo]):
+        """Extract the displayed columns from one info object and append it as a result.
+
+        Objects that are none of ModelInfo / DatasetInfo / SpaceInfo are ignored.
+        Attributes that are None are left out, so items may lack some columns.
+        """
+        if isinstance(i, ModelInfo): repo_type = "model"
+        elif isinstance(i, DatasetInfo): repo_type = "dataset"
+        elif isinstance(i, SpaceInfo): repo_type = "space"
+        else: return
+        self.current_item_info = i
+        self._set(repo_type, "Type")
+        self._set(i.id, "ID")
+        if i.likes is not None: self._set(i.likes, "Likes")
+        if i.last_modified is not None: self._set(date_to_str(i.last_modified), "LastMod.")
+        if i.trending_score is not None: self._set(int(i.trending_score), "Trending")
+        if i.tags is not None: self._set("True" if "not-for-all-audiences" in i.tags else "False", "NFAA")
+        if repo_type in ["model", "dataset"]:
+            if i.gated is not None: self._set(i.gated if i.gated else "off", "Gated")
+            if i.downloads is not None: self._set(i.downloads, "DLs")
+            if i.downloads_all_time is not None: self._set(i.downloads_all_time, "AllDLs")
+        if repo_type == "model":
+            if i.inference is not None: self._set(i.inference, "Status")
+            if i.library_name is not None: self._set(i.library_name, "Library")
+            if i.pipeline_tag is not None: self._set(i.pipeline_tag, "Pipeline")
+        if repo_type == "space":
+            if i.runtime is not None:
+                self._set(i.runtime.hardware, "Hardware")
+                self._set(i.runtime.stage, "Stage")
+        self._next()
+    def _sort_by(self, sort: str, repo_types: list):
+        """Sort the results on the column matching the API sort key *sort*.
+
+        Download-based keys are only honoured when spaces are not part of the search;
+        anything unmatched falls back to the last-modified column.
+        """
+        if sort == "downloads" and ("space" not in repo_types): self.sort("DLs")
+        elif sort == "downloads_all_time" and ("space" not in repo_types): self.sort("AllDLs")
+        elif sort == "likes": self.sort("Likes")
+        elif sort == "trending_score": self.sort("Trending")
+        else: self.sort("LastMod.")
+    def search(self, repo_types: list, sort: str, sort_method: str, filter_str: str, search_str: str, author: str, tags: str, infer: str, gated: str, appr: list[str],
+               size_categories: list, limit: int, hardware: list, stage: list, fetch_detail: list, show_labels: list):
+        """Query the Hub for models, datasets and/or spaces and store the results.
+
+        Previous results are discarded first. Gated repos whose approval mode is not in
+        *appr* are skipped; spaces are additionally filtered by *hardware* and *stage*
+        when runtime information is available.
+
+        Raises:
+            Exception: "Search error: ..." wrapping any failure, including an empty result.
+        """
+        try:
+            self.reset()
+            self.show_labels = show_labels.copy()
+            api = HfApi()
+            kwargs = {}  # arguments shared by all three list calls
+            mkwargs = {}  # model-only arguments
+            dkwargs = {}  # dataset-only arguments
+            skwargs = {}  # space-only arguments
+            if filter_str: kwargs["filter"] = str_to_list(filter_str)
+            if search_str: kwargs["search"] = search_str
+            if author: kwargs["author"] = author
+            if tags and is_valid_arg(tags):
+                mkwargs["tags"] = str_to_list(tags)
+                dkwargs["tags"] = str_to_list(tags)
+            if limit > 0: kwargs["limit"] = limit
+            if sort_method == "descending order": kwargs["direction"] = -1
+            if gated == "gated":
+                mkwargs["gated"] = True
+                dkwargs["gated"] = True
+            elif gated == "non-gated":
+                mkwargs["gated"] = False
+                dkwargs["gated"] = False
+            mkwargs["sort"] = sort
+            if len(size_categories) > 0: dkwargs["size_categories"] = size_categories
+            if infer != "all": mkwargs["inference"] = infer
+            if "model" in repo_types:
+                models = api.list_models(full=True, cardData=True, **kwargs, **mkwargs)
+                for model in models:
+                    if model.gated is not None and model.gated and model.gated not in appr: continue
+                    self.add_item(model)
+            if "dataset" in repo_types:
+                datasets = api.list_datasets(full=True, **kwargs, **dkwargs)
+                for dataset in datasets:
+                    if dataset.gated is not None and dataset.gated and dataset.gated not in appr: continue
+                    self.add_item(dataset)
+            if "space" in repo_types:
+                if "Space Runtime" in fetch_detail:
+                    space_infos = api.list_spaces(expand=["cardData", "datasets", "disabled", "lastModified", "createdAt",
+                                                     "likes", "models", "private", "runtime", "sdk", "sha", "tags", "trendingScore"], **kwargs, **skwargs)
+                else: space_infos = api.list_spaces(full=True, **kwargs, **skwargs)
+                for space in space_infos:
+                    if space.gated is not None and space.gated and space.gated not in appr: continue
+                    if space.runtime is not None:
+                         # The hardware filter only excludes spaces that are currently running.
+                         if len(hardware) > 0 and space.runtime.stage == "RUNNING" and space.runtime.hardware not in hardware: continue
+                         if len(stage) > 0 and space.runtime.stage not in stage: continue
+                    self.add_item(space)
+            self._sort_by(sort, repo_types)
+        except Exception as e:
+            raise Exception(f"Search error: {e}") from e
+    def search_ref_repos(self, repo_id: str, repo_types: list, sort: str, show_labels: list, limit=10):
+        """Find repos related to *repo_id* and store up to *limit* of the requested types.
+
+        Candidates come from ``get_ref_repos`` in ranked order. Only candidates whose
+        type is in *repo_types* count towards *limit*, so filtered-out types no longer
+        shrink the result.
+
+        Raises:
+            Exception: "Search error: ..." wrapping any failure, including an empty result.
+        """
+        try:
+            self.reset()
+            self.show_labels = show_labels.copy()
+            api = HfApi()
+            repos, types, _ = get_ref_repos(repo_id)
+            added = 0
+            for r, t in zip(repos, types):
+                if added >= limit: break
+                if t not in repo_types: continue
+                info = api.repo_info(repo_id=r, repo_type=t)
+                if info:
+                    self.add_item(info)
+                    added += 1
+            self._sort_by(sort, repo_types)
+        except Exception as e:
+            raise Exception(f"Search error: {e}") from e
+    def get(self):
+        """Apply the current filter and return ``(styled_df, visible_labels, visible_types)``."""
+        labels, label_types = self.labels.get()
+        self._do_filter()
+        # Missing columns are filled with the datatype's null placeholder.
+        dflist = [[item.get(l, self.labels.get_null_value(t)) for l, t in zip(labels, label_types)] for item, is_hide in zip(self.item_list, self.item_hide_flags) if not is_hide]
+        df = self._to_pandas(dflist, labels)
+        show_label_types = [t for l, t in zip(labels, label_types) if l not in self.hide_labels and l in self.show_labels]
+        show_labels = [l for l in labels if l not in self.hide_labels and l in self.show_labels]
+        return df, show_labels, show_label_types
+    def _to_pandas(self, dflist: list, labels: list):
+        """Build the displayed DataFrame and attach per-cell CSS via a pandas Styler.
+
+        Styling is computed from the full frame (``ref_df``) so that rules can depend on
+        columns that are hidden from the displayed frame.
+        """
+        # https://pandas.pydata.org/docs/reference/api/pandas.io.formats.style.Styler.apply.html
+        # https://stackoverflow.com/questions/41654949/pandas-style-function-to-highlight-specific-columns
+        # https://stackoverflow.com/questions/69832206/pandas-styling-with-conditional-rules
+        # https://stackoverflow.com/questions/41203959/conditionally-format-python-pandas-cell
+        # https://stackoverflow.com/questions/51187868/how-do-i-remove-and-re-sort-reindex-columns-after-applying-style-in-python-pan
+        # https://stackoverflow.com/questions/36921951/truth-value-of-a-series-is-ambiguous-use-a-empty-a-bool-a-item-a-any-o
+        def rank_df(sdf: pd.DataFrame, df: pd.DataFrame, col: str):
+            # Later (higher) quantiles overwrite earlier colours for the same cell.
+            ranks = [(0.5, "gold"), (0.75, "orange"), (0.9, "orangered")]
+            for t, color in ranks:
+                sdf.loc[df[col] >= df[col].quantile(q=t), [col]] = f'color: {color}'
+            return sdf
+        def highlight_df(x: pd.DataFrame, df: pd.DataFrame):
+            sdf = pd.DataFrame("", index=x.copy().index, columns=x.copy().columns)
+            columns = df.columns
+            if "Trending" in columns: sdf = rank_df(sdf, df, "Trending")
+            if "Likes" in columns: sdf = rank_df(sdf, df, "Likes")
+            if "AllDLs" in columns: sdf = rank_df(sdf, df, "AllDLs")
+            if "DLs" in columns: sdf = rank_df(sdf, df, "DLs")
+            if "Status" in columns:
+                sdf.loc[df["Status"] == "warm", ["Type"]] = 'color: orange'
+                sdf.loc[df["Status"] == "cold", ["Type"]] = 'color: dodgerblue'
+            if "Gated" in columns:
+                sdf.loc[df["Gated"] == "auto", ["Gated"]] = 'color: dodgerblue'
+                sdf.loc[df["Gated"] == "manual", ["Gated"]] = 'color: crimson'
+            if "Stage" in columns and "Hardware" in columns:
+                sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] != "zero-a10g") & (df["Hardware"] != "cpu-basic") & (df["Hardware"] != "None") & (df["Hardware"]), ["Hardware", "Type"]] = 'color: lime'
+                sdf.loc[(df["Stage"] == "RUNNING") & (df["Hardware"] == "zero-a10g"), ["Hardware", "Type"]] = 'color: green'
+                sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING")] = 'opacity: 0.5'
+                sdf.loc[(df["Type"] == "space") & (df["Stage"] != "RUNNING"), ["Type"]] = 'color: crimson'
+                sdf.loc[df["Stage"] == "RUNNING", ["Stage"]] = 'color: lime'
+            if "NFAA" in columns: sdf.loc[df["NFAA"] == "True", ["Type"]] = 'background-color: hotpink'
+            # Rules may have added style columns for hidden fields; keep only displayed ones.
+            show_columns = x.copy().columns
+            style_columns = sdf.columns
+            drop_columns = [c for c in style_columns if c not in show_columns]
+            sdf = sdf.drop(drop_columns, axis=1)
+            return sdf
+        def id_to_md(df: pd.DataFrame):
+            # Called per row: turn the repo id into a markdown link to its Hub page.
+            if df["Type"] == "dataset": return f'[{df["ID"]}](https://hf.co/datasets/{df["ID"]})'
+            elif df["Type"] == "space": return f'[{df["ID"]}](https://hf.co/spaces/{df["ID"]})'
+            else: return f'[{df["ID"]}](https://hf.co/{df["ID"]})'
+        def format_md_df(df: pd.DataFrame):
+            df["ID"] = df.apply(id_to_md, axis=1)
+            return df
+        hide_labels = [l for l in labels if l in self.hide_labels or l not in self.show_labels]
+        df = format_md_df(pd.DataFrame(dflist, columns=labels))
+        ref_df = df.copy()
+        df = df.drop(hide_labels, axis=1).style.apply(highlight_df, axis=None, df=ref_df)
+        return df
+    def set_hide(self, hide_labels: list):
+        """Remember which columns the user chose to hide."""
+        self.hide_labels = hide_labels.copy()
+    def set_filter(self, filter_item1: str, filter1: str):
+        """Set the (single) column/value filter, or clear it when both arguments are empty."""
+        if not filter_item1 and not filter1:
+            self.filter_items = None
+            self.filters = None
+        else:
+            self.filter_items = [filter_item1]
+            self.filters = [filter1]
+    def _do_filter(self):
+        """Recompute ``item_hide_flags`` from the current filter.
+
+        A row is hidden when the filtered column holds the null placeholder, or when the
+        column is textual and does not contain the filter string. Rows lacking the
+        column altogether are left visible.
+        """
+        if self.filters is None or self.filter_items is None:
+            self.item_hide_flags = [False] * len(self.item_list)
+            return
+        labels, label_types = self.labels.get()
+        types = dict(zip(labels, label_types))
+        flags = []
+        for item in self.item_list:
+            flag = False
+            for i, f in zip(self.filter_items, self.filters):
+                if i not in item.keys(): continue
+                t = types[i]
+                if item[i] == self.labels.get_null_value(t):
+                    flag = True
+                    break
+                if t in set(["str", "markdown"]):
+                    if f in item[i]: flag = False
+                    else:
+                        flag = True
+                        break
+            flags.append(flag)
+        self.item_hide_flags = flags
+    def sort(self, key="Likes"):
+        """Sort the stored results by column *key*, largest first.
+
+        Unknown keys fall back to "Likes". Items that lack the column sort as the
+        column's null placeholder instead of raising KeyError, and the three parallel
+        lists stay lists (not tuples) so that items can still be appended afterwards.
+
+        Raises:
+            Exception: "No item found." when there are no results.
+        """
+        if len(self.item_list) == 0: raise Exception("No item found.")
+        if key not in self.labels.get()[0]: key = "Likes"
+        null_value = self.labels.get_null_value(self.labels.types.get(key, "number"))
+        rows = sorted(zip(self.item_list, self.item_hide_flags, self.item_info_list), key=lambda x: x[0].get(key, null_value), reverse=True)
+        self.item_list, self.item_hide_flags, self.item_info_list = (list(col) for col in zip(*rows))
+    def get_gr_df(self):
+        """Return the Gradio update for the result table."""
+        df, labels, label_types = self.get()
+        # One width per displayed column, in display order.
+        widths = [self.labels.widths[l] for l in labels]
+        return gr.update(type="pandas", value=df, headers=labels, datatype=label_types, column_widths=widths, wrap=True)
+    def get_gr_hide_labels(self):
+        """Return the Gradio update listing all known columns as hide choices (none selected)."""
+        return gr.update(choices=self.labels.get()[0], value=[], visible=True)
+    def get_gr_filter_item(self, filter_item: str=""):
+        """Return the Gradio update for the filter-column dropdown (textual columns only)."""
+        labels, label_types = self.labels.get()
+        choices = [s for s, t in zip(labels, label_types) if t in set(["str", "markdown"])]
+        if len(choices) == 0: choices = [""]
+        return gr.update(choices=choices, value=filter_item if filter_item else choices[0], visible=True)
+    def get_gr_filter(self, filter_item: str=""):
+        """Return the Gradio update for the filter-value dropdown of column *filter_item*.
+
+        Offers an empty entry plus up to 100 distinct values of that column.
+        """
+        labels = self.labels.get()[0]
+        if not filter_item or filter_item not in set(labels): return gr.update(choices=[""], value="", visible=True)
+        d = {}
+        for item in self.item_list:
+            if filter_item not in item.keys(): continue
+            v = item[filter_item]
+            if v in d.keys(): d[v] += 1
+            else: d[v] = 1
+        # NOTE(review): values are ordered by ascending frequency, so the 100 rarest are kept - confirm intent.
+        return gr.update(choices=[""] + [t[0] for t in sorted(d.items(), key=lambda x : x[1])][:100], value="", visible=True)
+def search(repo_types: list, sort: str, sort_method: str, filter_str: str, search_str: str, author: str, tags: str, infer: str,
+           gated: str, appr: list[str], size_categories: list, limit: int, hardware: list, stage: list, fetch_detail: list, show_labels: list, r: HFSearchResult):
+    """Gradio handler: run a Hub search on *r* and return the table and hide-list updates.
+
+    Returns:
+        ``(result table update, hide-labels update, r)``.
+
+    Raises:
+        gr.Error: wrapping any exception raised while searching or rendering.
+    """
+    try:
+        r.search(repo_types, sort, sort_method, filter_str, search_str, author, tags, infer, gated, appr,
+                 size_categories, limit, hardware, stage, fetch_detail, show_labels)
+        table_update = r.get_gr_df()
+        hide_update = r.get_gr_hide_labels()
+    except Exception as e:
+        raise gr.Error(e)
+    return table_update, hide_update, r
+def search_ref_repos(repo_id: str, repo_types: list, sort: str, show_labels: list, limit, r: HFSearchResult):
+    """Gradio handler: find repos related to *repo_id* and return the table updates.
+
+    Returns:
+        ``(result table update, hide-labels update, r)``.
+
+    Raises:
+        gr.Error: "Input Repo ID" when *repo_id* is empty, otherwise wrapping any
+            exception raised while searching or rendering.
+    """
+    # Validate outside the try block so this error is not caught and re-wrapped below.
+    if not repo_id: raise gr.Error("Input Repo ID")
+    try:
+        r.search_ref_repos(repo_id, repo_types, sort, show_labels, limit)
+        return r.get_gr_df(), r.get_gr_hide_labels(), r
+    except Exception as e:
+        raise gr.Error(e)
+def update_df(hide_labels: list, filter_item1: str, filter1: str, r: HFSearchResult):
+    """Gradio handler: apply the hidden-column and filter settings to *r* and re-render.
+
+    Returns:
+        ``(result table update, r)``.
+    """
+    r.set_hide(hide_labels)
+    r.set_filter(filter_item1, filter1)
+    table_update = r.get_gr_df()
+    return table_update, r
+def update_filter(filter_item1: str, r: HFSearchResult):
+    """Gradio handler: refresh the filter widgets for the selected filter column.
+
+    Returns:
+        ``(filter-column update, filter-value update, update showing the apply button, r)``.
+    """
+    item_update = r.get_gr_filter_item(filter_item1)
+    value_update = r.get_gr_filter(filter_item1)
+    button_update = gr.update(visible=True)
+    return item_update, value_update, button_update, r

pre-requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ pip>=24.1

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ huggingface_hub
2	+ gradio_huggingfacehub_search

subtags.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tags.json ADDED Viewed

The diff for this file is too large to render. See raw diff