update

- README.md +15 -0
- app.py +47 -3
- pyproject.toml +17 -0
- requirements.txt +0 -0
- top5_error_rate.py +29 -18
- uv.lock +21 -1
README.md
CHANGED

@@ -23,3 +23,18 @@ Top-5 Error Rate = (Number of incorrect top-5 predictions) / (Total number of cases)
 Where:
 - Top-5 Accuracy: The proportion of cases where the true label is among the model's top 5 predicted classes.
 - Incorrect top-5 prediction: The true label is not in the top 5 predicted classes (ranked by probability).
+
+## How to Use
+
+At minimum, this metric requires predictions and references as inputs.
+
+```python
+accuracy_metric = evaluate.load("Aye10032/top5_error_rate")
+results = accuracy_metric.compute(references=[0], predictions=[[0, 1, 2, 3, 4]])
+print(results)
+```
+
+The output is:
+
+```
+{'accuracy': 0.0, 'top5_error_rate': 0.0}
+```
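For contrast with the README example above, here is a minimal sketch (assuming the same metric id and the compute behavior from top5_error_rate.py below) of a case where the true label falls outside the top five, which should drive the error rate to 1.0:

```python
import evaluate

# Hypothetical miss: reference class 9 cannot appear among the top-5 indices
# of a five-class score vector, so the sample counts as a top-5 error.
metric = evaluate.load("Aye10032/top5_error_rate")
results = metric.compute(references=[9], predictions=[[0.1, 0.3, 0.2, 0.2, 0.2]])
print(results)  # expected: {'accuracy': 0.0, 'top5_error_rate': 1.0}
```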
app.py
CHANGED

@@ -1,6 +1,50 @@
+import sys
+from pathlib import Path
+
 import evaluate
-from evaluate.utils import launch_gradio_widget
+import gradio as gr
+import polars as pl
+from evaluate import parse_readme
+
+metric = evaluate.load("Aye10032/top5_error_rate")
+
+
+def compute(data):
+    print(data)
+    # return metric.compute()
+    result = {
+        "predictions": [list(map(int, pred.split(","))) for pred in data["predictions"]],
+        "references": data["references"].cast(pl.Int64).to_list()
+    }
+    print(result)
+    return metric.compute(**result)
+
+
+local_path = Path(sys.path[0])
+
+default_value = pl.DataFrame({
+    'predictions': ['1,2,3,4,5', '1,2,3,4,5', '1,2,3,4,5'],
+    'references': ['0', '1', '2']
+})
 
-module = evaluate.load("Aye10032/top5_error_rate")
-launch_gradio_widget(module)
+iface = gr.Interface(
+    fn=compute,
+    inputs=gr.Dataframe(
+        headers=['predictions', 'references'],
+        col_count=2,
+        row_count=1,
+        datatype='str',
+        type='polars',
+        value=default_value
+    ),
+    outputs=gr.Textbox(label=metric.name),
+    description=(
+        metric.info.description
+        + "\nIf this is a text-based metric, make sure to wrap your input in double quotes."
+        " Alternatively you can use a JSON-formatted list as input."
+    ),
+    title=f"Metric: {metric.name}",
+    article=parse_readme(local_path / "README.md"),
+)
+
+iface.launch()
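As a reading aid for the compute wrapper above, a standalone sketch (using the same default table) of how each Dataframe column is parsed before the metric is called:

```python
import polars as pl

# Mirrors the row parsing in app.py's compute(): comma-separated prediction
# strings become lists of ints; reference strings are cast to integers.
data = pl.DataFrame({
    'predictions': ['1,2,3,4,5', '1,2,3,4,5', '1,2,3,4,5'],
    'references': ['0', '1', '2'],
})
parsed = {
    'predictions': [list(map(int, pred.split(','))) for pred in data['predictions']],
    'references': data['references'].cast(pl.Int64).to_list(),
}
print(parsed['predictions'][0])  # [1, 2, 3, 4, 5]
print(parsed['references'])      # [0, 1, 2]
```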
pyproject.toml
CHANGED

@@ -6,4 +6,21 @@ readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
     "evaluate[template]>=0.4.3",
+    "gradio>=5.24.0",
+    "polars>=1.27.1",
 ]
+
+[tool.ruff]
+# Allow lines to be as long as 100.
+line-length = 100
+extend-exclude = ["log", "data"]
+
+[tool.ruff.format]
+# Use single quotes.
+quote-style = "single"
+# Enable formatting of code snippets in docstrings.
+docstring-code-format = true
+
+[tool.ruff.lint]
+# On top of the default `select` (`E4`, `E7`, `E9`, and `F`), enable isort (`I`).
+extend-select = ["I"]
requirements.txt
CHANGED

Binary files a/requirements.txt and b/requirements.txt differ
top5_error_rate.py
CHANGED

@@ -2,6 +2,7 @@ from typing import Dict, Any
 
 import datasets
 import evaluate
+import numpy as np
 from evaluate.utils.file_utils import add_start_docstrings
 
 _DESCRIPTION = """
@@ -14,21 +15,22 @@ Top-5 Error Rate = (Number of incorrect top-5 predictions) / (Total number of cases)
 - Incorrect top-5 prediction: The true label is not in the top 5 predicted classes (ranked by probability).
 """
 
-
 _KWARGS_DESCRIPTION = """
Args:
-    predictions (`list` of `list` of `int`): Predicted labels.
+    predictions (`list` of `list` of `float`): Per-class prediction scores for each sample; the top-5 predicted classes are the indices of the five highest scores.
     references (`list` of `int`): Ground truth labels.
Returns:
-
+    top5_error_rate (`float`): Top-5 Error Rate score. Minimum possible value is 0. Maximum possible value is 1.0.
Examples:
-    >>>
-    >>> results =
+    >>> metric = evaluate.load("top5_error_rate")
+    >>> results = metric.compute(
+    ...     references=[0, 1, 2],
+    ...     predictions=[[0, 1, 2, 3, 4], [1, 0, 2, 3, 4], [2, 0, 1, 3, 4]]
+    ... )
     >>> print(results)
-    {'
+    {'accuracy': 0.0, 'top5_error_rate': 0.0}
 """
 
-
 _CITATION = """
 """

@@ -42,7 +44,7 @@ class Top5ErrorRate(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             features=datasets.Features(
                 {
-                    "predictions": datasets.Sequence(list[datasets.Value("
+                    "predictions": datasets.Sequence(list[datasets.Value("float")]),
                     "references": datasets.Sequence(datasets.Value("int32")),
                 }
                 if self.config_name == "multilabel"

@@ -55,17 +57,26 @@ class Top5ErrorRate(evaluate.Metric):
         )
 
     def _compute(
-
-
-
-
-
+        self,
+        *,
+        predictions: list[list[float]] = None,
+        references: list[int] = None,
+        **kwargs,
    ) -> Dict[str, Any]:
-
-
+        # to numpy array
+        outputs = np.array(predictions)
+        labels = np.array(references)
+
+        # Top-1 ACC
+        pred = outputs.argmax(axis=1)
+        acc = (pred == labels).mean()
 
-
+        # Top-5 Error Rate
+        top5_indices = outputs.argsort(axis=1)[:, -5:]
+        correct = (labels.reshape(-1, 1) == top5_indices).any(axis=1)
+        top5_error_rate = 1 - correct.mean()
 
         return {
-            "
+            "accuracy": acc,
+            "top5_error_rate": top5_error_rate
+        }
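The core of `_compute` is the argsort-plus-broadcasting membership test. A self-contained sketch with made-up scores over seven classes (so the top-5 cut actually excludes something):

```python
import numpy as np

# Hypothetical per-class scores for two samples over seven classes.
outputs = np.array([
    [0.01, 0.05, 0.30, 0.15, 0.25, 0.14, 0.10],  # true class 0 scores lowest -> top-5 miss
    [0.40, 0.20, 0.12, 0.10, 0.08, 0.06, 0.04],  # true class 0 scores highest -> top-5 hit
])
labels = np.array([0, 0])

# argsort ranks scores ascending, so the last five columns are the top-5 indices.
top5_indices = outputs.argsort(axis=1)[:, -5:]
# Broadcasting compares each label against its row's top-5 indices.
correct = (labels.reshape(-1, 1) == top5_indices).any(axis=1)  # [False, True]
print(1 - correct.mean())  # 0.5
```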
uv.lock
CHANGED

@@ -736,6 +736,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 },
 ]
 
+[[package]]
+name = "polars"
+version = "1.27.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e1/96/56ab877d3d690bd8e67f5c6aabfd3aa8bc7c33ee901767905f564a6ade36/polars-1.27.1.tar.gz", hash = "sha256:94fcb0216b56cd0594aa777db1760a41ad0dfffed90d2ca446cf9294d2e97f02", size = 4555382 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a0/f4/be965ca4e1372805d0d2313bb4ed8eae88804fc3bfeb6cb0a07c53191bdb/polars-1.27.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:ba7ad4f8046d00dd97c1369e46a4b7e00ffcff5d38c0f847ee4b9b1bb182fb18", size = 34756840 },
+    { url = "https://files.pythonhosted.org/packages/c0/1a/ae019d323e83c6e8a9b4323f3fea94e047715847dfa4c4cbaf20a6f8444e/polars-1.27.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:339e3948748ad6fa7a42e613c3fb165b497ed797e93fce1aa2cddf00fbc16cac", size = 31616000 },
+    { url = "https://files.pythonhosted.org/packages/20/c1/c65924c0ca186f481c02b531f1ec66c34f9bbecc11d70246562bb4949876/polars-1.27.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f801e0d9da198eb97cfb4e8af4242b8396878ff67b655c71570b7e333102b72b", size = 35388976 },
+    { url = "https://files.pythonhosted.org/packages/88/c2/37720b8794935f1e77bde439564fa421a41f5fed8111aeb7b9ca0ebafb2d/polars-1.27.1-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:4d18a29c65222451818b63cd397b2e95c20412ea0065d735a20a4a79a7b26e8a", size = 32586083 },
+    { url = "https://files.pythonhosted.org/packages/41/3d/1bb108eb278c1eafb303f78c515fb71c9828944eba3fb5c0ac432b9fad28/polars-1.27.1-cp39-abi3-win_amd64.whl", hash = "sha256:a4f832cf478b282d97f8bf86eeae2df66fa1384de1c49bc61f7224a10cc6a5df", size = 35602500 },
+    { url = "https://files.pythonhosted.org/packages/0f/5c/cc23daf0a228d6fadbbfc8a8c5165be33157abe5b9d72af3e127e0542857/polars-1.27.1-cp39-abi3-win_arm64.whl", hash = "sha256:4f238ee2e3c5660345cb62c0f731bbd6768362db96c058098359ecffa42c3c6c", size = 31891470 },
+]
+
 [[package]]
 name = "propcache"
 version = "0.3.1"

@@ -1055,10 +1069,16 @@ version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
     { name = "evaluate", extra = ["template"] },
+    { name = "gradio" },
+    { name = "polars" },
 ]
 
 [package.metadata]
-requires-dist = [
+requires-dist = [
+    { name = "evaluate", extras = ["template"], specifier = ">=0.4.3" },
+    { name = "gradio", specifier = ">=5.24.0" },
+    { name = "polars", specifier = ">=1.27.1" },
+]
 
 [[package]]
 name = "tqdm"