End of training
Browse files- README.md +5 -4
- all_results.json +24 -24
- eval_results.json +10 -10
- predict_results.json +8 -8
- predictions.txt +0 -0
- tb/events.out.tfevents.1725898709.0a1c9bec2a53.65267.1 +3 -0
- train.log +48 -0
- train_results.json +7 -7
- trainer_state.json +163 -156
README.md
CHANGED
|
@@ -2,9 +2,10 @@
|
|
| 2 |
library_name: transformers
|
| 3 |
base_model: IVN-RIN/bioBIT
|
| 4 |
tags:
|
|
|
|
| 5 |
- generated_from_trainer
|
| 6 |
datasets:
|
| 7 |
-
- drugtemist-it-fasttext-75-ner
|
| 8 |
metrics:
|
| 9 |
- precision
|
| 10 |
- recall
|
|
@@ -17,8 +18,8 @@ model-index:
|
|
| 17 |
name: Token Classification
|
| 18 |
type: token-classification
|
| 19 |
dataset:
|
| 20 |
-
name: drugtemist-it-fasttext-75-ner
|
| 21 |
-
type: drugtemist-it-fasttext-75-ner
|
| 22 |
config: DrugTEMIST Italian NER
|
| 23 |
split: validation
|
| 24 |
args: DrugTEMIST Italian NER
|
|
@@ -42,7 +43,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 42 |
|
| 43 |
# output
|
| 44 |
|
| 45 |
-
This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the drugtemist-it-fasttext-75-ner dataset.
|
| 46 |
It achieves the following results on the evaluation set:
|
| 47 |
- Loss: 0.0094
|
| 48 |
- Precision: 0.9169
|
|
|
|
| 2 |
library_name: transformers
|
| 3 |
base_model: IVN-RIN/bioBIT
|
| 4 |
tags:
|
| 5 |
+
- token-classification
|
| 6 |
- generated_from_trainer
|
| 7 |
datasets:
|
| 8 |
+
- Rodrigo1771/drugtemist-it-fasttext-75-ner
|
| 9 |
metrics:
|
| 10 |
- precision
|
| 11 |
- recall
|
|
|
|
| 18 |
name: Token Classification
|
| 19 |
type: token-classification
|
| 20 |
dataset:
|
| 21 |
+
name: Rodrigo1771/drugtemist-it-fasttext-75-ner
|
| 22 |
+
type: Rodrigo1771/drugtemist-it-fasttext-75-ner
|
| 23 |
config: DrugTEMIST Italian NER
|
| 24 |
split: validation
|
| 25 |
args: DrugTEMIST Italian NER
|
|
|
|
| 43 |
|
| 44 |
# output
|
| 45 |
|
| 46 |
+
This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the Rodrigo1771/drugtemist-it-fasttext-75-ner dataset.
|
| 47 |
It achieves the following results on the evaluation set:
|
| 48 |
- Loss: 0.0094
|
| 49 |
- Precision: 0.9169
|
all_results.json
CHANGED
|
@@ -1,26 +1,26 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch": 9.
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_f1": 0.
|
| 5 |
-
"eval_loss": 0.
|
| 6 |
-
"eval_precision": 0.
|
| 7 |
-
"eval_recall": 0.
|
| 8 |
-
"eval_runtime":
|
| 9 |
-
"eval_samples":
|
| 10 |
-
"eval_samples_per_second":
|
| 11 |
-
"eval_steps_per_second":
|
| 12 |
-
"predict_accuracy": 0.
|
| 13 |
-
"predict_f1": 0.
|
| 14 |
-
"predict_loss": 0.
|
| 15 |
-
"predict_precision": 0.
|
| 16 |
-
"predict_recall": 0.
|
| 17 |
-
"predict_runtime":
|
| 18 |
-
"predict_samples_per_second":
|
| 19 |
-
"predict_steps_per_second":
|
| 20 |
-
"total_flos": 1.
|
| 21 |
-
"train_loss": 0.
|
| 22 |
-
"train_runtime":
|
| 23 |
-
"train_samples":
|
| 24 |
-
"train_samples_per_second":
|
| 25 |
-
"train_steps_per_second":
|
| 26 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 9.989701338825952,
|
| 3 |
+
"eval_accuracy": 0.9986302259153467,
|
| 4 |
+
"eval_f1": 0.9230769230769231,
|
| 5 |
+
"eval_loss": 0.009366312995553017,
|
| 6 |
+
"eval_precision": 0.9169054441260746,
|
| 7 |
+
"eval_recall": 0.9293320425943853,
|
| 8 |
+
"eval_runtime": 17.51,
|
| 9 |
+
"eval_samples": 6798,
|
| 10 |
+
"eval_samples_per_second": 388.236,
|
| 11 |
+
"eval_steps_per_second": 48.544,
|
| 12 |
+
"predict_accuracy": 0.998192886032094,
|
| 13 |
+
"predict_f1": 0.8964950711938664,
|
| 14 |
+
"predict_loss": 0.012696487829089165,
|
| 15 |
+
"predict_precision": 0.8772775991425509,
|
| 16 |
+
"predict_recall": 0.9165733482642777,
|
| 17 |
+
"predict_runtime": 33.039,
|
| 18 |
+
"predict_samples_per_second": 442.053,
|
| 19 |
+
"predict_steps_per_second": 55.268,
|
| 20 |
+
"total_flos": 1.7736053837017554e+16,
|
| 21 |
+
"train_loss": 0.002068036902580679,
|
| 22 |
+
"train_runtime": 2701.3395,
|
| 23 |
+
"train_samples": 31053,
|
| 24 |
+
"train_samples_per_second": 114.954,
|
| 25 |
+
"train_steps_per_second": 1.795
|
| 26 |
}
|
eval_results.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch": 9.
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_f1": 0.
|
| 5 |
-
"eval_loss": 0.
|
| 6 |
-
"eval_precision": 0.
|
| 7 |
-
"eval_recall": 0.
|
| 8 |
-
"eval_runtime":
|
| 9 |
-
"eval_samples":
|
| 10 |
-
"eval_samples_per_second":
|
| 11 |
-
"eval_steps_per_second":
|
| 12 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 9.989701338825952,
|
| 3 |
+
"eval_accuracy": 0.9986302259153467,
|
| 4 |
+
"eval_f1": 0.9230769230769231,
|
| 5 |
+
"eval_loss": 0.009366312995553017,
|
| 6 |
+
"eval_precision": 0.9169054441260746,
|
| 7 |
+
"eval_recall": 0.9293320425943853,
|
| 8 |
+
"eval_runtime": 17.51,
|
| 9 |
+
"eval_samples": 6798,
|
| 10 |
+
"eval_samples_per_second": 388.236,
|
| 11 |
+
"eval_steps_per_second": 48.544
|
| 12 |
}
|
predict_results.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"predict_accuracy": 0.
|
| 3 |
-
"predict_f1": 0.
|
| 4 |
-
"predict_loss": 0.
|
| 5 |
-
"predict_precision": 0.
|
| 6 |
-
"predict_recall": 0.
|
| 7 |
-
"predict_runtime":
|
| 8 |
-
"predict_samples_per_second":
|
| 9 |
-
"predict_steps_per_second":
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"predict_accuracy": 0.998192886032094,
|
| 3 |
+
"predict_f1": 0.8964950711938664,
|
| 4 |
+
"predict_loss": 0.012696487829089165,
|
| 5 |
+
"predict_precision": 0.8772775991425509,
|
| 6 |
+
"predict_recall": 0.9165733482642777,
|
| 7 |
+
"predict_runtime": 33.039,
|
| 8 |
+
"predict_samples_per_second": 442.053,
|
| 9 |
+
"predict_steps_per_second": 55.268
|
| 10 |
}
|
predictions.txt
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tb/events.out.tfevents.1725898709.0a1c9bec2a53.65267.1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64471db1189ffc9cdb3c758e8bad5d7fe51fb1f89b23f34814e661acc0324845
|
| 3 |
+
size 560
|
train.log
CHANGED
|
@@ -1588,3 +1588,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
|
|
| 1588 |
{'eval_loss': 0.009366312995553017, 'eval_precision': 0.9169054441260746, 'eval_recall': 0.9293320425943853, 'eval_f1': 0.9230769230769231, 'eval_accuracy': 0.9986302259153467, 'eval_runtime': 17.4762, 'eval_samples_per_second': 388.987, 'eval_steps_per_second': 48.638, 'epoch': 9.99}
|
| 1589 |
{'train_runtime': 2701.3395, 'train_samples_per_second': 114.954, 'train_steps_per_second': 1.795, 'train_loss': 0.002068036902580679, 'epoch': 9.99}
|
| 1590 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1591 |
0%| | 0/850 [00:00<?, ?it/s]
|
| 1592 |
1%| | 9/850 [00:00<00:09, 84.85it/s]
|
| 1593 |
2%|▏ | 18/850 [00:00<00:12, 69.16it/s]
|
| 1594 |
3%|▎ | 26/850 [00:00<00:12, 67.03it/s]
|
| 1595 |
4%|▍ | 34/850 [00:00<00:11, 69.34it/s]
|
| 1596 |
5%|▍ | 42/850 [00:00<00:12, 66.38it/s]
|
| 1597 |
6%|▌ | 49/850 [00:00<00:14, 55.56it/s]
|
| 1598 |
7%|▋ | 57/850 [00:00<00:13, 60.68it/s]
|
| 1599 |
8%|▊ | 64/850 [00:01<00:13, 59.50it/s]
|
| 1600 |
8%|▊ | 71/850 [00:01<00:14, 53.38it/s]
|
| 1601 |
9%|▉ | 77/850 [00:01<00:14, 54.52it/s]
|
| 1602 |
10%|▉ | 83/850 [00:01<00:14, 54.60it/s]
|
| 1603 |
11%|█ | 91/850 [00:01<00:12, 59.33it/s]
|
| 1604 |
12%|█▏ | 98/850 [00:01<00:12, 60.45it/s]
|
| 1605 |
12%|█▏ | 106/850 [00:01<00:11, 64.35it/s]
|
| 1606 |
13%|█▎ | 113/850 [00:01<00:11, 63.96it/s]
|
| 1607 |
14%|█▍ | 120/850 [00:01<00:11, 64.78it/s]
|
| 1608 |
15%|█▍ | 127/850 [00:02<00:11, 65.12it/s]
|
| 1609 |
16%|█▌ | 134/850 [00:02<00:11, 61.87it/s]
|
| 1610 |
17%|█▋ | 142/850 [00:02<00:11, 64.23it/s]
|
| 1611 |
18%|█▊ | 149/850 [00:02<00:11, 60.11it/s]
|
| 1612 |
18%|█▊ | 156/850 [00:02<00:11, 60.66it/s]
|
| 1613 |
19%|█▉ | 163/850 [00:02<00:10, 62.78it/s]
|
| 1614 |
20%|██ | 170/850 [00:02<00:10, 63.49it/s]
|
| 1615 |
21%|██ | 177/850 [00:02<00:10, 65.22it/s]
|
| 1616 |
22%|██▏ | 184/850 [00:02<00:10, 62.35it/s]
|
| 1617 |
22%|██▏ | 191/850 [00:03<00:10, 60.53it/s]
|
| 1618 |
23%|██▎ | 198/850 [00:03<00:10, 62.55it/s]
|
| 1619 |
24%|██▍ | 205/850 [00:03<00:10, 61.27it/s]
|
| 1620 |
25%|██▍ | 212/850 [00:03<00:10, 61.68it/s]
|
| 1621 |
26%|██▌ | 219/850 [00:03<00:10, 60.02it/s]
|
| 1622 |
27%|██▋ | 227/850 [00:03<00:09, 63.15it/s]
|
| 1623 |
28%|██▊ | 234/850 [00:03<00:09, 64.86it/s]
|
| 1624 |
28%|██▊ | 241/850 [00:03<00:09, 63.81it/s]
|
| 1625 |
29%|██▉ | 248/850 [00:04<00:09, 61.38it/s]
|
| 1626 |
30%|███ | 256/850 [00:04<00:09, 65.97it/s]
|
| 1627 |
31%|███ | 263/850 [00:04<00:09, 65.18it/s]
|
| 1628 |
32%|███▏ | 271/850 [00:04<00:08, 66.94it/s]
|
| 1629 |
33%|███▎ | 279/850 [00:04<00:08, 68.88it/s]
|
| 1630 |
34%|███▍ | 287/850 [00:04<00:08, 69.84it/s]
|
| 1631 |
35%|███▍ | 295/850 [00:04<00:08, 69.34it/s]
|
| 1632 |
36%|███▌ | 302/850 [00:04<00:08, 66.76it/s]
|
| 1633 |
36%|███▋ | 309/850 [00:04<00:08, 67.41it/s]
|
| 1634 |
37%|███▋ | 316/850 [00:04<00:07, 67.96it/s]
|
| 1635 |
38%|███▊ | 323/850 [00:05<00:07, 68.48it/s]
|
| 1636 |
39%|███▉ | 330/850 [00:05<00:07, 67.10it/s]
|
| 1637 |
40%|███▉ | 337/850 [00:05<00:07, 66.09it/s]
|
| 1638 |
40%|████ | 344/850 [00:05<00:07, 66.02it/s]
|
| 1639 |
41%|████▏ | 351/850 [00:05<00:07, 66.82it/s]
|
| 1640 |
42%|████▏ | 358/850 [00:05<00:07, 64.35it/s]
|
| 1641 |
43%|████▎ | 365/850 [00:05<00:07, 63.66it/s]
|
| 1642 |
44%|████▍ | 372/850 [00:05<00:07, 65.24it/s]
|
| 1643 |
45%|████▍ | 379/850 [00:05<00:07, 59.31it/s]
|
| 1644 |
45%|████▌ | 386/850 [00:06<00:07, 59.44it/s]
|
| 1645 |
46%|████▌ | 393/850 [00:06<00:07, 58.18it/s]
|
| 1646 |
47%|████▋ | 399/850 [00:06<00:07, 56.50it/s]
|
| 1647 |
48%|████▊ | 407/850 [00:06<00:07, 62.14it/s]
|
| 1648 |
49%|████▉ | 415/850 [00:06<00:06, 66.41it/s]
|
| 1649 |
50%|████▉ | 423/850 [00:06<00:06, 68.67it/s]
|
| 1650 |
51%|█████ | 431/850 [00:06<00:05, 70.17it/s]
|
| 1651 |
52%|█████▏ | 439/850 [00:06<00:06, 66.16it/s]
|
| 1652 |
52%|█████▏ | 446/850 [00:07<00:06, 63.58it/s]
|
| 1653 |
53%|█████▎ | 453/850 [00:07<00:06, 61.74it/s]
|
| 1654 |
54%|█████▍ | 461/850 [00:07<00:05, 64.99it/s]
|
| 1655 |
55%|█████▌ | 469/850 [00:07<00:05, 67.95it/s]
|
| 1656 |
56%|█████▌ | 477/850 [00:07<00:05, 69.70it/s]
|
| 1657 |
57%|█████▋ | 485/850 [00:07<00:05, 66.98it/s]
|
| 1658 |
58%|█████▊ | 492/850 [00:07<00:05, 65.57it/s]
|
| 1659 |
59%|█████▉ | 500/850 [00:07<00:05, 67.64it/s]
|
| 1660 |
60%|█████▉ | 508/850 [00:07<00:04, 69.00it/s]
|
| 1661 |
61%|██████ | 515/850 [00:08<00:04, 67.70it/s]
|
| 1662 |
61%|██████▏ | 522/850 [00:08<00:04, 68.14it/s]
|
| 1663 |
62%|██████▏ | 529/850 [00:08<00:05, 63.43it/s]
|
| 1664 |
63%|██████▎ | 536/850 [00:08<00:05, 62.14it/s]
|
| 1665 |
64%|██████▍ | 543/850 [00:08<00:05, 60.12it/s]
|
| 1666 |
65%|██████▍ | 550/850 [00:08<00:05, 58.62it/s]
|
| 1667 |
66%|██████▌ | 557/850 [00:08<00:04, 59.23it/s]
|
| 1668 |
66%|██████▌ | 563/850 [00:08<00:05, 55.59it/s]
|
| 1669 |
67%|██████▋ | 570/850 [00:08<00:04, 57.72it/s]
|
| 1670 |
68%|██████▊ | 577/850 [00:09<00:04, 57.68it/s]
|
| 1671 |
69%|██████▊ | 584/850 [00:09<00:04, 59.76it/s]
|
| 1672 |
70%|██████▉ | 591/850 [00:09<00:04, 60.25it/s]
|
| 1673 |
70%|███████ | 598/850 [00:09<00:04, 60.81it/s]
|
| 1674 |
71%|███████ | 605/850 [00:09<00:04, 60.53it/s]
|
| 1675 |
72%|███████▏ | 612/850 [00:09<00:03, 62.53it/s]
|
| 1676 |
73%|███████▎ | 619/850 [00:09<00:03, 64.26it/s]
|
| 1677 |
74%|███████▎ | 626/850 [00:09<00:03, 65.14it/s]
|
| 1678 |
74%|███████▍ | 633/850 [00:09<00:03, 63.37it/s]
|
| 1679 |
75%|███████▌ | 640/850 [00:10<00:03, 64.52it/s]
|
| 1680 |
76%|███████▌ | 648/850 [00:10<00:02, 67.99it/s]
|
| 1681 |
77%|███████▋ | 655/850 [00:10<00:02, 68.40it/s]
|
| 1682 |
78%|███████▊ | 662/850 [00:10<00:02, 68.11it/s]
|
| 1683 |
79%|███████▊ | 669/850 [00:10<00:02, 68.30it/s]
|
| 1684 |
80%|███████▉ | 676/850 [00:10<00:02, 64.43it/s]
|
| 1685 |
80%|████████ | 683/850 [00:10<00:02, 64.38it/s]
|
| 1686 |
81%|████████ | 690/850 [00:10<00:02, 63.19it/s]
|
| 1687 |
82%|████████▏ | 697/850 [00:10<00:02, 63.47it/s]
|
| 1688 |
83%|████████▎ | 704/850 [00:11<00:02, 62.55it/s]
|
| 1689 |
84%|████████▎ | 711/850 [00:11<00:02, 59.59it/s]
|
| 1690 |
84%|████████▍ | 717/850 [00:11<00:02, 58.91it/s]
|
| 1691 |
85%|████████▌ | 724/850 [00:11<00:02, 59.52it/s]
|
| 1692 |
86%|████████▌ | 730/850 [00:11<00:02, 57.96it/s]
|
| 1693 |
87%|████████▋ | 737/850 [00:11<00:01, 60.64it/s]
|
| 1694 |
88%|████████▊ | 744/850 [00:11<00:01, 57.91it/s]
|
| 1695 |
88%|████████▊ | 750/850 [00:11<00:01, 55.74it/s]
|
| 1696 |
89%|████████▉ | 757/850 [00:12<00:01, 59.42it/s]
|
| 1697 |
90%|████████▉ | 764/850 [00:12<00:01, 61.63it/s]
|
| 1698 |
91%|█████████ | 771/850 [00:12<00:01, 59.98it/s]
|
| 1699 |
92%|█████████▏| 778/850 [00:12<00:01, 59.34it/s]
|
| 1700 |
92%|█████████▏| 784/850 [00:12<00:01, 56.57it/s]
|
| 1701 |
93%|█████████▎| 790/850 [00:12<00:01, 54.23it/s]
|
| 1702 |
94%|█████████▍| 797/850 [00:12<00:00, 58.26it/s]
|
| 1703 |
94%|█████████▍| 803/850 [00:12<00:00, 58.70it/s]
|
| 1704 |
95%|█████████▌| 809/850 [00:12<00:00, 58.22it/s]
|
| 1705 |
96%|█████████▌| 815/850 [00:13<00:00, 57.34it/s]
|
| 1706 |
97%|█████████▋| 823/850 [00:13<00:00, 62.38it/s]
|
| 1707 |
98%|█████████▊| 830/850 [00:13<00:00, 60.31it/s]
|
| 1708 |
98%|█████████▊| 837/850 [00:13<00:00, 61.46it/s]
|
| 1709 |
99%|█████████▉| 844/850 [00:13<00:00, 62.52it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1710 |
0%| | 0/1826 [00:00<?, ?it/s]
|
| 1711 |
1%| | 10/1826 [00:00<00:19, 94.11it/s]
|
| 1712 |
1%| | 20/1826 [00:00<00:26, 68.33it/s]
|
| 1713 |
2%|▏ | 28/1826 [00:00<00:26, 67.41it/s]
|
| 1714 |
2%|▏ | 35/1826 [00:00<00:26, 67.50it/s]
|
| 1715 |
2%|▏ | 43/1826 [00:00<00:25, 70.74it/s]
|
| 1716 |
3%|▎ | 51/1826 [00:00<00:24, 72.57it/s]
|
| 1717 |
3%|▎ | 59/1826 [00:00<00:26, 67.19it/s]
|
| 1718 |
4%|▎ | 66/1826 [00:00<00:28, 61.91it/s]
|
| 1719 |
4%|▍ | 74/1826 [00:01<00:27, 64.76it/s]
|
| 1720 |
4%|▍ | 82/1826 [00:01<00:25, 67.85it/s]
|
| 1721 |
5%|▍ | 89/1826 [00:01<00:25, 66.96it/s]
|
| 1722 |
5%|▌ | 98/1826 [00:01<00:23, 72.93it/s]
|
| 1723 |
6%|▌ | 107/1826 [00:01<00:22, 76.04it/s]
|
| 1724 |
6%|▋ | 115/1826 [00:01<00:22, 74.77it/s]
|
| 1725 |
7%|▋ | 123/1826 [00:01<00:25, 67.64it/s]
|
| 1726 |
7%|▋ | 130/1826 [00:01<00:25, 65.55it/s]
|
| 1727 |
8%|▊ | 138/1826 [00:02<00:25, 66.69it/s]
|
| 1728 |
8%|▊ | 146/1826 [00:02<00:24, 69.01it/s]
|
| 1729 |
8%|▊ | 153/1826 [00:02<00:24, 69.21it/s]
|
| 1730 |
9%|▉ | 160/1826 [00:02<00:27, 60.78it/s]
|
| 1731 |
9%|▉ | 167/1826 [00:02<00:26, 61.52it/s]
|
| 1732 |
10%|▉ | 176/1826 [00:02<00:24, 68.18it/s]
|
| 1733 |
10%|█ | 184/1826 [00:02<00:23, 69.66it/s]
|
| 1734 |
11%|█ | 192/1826 [00:02<00:23, 69.46it/s]
|
| 1735 |
11%|█ | 200/1826 [00:02<00:24, 67.41it/s]
|
| 1736 |
11%|█▏ | 207/1826 [00:03<00:24, 66.24it/s]
|
| 1737 |
12%|█▏ | 214/1826 [00:03<00:24, 66.73it/s]
|
| 1738 |
12%|█▏ | 221/1826 [00:03<00:23, 67.00it/s]
|
| 1739 |
12%|█▏ | 228/1826 [00:03<00:25, 62.86it/s]
|
| 1740 |
13%|█▎ | 235/1826 [00:03<00:26, 59.84it/s]
|
| 1741 |
13%|█▎ | 242/1826 [00:03<00:25, 61.91it/s]
|
| 1742 |
14%|█▎ | 249/1826 [00:03<00:25, 61.34it/s]
|
| 1743 |
14%|█▍ | 256/1826 [00:03<00:25, 62.00it/s]
|
| 1744 |
14%|█▍ | 263/1826 [00:03<00:24, 62.93it/s]
|
| 1745 |
15%|█▍ | 271/1826 [00:04<00:23, 67.26it/s]
|
| 1746 |
15%|█▌ | 280/1826 [00:04<00:21, 72.72it/s]
|
| 1747 |
16%|█▌ | 288/1826 [00:04<00:21, 70.72it/s]
|
| 1748 |
16%|█▌ | 296/1826 [00:04<00:21, 69.79it/s]
|
| 1749 |
17%|█▋ | 304/1826 [00:04<00:22, 67.51it/s]
|
| 1750 |
17%|█▋ | 312/1826 [00:04<00:21, 69.04it/s]
|
| 1751 |
17%|█▋ | 319/1826 [00:04<00:21, 68.83it/s]
|
| 1752 |
18%|█▊ | 327/1826 [00:04<00:21, 70.40it/s]
|
| 1753 |
18%|█▊ | 335/1826 [00:04<00:21, 69.17it/s]
|
| 1754 |
19%|█▉ | 343/1826 [00:05<00:21, 70.48it/s]
|
| 1755 |
19%|█▉ | 351/1826 [00:05<00:22, 65.72it/s]
|
| 1756 |
20%|█▉ | 358/1826 [00:05<00:22, 64.30it/s]
|
| 1757 |
20%|██ | 366/1826 [00:05<00:21, 68.34it/s]
|
| 1758 |
20%|██ | 374/1826 [00:05<00:20, 69.37it/s]
|
| 1759 |
21%|██ | 382/1826 [00:05<00:20, 71.56it/s]
|
| 1760 |
21%|██▏ | 390/1826 [00:05<00:22, 64.90it/s]
|
| 1761 |
22%|██▏ | 398/1826 [00:05<00:21, 66.61it/s]
|
| 1762 |
22%|██▏ | 405/1826 [00:06<00:21, 66.68it/s]
|
| 1763 |
23%|██▎ | 412/1826 [00:06<00:22, 63.63it/s]
|
| 1764 |
23%|██▎ | 420/1826 [00:06<00:21, 65.85it/s]
|
| 1765 |
23%|██▎ | 427/1826 [00:06<00:22, 62.95it/s]
|
| 1766 |
24%|██▍ | 434/1826 [00:06<00:21, 63.77it/s]
|
| 1767 |
24%|██▍ | 441/1826 [00:06<00:22, 61.74it/s]
|
| 1768 |
25%|██▍ | 450/1826 [00:06<00:20, 68.00it/s]
|
| 1769 |
25%|██▌ | 457/1826 [00:06<00:20, 67.45it/s]
|
| 1770 |
26%|██▌ | 466/1826 [00:06<00:19, 69.66it/s]
|
| 1771 |
26%|██▌ | 473/1826 [00:07<00:21, 64.33it/s]
|
| 1772 |
26%|██▋ | 480/1826 [00:07<00:21, 63.73it/s]
|
| 1773 |
27%|██▋ | 487/1826 [00:07<00:21, 62.35it/s]
|
| 1774 |
27%|██▋ | 494/1826 [00:07<00:21, 62.04it/s]
|
| 1775 |
27%|██▋ | 502/1826 [00:07<00:20, 64.86it/s]
|
| 1776 |
28%|██▊ | 510/1826 [00:07<00:19, 67.59it/s]
|
| 1777 |
28%|██▊ | 518/1826 [00:07<00:18, 70.73it/s]
|
| 1778 |
29%|██▉ | 527/1826 [00:07<00:17, 73.95it/s]
|
| 1779 |
29%|██▉ | 535/1826 [00:07<00:17, 71.81it/s]
|
| 1780 |
30%|██▉ | 543/1826 [00:08<00:17, 71.49it/s]
|
| 1781 |
30%|███ | 551/1826 [00:08<00:19, 65.90it/s]
|
| 1782 |
31%|███ | 558/1826 [00:08<00:19, 64.66it/s]
|
| 1783 |
31%|███ | 567/1826 [00:08<00:17, 70.27it/s]
|
| 1784 |
32%|███▏ | 576/1826 [00:08<00:17, 73.36it/s]
|
| 1785 |
32%|███▏ | 584/1826 [00:08<00:18, 68.09it/s]
|
| 1786 |
32%|███▏ | 592/1826 [00:08<00:17, 70.54it/s]
|
| 1787 |
33%|███▎ | 600/1826 [00:08<00:17, 69.88it/s]
|
| 1788 |
33%|███▎ | 608/1826 [00:09<00:18, 65.58it/s]
|
| 1789 |
34%|███▎ | 615/1826 [00:09<00:18, 65.45it/s]
|
| 1790 |
34%|███▍ | 622/1826 [00:09<00:18, 66.26it/s]
|
| 1791 |
35%|███▍ | 632/1826 [00:09<00:16, 73.73it/s]
|
| 1792 |
35%|███▌ | 641/1826 [00:09<00:15, 76.81it/s]
|
| 1793 |
36%|███▌ | 649/1826 [00:09<00:16, 71.30it/s]
|
| 1794 |
36%|███▌ | 657/1826 [00:09<00:16, 69.22it/s]
|
| 1795 |
37%|███▋ | 667/1826 [00:09<00:15, 75.05it/s]
|
| 1796 |
37%|███▋ | 675/1826 [00:09<00:16, 71.22it/s]
|
| 1797 |
37%|███▋ | 683/1826 [00:10<00:16, 69.84it/s]
|
| 1798 |
38%|███▊ | 692/1826 [00:10<00:15, 73.46it/s]
|
| 1799 |
38%|███▊ | 700/1826 [00:10<00:15, 73.83it/s]
|
| 1800 |
39%|███▉ | 708/1826 [00:10<00:14, 75.07it/s]
|
| 1801 |
39%|███▉ | 718/1826 [00:10<00:13, 79.86it/s]
|
| 1802 |
40%|███▉ | 727/1826 [00:10<00:13, 78.55it/s]
|
| 1803 |
40%|████ | 735/1826 [00:10<00:14, 76.87it/s]
|
| 1804 |
41%|████ | 743/1826 [00:10<00:14, 75.47it/s]
|
| 1805 |
41%|████ | 752/1826 [00:10<00:13, 79.11it/s]
|
| 1806 |
42%|████▏ | 760/1826 [00:11<00:13, 77.59it/s]
|
| 1807 |
42%|████▏ | 769/1826 [00:11<00:13, 78.55it/s]
|
| 1808 |
43%|████▎ | 777/1826 [00:11<00:13, 77.65it/s]
|
| 1809 |
43%|████▎ | 785/1826 [00:11<00:14, 71.54it/s]
|
| 1810 |
43%|████▎ | 793/1826 [00:11<00:14, 73.07it/s]
|
| 1811 |
44%|████▍ | 801/1826 [00:11<00:15, 68.26it/s]
|
| 1812 |
44%|████▍ | 809/1826 [00:11<00:14, 69.66it/s]
|
| 1813 |
45%|████▍ | 817/1826 [00:11<00:14, 71.79it/s]
|
| 1814 |
45%|████▌ | 826/1826 [00:11<00:13, 76.14it/s]
|
| 1815 |
46%|████▌ | 834/1826 [00:12<00:14, 68.30it/s]
|
| 1816 |
46%|████▌ | 842/1826 [00:12<00:14, 69.69it/s]
|
| 1817 |
47%|████▋ | 851/1826 [00:12<00:13, 72.87it/s]
|
| 1818 |
47%|████▋ | 860/1826 [00:12<00:12, 77.24it/s]
|
| 1819 |
48%|████▊ | 868/1826 [00:12<00:13, 72.75it/s]
|
| 1820 |
48%|████▊ | 876/1826 [00:12<00:13, 72.25it/s]
|
| 1821 |
48%|████▊ | 884/1826 [00:12<00:13, 71.43it/s]
|
| 1822 |
49%|████▉ | 892/1826 [00:12<00:13, 68.24it/s]
|
| 1823 |
49%|████▉ | 902/1826 [00:13<00:12, 74.85it/s]
|
| 1824 |
50%|████▉ | 911/1826 [00:13<00:11, 77.59it/s]
|
| 1825 |
50%|█████ | 920/1826 [00:13<00:11, 80.11it/s]
|
| 1826 |
51%|█████ | 929/1826 [00:13<00:11, 79.42it/s]
|
| 1827 |
51%|█████▏ | 937/1826 [00:13<00:12, 73.54it/s]
|
| 1828 |
52%|█████▏ | 945/1826 [00:13<00:12, 68.71it/s]
|
| 1829 |
52%|█████▏ | 952/1826 [00:13<00:12, 68.31it/s]
|
| 1830 |
53%|█████▎ | 960/1826 [00:13<00:12, 69.39it/s]
|
| 1831 |
53%|█████▎ | 968/1826 [00:13<00:11, 71.85it/s]
|
| 1832 |
53%|█████▎ | 976/1826 [00:14<00:12, 67.88it/s]
|
| 1833 |
54%|█████▍ | 985/1826 [00:14<00:11, 71.98it/s]
|
| 1834 |
54%|█████▍ | 993/1826 [00:14<00:11, 69.95it/s]
|
| 1835 |
55%|█████▍ | 1001/1826 [00:14<00:12, 67.60it/s]
|
| 1836 |
55%|█████▌ | 1008/1826 [00:14<00:12, 68.06it/s]
|
| 1837 |
56%|█████▌ | 1016/1826 [00:14<00:11, 70.58it/s]
|
| 1838 |
56%|█████▌ | 1025/1826 [00:14<00:10, 74.99it/s]
|
| 1839 |
57%|█████▋ | 1033/1826 [00:14<00:10, 73.75it/s]
|
| 1840 |
57%|█████▋ | 1041/1826 [00:14<00:10, 71.80it/s]
|
| 1841 |
57%|█████▋ | 1049/1826 [00:15<00:11, 69.86it/s]
|
| 1842 |
58%|█████▊ | 1057/1826 [00:15<00:11, 68.46it/s]
|
| 1843 |
58%|█████▊ | 1066/1826 [00:15<00:10, 73.61it/s]
|
| 1844 |
59%|█████▉ | 1074/1826 [00:15<00:10, 74.53it/s]
|
| 1845 |
59%|█████▉ | 1083/1826 [00:15<00:09, 78.56it/s]
|
| 1846 |
60%|█████▉ | 1092/1826 [00:15<00:09, 79.51it/s]
|
| 1847 |
60%|██████ | 1101/1826 [00:15<00:09, 79.20it/s]
|
| 1848 |
61%|██████ | 1109/1826 [00:15<00:09, 74.39it/s]
|
| 1849 |
61%|██████ | 1117/1826 [00:15<00:09, 71.61it/s]
|
| 1850 |
62%|██████▏ | 1126/1826 [00:16<00:09, 73.71it/s]
|
| 1851 |
62%|██████▏ | 1134/1826 [00:16<00:09, 75.14it/s]
|
| 1852 |
63%|██████▎ | 1142/1826 [00:16<00:09, 72.85it/s]
|
| 1853 |
63%|██████▎ | 1150/1826 [00:16<00:09, 71.67it/s]
|
| 1854 |
63%|██████▎ | 1158/1826 [00:16<00:09, 71.29it/s]
|
| 1855 |
64%|██████▍ | 1166/1826 [00:16<00:09, 67.84it/s]
|
| 1856 |
64%|██████▍ | 1174/1826 [00:16<00:09, 69.65it/s]
|
| 1857 |
65%|██████▍ | 1182/1826 [00:16<00:09, 67.30it/s]
|
| 1858 |
65%|██████▌ | 1190/1826 [00:17<00:09, 68.54it/s]
|
| 1859 |
66%|██████▌ | 1198/1826 [00:17<00:08, 70.13it/s]
|
| 1860 |
66%|██████▌ | 1206/1826 [00:17<00:08, 70.01it/s]
|
| 1861 |
66%|██████▋ | 1214/1826 [00:17<00:09, 64.04it/s]
|
| 1862 |
67%|██████▋ | 1221/1826 [00:17<00:09, 62.76it/s]
|
| 1863 |
67%|██████▋ | 1230/1826 [00:17<00:08, 68.49it/s]
|
| 1864 |
68%|██████▊ | 1238/1826 [00:17<00:08, 69.88it/s]
|
| 1865 |
68%|██████▊ | 1246/1826 [00:17<00:08, 70.54it/s]
|
| 1866 |
69%|██████▊ | 1254/1826 [00:17<00:08, 64.45it/s]
|
| 1867 |
69%|██████▉ | 1262/1826 [00:18<00:08, 66.57it/s]
|
| 1868 |
70%|██████▉ | 1270/1826 [00:18<00:08, 69.08it/s]
|
| 1869 |
70%|██████▉ | 1278/1826 [00:18<00:07, 71.87it/s]
|
| 1870 |
70%|███████ | 1286/1826 [00:18<00:07, 72.39it/s]
|
| 1871 |
71%|███████ | 1295/1826 [00:18<00:06, 76.32it/s]
|
| 1872 |
71%|███████▏ | 1304/1826 [00:18<00:06, 79.67it/s]
|
| 1873 |
72%|███████▏ | 1313/1826 [00:18<00:06, 79.61it/s]
|
| 1874 |
72%|███████▏ | 1322/1826 [00:18<00:06, 78.03it/s]
|
| 1875 |
73%|███████▎ | 1330/1826 [00:18<00:06, 75.63it/s]
|
| 1876 |
73%|███████▎ | 1338/1826 [00:19<00:06, 73.90it/s]
|
| 1877 |
74%|███████▎ | 1346/1826 [00:19<00:06, 69.31it/s]
|
| 1878 |
74%|███████▍ | 1354/1826 [00:19<00:06, 70.19it/s]
|
| 1879 |
75%|███████▍ | 1362/1826 [00:19<00:06, 71.84it/s]
|
| 1880 |
75%|███████▌ | 1370/1826 [00:19<00:06, 67.49it/s]
|
| 1881 |
76%|███████▌ | 1379/1826 [00:19<00:06, 72.42it/s]
|
| 1882 |
76%|███████▌ | 1387/1826 [00:19<00:05, 74.47it/s]
|
| 1883 |
76%|███████▋ | 1395/1826 [00:19<00:05, 75.32it/s]
|
| 1884 |
77%|███████▋ | 1404/1826 [00:19<00:05, 79.20it/s]
|
| 1885 |
77%|███████▋ | 1413/1826 [00:20<00:05, 80.03it/s]
|
| 1886 |
78%|███████▊ | 1422/1826 [00:20<00:05, 76.15it/s]
|
| 1887 |
78%|███████▊ | 1430/1826 [00:20<00:05, 72.26it/s]
|
| 1888 |
79%|███████▉ | 1438/1826 [00:20<00:05, 68.01it/s]
|
| 1889 |
79%|███████▉ | 1446/1826 [00:20<00:05, 69.61it/s]
|
| 1890 |
80%|███████▉ | 1454/1826 [00:20<00:05, 71.49it/s]
|
| 1891 |
80%|████████ | 1462/1826 [00:20<00:05, 72.56it/s]
|
| 1892 |
81%|████████ | 1470/1826 [00:20<00:05, 66.00it/s]
|
| 1893 |
81%|████████ | 1477/1826 [00:21<00:05, 65.05it/s]
|
| 1894 |
81%|████████▏ | 1484/1826 [00:21<00:05, 65.40it/s]
|
| 1895 |
82%|████████▏ | 1491/1826 [00:21<00:05, 60.28it/s]
|
| 1896 |
82%|████████▏ | 1498/1826 [00:21<00:05, 61.97it/s]
|
| 1897 |
82%|████████▏ | 1505/1826 [00:21<00:05, 60.61it/s]
|
| 1898 |
83%|████████▎ | 1513/1826 [00:21<00:04, 64.51it/s]
|
| 1899 |
83%|████████▎ | 1520/1826 [00:21<00:04, 65.43it/s]
|
| 1900 |
84%|████████▎ | 1527/1826 [00:21<00:04, 62.45it/s]
|
| 1901 |
84%|████████▍ | 1534/1826 [00:21<00:04, 64.07it/s]
|
| 1902 |
84%|████████▍ | 1541/1826 [00:22<00:04, 63.72it/s]
|
| 1903 |
85%|████████▍ | 1548/1826 [00:22<00:04, 64.27it/s]
|
| 1904 |
85%|████████▌ | 1555/1826 [00:22<00:04, 65.72it/s]
|
| 1905 |
86%|████████▌ | 1562/1826 [00:22<00:03, 66.77it/s]
|
| 1906 |
86%|████████▌ | 1569/1826 [00:22<00:04, 63.11it/s]
|
| 1907 |
86%|████████▋ | 1576/1826 [00:22<00:03, 62.95it/s]
|
| 1908 |
87%|████████▋ | 1584/1826 [00:22<00:03, 65.44it/s]
|
| 1909 |
87%|████████▋ | 1591/1826 [00:22<00:03, 63.71it/s]
|
| 1910 |
88%|████████▊ | 1598/1826 [00:22<00:03, 63.28it/s]
|
| 1911 |
88%|████████▊ | 1607/1826 [00:23<00:03, 69.37it/s]
|
| 1912 |
88%|████████▊ | 1614/1826 [00:23<00:03, 68.35it/s]
|
| 1913 |
89%|████████▉ | 1621/1826 [00:23<00:03, 61.61it/s]
|
| 1914 |
89%|████████▉ | 1628/1826 [00:23<00:03, 56.69it/s]
|
| 1915 |
90%|████████▉ | 1636/1826 [00:23<00:03, 60.23it/s]
|
| 1916 |
90%|████████▉ | 1643/1826 [00:23<00:03, 60.95it/s]
|
| 1917 |
90%|█████████ | 1651/1826 [00:23<00:02, 65.46it/s]
|
| 1918 |
91%|█████████ | 1658/1826 [00:23<00:02, 64.85it/s]
|
| 1919 |
91%|█████████ | 1665/1826 [00:24<00:02, 65.01it/s]
|
| 1920 |
92%|█████████▏| 1673/1826 [00:24<00:02, 68.49it/s]
|
| 1921 |
92%|█████████▏| 1681/1826 [00:24<00:02, 70.31it/s]
|
| 1922 |
93%|█████████▎| 1690/1826 [00:24<00:01, 73.05it/s]
|
| 1923 |
93%|█████████▎| 1698/1826 [00:24<00:02, 63.59it/s]
|
| 1924 |
93%|█████████▎| 1706/1826 [00:24<00:01, 65.52it/s]
|
| 1925 |
94%|█████████▍| 1714/1826 [00:24<00:01, 68.18it/s]
|
| 1926 |
94%|█████████▍| 1721/1826 [00:24<00:01, 61.69it/s]
|
| 1927 |
95%|█████████▍| 1729/1826 [00:24<00:01, 65.81it/s]
|
| 1928 |
95%|█████████▌| 1737/1826 [00:25<00:01, 67.43it/s]
|
| 1929 |
96%|█████████▌| 1746/1826 [00:25<00:01, 71.34it/s]
|
| 1930 |
96%|█████████▌| 1754/1826 [00:25<00:01, 69.71it/s]
|
| 1931 |
96%|█████████▋| 1762/1826 [00:25<00:00, 68.18it/s]
|
| 1932 |
97%|█████████▋| 1769/1826 [00:25<00:00, 66.79it/s]
|
| 1933 |
97%|█████████▋| 1776/1826 [00:25<00:00, 65.37it/s]
|
| 1934 |
98%|█████████▊| 1784/1826 [00:25<00:00, 69.28it/s]
|
| 1935 |
98%|█████████▊| 1791/1826 [00:25<00:00, 64.98it/s]
|
| 1936 |
98%|█████████▊| 1798/1826 [00:26<00:00, 60.14it/s]
|
| 1937 |
99%|█████████▉| 1805/1826 [00:26<00:00, 62.25it/s]
|
| 1938 |
99%|█████████▉| 1813/1826 [00:26<00:00, 66.07it/s]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1588 |
{'eval_loss': 0.009366312995553017, 'eval_precision': 0.9169054441260746, 'eval_recall': 0.9293320425943853, 'eval_f1': 0.9230769230769231, 'eval_accuracy': 0.9986302259153467, 'eval_runtime': 17.4762, 'eval_samples_per_second': 388.987, 'eval_steps_per_second': 48.638, 'epoch': 9.99}
|
| 1589 |
{'train_runtime': 2701.3395, 'train_samples_per_second': 114.954, 'train_steps_per_second': 1.795, 'train_loss': 0.002068036902580679, 'epoch': 9.99}
|
| 1590 |
|
| 1591 |
+
***** train metrics *****
|
| 1592 |
+
epoch = 9.9897
|
| 1593 |
+
total_flos = 16517987GF
|
| 1594 |
+
train_loss = 0.0021
|
| 1595 |
+
train_runtime = 0:45:01.33
|
| 1596 |
+
train_samples = 31053
|
| 1597 |
+
train_samples_per_second = 114.954
|
| 1598 |
+
train_steps_per_second = 1.795
|
| 1599 |
+
09/09/2024 16:18:12 - INFO - __main__ - *** Evaluate ***
|
| 1600 |
+
[INFO|trainer.py:811] 2024-09-09 16:18:12,179 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
| 1601 |
+
[INFO|trainer.py:3819] 2024-09-09 16:18:12,182 >>
|
| 1602 |
+
***** Running Evaluation *****
|
| 1603 |
+
[INFO|trainer.py:3821] 2024-09-09 16:18:12,182 >> Num examples = 6798
|
| 1604 |
+
[INFO|trainer.py:3824] 2024-09-09 16:18:12,182 >> Batch size = 8
|
| 1605 |
+
|
| 1606 |
0%| | 0/850 [00:00<?, ?it/s]
|
| 1607 |
1%| | 9/850 [00:00<00:09, 84.85it/s]
|
| 1608 |
2%|▏ | 18/850 [00:00<00:12, 69.16it/s]
|
| 1609 |
3%|▎ | 26/850 [00:00<00:12, 67.03it/s]
|
| 1610 |
4%|▍ | 34/850 [00:00<00:11, 69.34it/s]
|
| 1611 |
5%|▍ | 42/850 [00:00<00:12, 66.38it/s]
|
| 1612 |
6%|▌ | 49/850 [00:00<00:14, 55.56it/s]
|
| 1613 |
7%|▋ | 57/850 [00:00<00:13, 60.68it/s]
|
| 1614 |
8%|▊ | 64/850 [00:01<00:13, 59.50it/s]
|
| 1615 |
8%|▊ | 71/850 [00:01<00:14, 53.38it/s]
|
| 1616 |
9%|▉ | 77/850 [00:01<00:14, 54.52it/s]
|
| 1617 |
10%|▉ | 83/850 [00:01<00:14, 54.60it/s]
|
| 1618 |
11%|█ | 91/850 [00:01<00:12, 59.33it/s]
|
| 1619 |
12%|█▏ | 98/850 [00:01<00:12, 60.45it/s]
|
| 1620 |
12%|█▏ | 106/850 [00:01<00:11, 64.35it/s]
|
| 1621 |
13%|█▎ | 113/850 [00:01<00:11, 63.96it/s]
|
| 1622 |
14%|█▍ | 120/850 [00:01<00:11, 64.78it/s]
|
| 1623 |
15%|█▍ | 127/850 [00:02<00:11, 65.12it/s]
|
| 1624 |
16%|█▌ | 134/850 [00:02<00:11, 61.87it/s]
|
| 1625 |
17%|█▋ | 142/850 [00:02<00:11, 64.23it/s]
|
| 1626 |
18%|█▊ | 149/850 [00:02<00:11, 60.11it/s]
|
| 1627 |
18%|█▊ | 156/850 [00:02<00:11, 60.66it/s]
|
| 1628 |
19%|█▉ | 163/850 [00:02<00:10, 62.78it/s]
|
| 1629 |
20%|██ | 170/850 [00:02<00:10, 63.49it/s]
|
| 1630 |
21%|██ | 177/850 [00:02<00:10, 65.22it/s]
|
| 1631 |
22%|██▏ | 184/850 [00:02<00:10, 62.35it/s]
|
| 1632 |
22%|██▏ | 191/850 [00:03<00:10, 60.53it/s]
|
| 1633 |
23%|██▎ | 198/850 [00:03<00:10, 62.55it/s]
|
| 1634 |
24%|██▍ | 205/850 [00:03<00:10, 61.27it/s]
|
| 1635 |
25%|██▍ | 212/850 [00:03<00:10, 61.68it/s]
|
| 1636 |
26%|██▌ | 219/850 [00:03<00:10, 60.02it/s]
|
| 1637 |
27%|██▋ | 227/850 [00:03<00:09, 63.15it/s]
|
| 1638 |
28%|██▊ | 234/850 [00:03<00:09, 64.86it/s]
|
| 1639 |
28%|██▊ | 241/850 [00:03<00:09, 63.81it/s]
|
| 1640 |
29%|██▉ | 248/850 [00:04<00:09, 61.38it/s]
|
| 1641 |
30%|███ | 256/850 [00:04<00:09, 65.97it/s]
|
| 1642 |
31%|███ | 263/850 [00:04<00:09, 65.18it/s]
|
| 1643 |
32%|███▏ | 271/850 [00:04<00:08, 66.94it/s]
|
| 1644 |
33%|███▎ | 279/850 [00:04<00:08, 68.88it/s]
|
| 1645 |
34%|███▍ | 287/850 [00:04<00:08, 69.84it/s]
|
| 1646 |
35%|███▍ | 295/850 [00:04<00:08, 69.34it/s]
|
| 1647 |
36%|███▌ | 302/850 [00:04<00:08, 66.76it/s]
|
| 1648 |
36%|███▋ | 309/850 [00:04<00:08, 67.41it/s]
|
| 1649 |
37%|███▋ | 316/850 [00:04<00:07, 67.96it/s]
|
| 1650 |
38%|███▊ | 323/850 [00:05<00:07, 68.48it/s]
|
| 1651 |
39%|███▉ | 330/850 [00:05<00:07, 67.10it/s]
|
| 1652 |
40%|███▉ | 337/850 [00:05<00:07, 66.09it/s]
|
| 1653 |
40%|████ | 344/850 [00:05<00:07, 66.02it/s]
|
| 1654 |
41%|████▏ | 351/850 [00:05<00:07, 66.82it/s]
|
| 1655 |
42%|████▏ | 358/850 [00:05<00:07, 64.35it/s]
|
| 1656 |
43%|████▎ | 365/850 [00:05<00:07, 63.66it/s]
|
| 1657 |
44%|████▍ | 372/850 [00:05<00:07, 65.24it/s]
|
| 1658 |
45%|████▍ | 379/850 [00:05<00:07, 59.31it/s]
|
| 1659 |
45%|████▌ | 386/850 [00:06<00:07, 59.44it/s]
|
| 1660 |
46%|████▌ | 393/850 [00:06<00:07, 58.18it/s]
|
| 1661 |
47%|████▋ | 399/850 [00:06<00:07, 56.50it/s]
|
| 1662 |
48%|████▊ | 407/850 [00:06<00:07, 62.14it/s]
|
| 1663 |
49%|████▉ | 415/850 [00:06<00:06, 66.41it/s]
|
| 1664 |
50%|████▉ | 423/850 [00:06<00:06, 68.67it/s]
|
| 1665 |
51%|█████ | 431/850 [00:06<00:05, 70.17it/s]
|
| 1666 |
52%|█████▏ | 439/850 [00:06<00:06, 66.16it/s]
|
| 1667 |
52%|█████▏ | 446/850 [00:07<00:06, 63.58it/s]
|
| 1668 |
53%|█████▎ | 453/850 [00:07<00:06, 61.74it/s]
|
| 1669 |
54%|█████▍ | 461/850 [00:07<00:05, 64.99it/s]
|
| 1670 |
55%|█████▌ | 469/850 [00:07<00:05, 67.95it/s]
|
| 1671 |
56%|█████▌ | 477/850 [00:07<00:05, 69.70it/s]
|
| 1672 |
57%|█████▋ | 485/850 [00:07<00:05, 66.98it/s]
|
| 1673 |
58%|█████▊ | 492/850 [00:07<00:05, 65.57it/s]
|
| 1674 |
59%|█████▉ | 500/850 [00:07<00:05, 67.64it/s]
|
| 1675 |
60%|█████▉ | 508/850 [00:07<00:04, 69.00it/s]
|
| 1676 |
61%|██████ | 515/850 [00:08<00:04, 67.70it/s]
|
| 1677 |
61%|██████▏ | 522/850 [00:08<00:04, 68.14it/s]
|
| 1678 |
62%|██████▏ | 529/850 [00:08<00:05, 63.43it/s]
|
| 1679 |
63%|██████▎ | 536/850 [00:08<00:05, 62.14it/s]
|
| 1680 |
64%|██████▍ | 543/850 [00:08<00:05, 60.12it/s]
|
| 1681 |
65%|██████▍ | 550/850 [00:08<00:05, 58.62it/s]
|
| 1682 |
66%|██████▌ | 557/850 [00:08<00:04, 59.23it/s]
|
| 1683 |
66%|██████▌ | 563/850 [00:08<00:05, 55.59it/s]
|
| 1684 |
67%|██████▋ | 570/850 [00:08<00:04, 57.72it/s]
|
| 1685 |
68%|██████▊ | 577/850 [00:09<00:04, 57.68it/s]
|
| 1686 |
69%|██████▊ | 584/850 [00:09<00:04, 59.76it/s]
|
| 1687 |
70%|██████▉ | 591/850 [00:09<00:04, 60.25it/s]
|
| 1688 |
70%|███████ | 598/850 [00:09<00:04, 60.81it/s]
|
| 1689 |
71%|███████ | 605/850 [00:09<00:04, 60.53it/s]
|
| 1690 |
72%|███████▏ | 612/850 [00:09<00:03, 62.53it/s]
|
| 1691 |
73%|███████▎ | 619/850 [00:09<00:03, 64.26it/s]
|
| 1692 |
74%|███████▎ | 626/850 [00:09<00:03, 65.14it/s]
|
| 1693 |
74%|███████▍ | 633/850 [00:09<00:03, 63.37it/s]
|
| 1694 |
75%|███████▌ | 640/850 [00:10<00:03, 64.52it/s]
|
| 1695 |
76%|███████▌ | 648/850 [00:10<00:02, 67.99it/s]
|
| 1696 |
77%|███████▋ | 655/850 [00:10<00:02, 68.40it/s]
|
| 1697 |
78%|███████▊ | 662/850 [00:10<00:02, 68.11it/s]
|
| 1698 |
79%|███████▊ | 669/850 [00:10<00:02, 68.30it/s]
|
| 1699 |
80%|███████▉ | 676/850 [00:10<00:02, 64.43it/s]
|
| 1700 |
80%|████████ | 683/850 [00:10<00:02, 64.38it/s]
|
| 1701 |
81%|████████ | 690/850 [00:10<00:02, 63.19it/s]
|
| 1702 |
82%|████████▏ | 697/850 [00:10<00:02, 63.47it/s]
|
| 1703 |
83%|████████▎ | 704/850 [00:11<00:02, 62.55it/s]
|
| 1704 |
84%|████████▎ | 711/850 [00:11<00:02, 59.59it/s]
|
| 1705 |
84%|████████▍ | 717/850 [00:11<00:02, 58.91it/s]
|
| 1706 |
85%|████████▌ | 724/850 [00:11<00:02, 59.52it/s]
|
| 1707 |
86%|████████▌ | 730/850 [00:11<00:02, 57.96it/s]
|
| 1708 |
87%|████████▋ | 737/850 [00:11<00:01, 60.64it/s]
|
| 1709 |
88%|████████▊ | 744/850 [00:11<00:01, 57.91it/s]
|
| 1710 |
88%|████████▊ | 750/850 [00:11<00:01, 55.74it/s]
|
| 1711 |
89%|████████▉ | 757/850 [00:12<00:01, 59.42it/s]
|
| 1712 |
90%|████████▉ | 764/850 [00:12<00:01, 61.63it/s]
|
| 1713 |
91%|█████████ | 771/850 [00:12<00:01, 59.98it/s]
|
| 1714 |
92%|█████████▏| 778/850 [00:12<00:01, 59.34it/s]
|
| 1715 |
92%|█████████▏| 784/850 [00:12<00:01, 56.57it/s]
|
| 1716 |
93%|█████████▎| 790/850 [00:12<00:01, 54.23it/s]
|
| 1717 |
94%|█████████▍| 797/850 [00:12<00:00, 58.26it/s]
|
| 1718 |
94%|█████████▍| 803/850 [00:12<00:00, 58.70it/s]
|
| 1719 |
95%|█████████▌| 809/850 [00:12<00:00, 58.22it/s]
|
| 1720 |
96%|█████████▌| 815/850 [00:13<00:00, 57.34it/s]
|
| 1721 |
97%|█████████▋| 823/850 [00:13<00:00, 62.38it/s]
|
| 1722 |
98%|█████████▊| 830/850 [00:13<00:00, 60.31it/s]
|
| 1723 |
98%|█████████▊| 837/850 [00:13<00:00, 61.46it/s]
|
| 1724 |
99%|█████████▉| 844/850 [00:13<00:00, 62.52it/s]
|
| 1725 |
+
***** eval metrics *****
|
| 1726 |
+
epoch = 9.9897
|
| 1727 |
+
eval_accuracy = 0.9986
|
| 1728 |
+
eval_f1 = 0.9231
|
| 1729 |
+
eval_loss = 0.0094
|
| 1730 |
+
eval_precision = 0.9169
|
| 1731 |
+
eval_recall = 0.9293
|
| 1732 |
+
eval_runtime = 0:00:17.51
|
| 1733 |
+
eval_samples = 6798
|
| 1734 |
+
eval_samples_per_second = 388.236
|
| 1735 |
+
eval_steps_per_second = 48.544
|
| 1736 |
+
09/09/2024 16:18:29 - INFO - __main__ - *** Predict ***
|
| 1737 |
+
[INFO|trainer.py:811] 2024-09-09 16:18:29,694 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
|
| 1738 |
+
[INFO|trainer.py:3819] 2024-09-09 16:18:29,696 >>
|
| 1739 |
+
***** Running Prediction *****
|
| 1740 |
+
[INFO|trainer.py:3821] 2024-09-09 16:18:29,696 >> Num examples = 14605
|
| 1741 |
+
[INFO|trainer.py:3824] 2024-09-09 16:18:29,696 >> Batch size = 8
|
| 1742 |
+
|
| 1743 |
0%| | 0/1826 [00:00<?, ?it/s]
|
| 1744 |
1%| | 10/1826 [00:00<00:19, 94.11it/s]
|
| 1745 |
1%| | 20/1826 [00:00<00:26, 68.33it/s]
|
| 1746 |
2%|▏ | 28/1826 [00:00<00:26, 67.41it/s]
|
| 1747 |
2%|▏ | 35/1826 [00:00<00:26, 67.50it/s]
|
| 1748 |
2%|▏ | 43/1826 [00:00<00:25, 70.74it/s]
|
| 1749 |
3%|▎ | 51/1826 [00:00<00:24, 72.57it/s]
|
| 1750 |
3%|▎ | 59/1826 [00:00<00:26, 67.19it/s]
|
| 1751 |
4%|▎ | 66/1826 [00:00<00:28, 61.91it/s]
|
| 1752 |
4%|▍ | 74/1826 [00:01<00:27, 64.76it/s]
|
| 1753 |
4%|▍ | 82/1826 [00:01<00:25, 67.85it/s]
|
| 1754 |
5%|▍ | 89/1826 [00:01<00:25, 66.96it/s]
|
| 1755 |
5%|▌ | 98/1826 [00:01<00:23, 72.93it/s]
|
| 1756 |
6%|▌ | 107/1826 [00:01<00:22, 76.04it/s]
|
| 1757 |
6%|▋ | 115/1826 [00:01<00:22, 74.77it/s]
|
| 1758 |
7%|▋ | 123/1826 [00:01<00:25, 67.64it/s]
|
| 1759 |
7%|▋ | 130/1826 [00:01<00:25, 65.55it/s]
|
| 1760 |
8%|▊ | 138/1826 [00:02<00:25, 66.69it/s]
|
| 1761 |
8%|▊ | 146/1826 [00:02<00:24, 69.01it/s]
|
| 1762 |
8%|▊ | 153/1826 [00:02<00:24, 69.21it/s]
|
| 1763 |
9%|▉ | 160/1826 [00:02<00:27, 60.78it/s]
|
| 1764 |
9%|▉ | 167/1826 [00:02<00:26, 61.52it/s]
|
| 1765 |
10%|▉ | 176/1826 [00:02<00:24, 68.18it/s]
|
| 1766 |
10%|█ | 184/1826 [00:02<00:23, 69.66it/s]
|
| 1767 |
11%|█ | 192/1826 [00:02<00:23, 69.46it/s]
|
| 1768 |
11%|█ | 200/1826 [00:02<00:24, 67.41it/s]
|
| 1769 |
11%|█▏ | 207/1826 [00:03<00:24, 66.24it/s]
|
| 1770 |
12%|█▏ | 214/1826 [00:03<00:24, 66.73it/s]
|
| 1771 |
12%|█▏ | 221/1826 [00:03<00:23, 67.00it/s]
|
| 1772 |
12%|█▏ | 228/1826 [00:03<00:25, 62.86it/s]
|
| 1773 |
13%|█▎ | 235/1826 [00:03<00:26, 59.84it/s]
|
| 1774 |
13%|█▎ | 242/1826 [00:03<00:25, 61.91it/s]
|
| 1775 |
14%|█▎ | 249/1826 [00:03<00:25, 61.34it/s]
|
| 1776 |
14%|█▍ | 256/1826 [00:03<00:25, 62.00it/s]
|
| 1777 |
14%|█▍ | 263/1826 [00:03<00:24, 62.93it/s]
|
| 1778 |
15%|█▍ | 271/1826 [00:04<00:23, 67.26it/s]
|
| 1779 |
15%|█▌ | 280/1826 [00:04<00:21, 72.72it/s]
|
| 1780 |
16%|█▌ | 288/1826 [00:04<00:21, 70.72it/s]
|
| 1781 |
16%|█▌ | 296/1826 [00:04<00:21, 69.79it/s]
|
| 1782 |
17%|█▋ | 304/1826 [00:04<00:22, 67.51it/s]
|
| 1783 |
17%|█▋ | 312/1826 [00:04<00:21, 69.04it/s]
|
| 1784 |
17%|█▋ | 319/1826 [00:04<00:21, 68.83it/s]
|
| 1785 |
18%|█▊ | 327/1826 [00:04<00:21, 70.40it/s]
|
| 1786 |
18%|█▊ | 335/1826 [00:04<00:21, 69.17it/s]
|
| 1787 |
19%|█▉ | 343/1826 [00:05<00:21, 70.48it/s]
|
| 1788 |
19%|█▉ | 351/1826 [00:05<00:22, 65.72it/s]
|
| 1789 |
20%|█▉ | 358/1826 [00:05<00:22, 64.30it/s]
|
| 1790 |
20%|██ | 366/1826 [00:05<00:21, 68.34it/s]
|
| 1791 |
20%|██ | 374/1826 [00:05<00:20, 69.37it/s]
|
| 1792 |
21%|██ | 382/1826 [00:05<00:20, 71.56it/s]
|
| 1793 |
21%|██▏ | 390/1826 [00:05<00:22, 64.90it/s]
|
| 1794 |
22%|██▏ | 398/1826 [00:05<00:21, 66.61it/s]
|
| 1795 |
22%|██▏ | 405/1826 [00:06<00:21, 66.68it/s]
|
| 1796 |
23%|██▎ | 412/1826 [00:06<00:22, 63.63it/s]
|
| 1797 |
23%|██▎ | 420/1826 [00:06<00:21, 65.85it/s]
|
| 1798 |
23%|██▎ | 427/1826 [00:06<00:22, 62.95it/s]
|
| 1799 |
24%|██▍ | 434/1826 [00:06<00:21, 63.77it/s]
|
| 1800 |
24%|██▍ | 441/1826 [00:06<00:22, 61.74it/s]
|
| 1801 |
25%|██▍ | 450/1826 [00:06<00:20, 68.00it/s]
|
| 1802 |
25%|██▌ | 457/1826 [00:06<00:20, 67.45it/s]
|
| 1803 |
26%|██▌ | 466/1826 [00:06<00:19, 69.66it/s]
|
| 1804 |
26%|██▌ | 473/1826 [00:07<00:21, 64.33it/s]
|
| 1805 |
26%|██▋ | 480/1826 [00:07<00:21, 63.73it/s]
|
| 1806 |
27%|██▋ | 487/1826 [00:07<00:21, 62.35it/s]
|
| 1807 |
27%|██▋ | 494/1826 [00:07<00:21, 62.04it/s]
|
| 1808 |
27%|██▋ | 502/1826 [00:07<00:20, 64.86it/s]
|
| 1809 |
28%|██▊ | 510/1826 [00:07<00:19, 67.59it/s]
|
| 1810 |
28%|██▊ | 518/1826 [00:07<00:18, 70.73it/s]
|
| 1811 |
29%|██▉ | 527/1826 [00:07<00:17, 73.95it/s]
|
| 1812 |
29%|██▉ | 535/1826 [00:07<00:17, 71.81it/s]
|
| 1813 |
30%|██▉ | 543/1826 [00:08<00:17, 71.49it/s]
|
| 1814 |
30%|███ | 551/1826 [00:08<00:19, 65.90it/s]
|
| 1815 |
31%|███ | 558/1826 [00:08<00:19, 64.66it/s]
|
| 1816 |
31%|███ | 567/1826 [00:08<00:17, 70.27it/s]
|
| 1817 |
32%|███▏ | 576/1826 [00:08<00:17, 73.36it/s]
|
| 1818 |
32%|███▏ | 584/1826 [00:08<00:18, 68.09it/s]
|
| 1819 |
32%|███▏ | 592/1826 [00:08<00:17, 70.54it/s]
|
| 1820 |
33%|███▎ | 600/1826 [00:08<00:17, 69.88it/s]
|
| 1821 |
33%|███▎ | 608/1826 [00:09<00:18, 65.58it/s]
|
| 1822 |
34%|███▎ | 615/1826 [00:09<00:18, 65.45it/s]
|
| 1823 |
34%|███▍ | 622/1826 [00:09<00:18, 66.26it/s]
|
| 1824 |
35%|███▍ | 632/1826 [00:09<00:16, 73.73it/s]
|
| 1825 |
35%|███▌ | 641/1826 [00:09<00:15, 76.81it/s]
|
| 1826 |
36%|███▌ | 649/1826 [00:09<00:16, 71.30it/s]
|
| 1827 |
36%|███▌ | 657/1826 [00:09<00:16, 69.22it/s]
|
| 1828 |
37%|███▋ | 667/1826 [00:09<00:15, 75.05it/s]
|
| 1829 |
37%|███▋ | 675/1826 [00:09<00:16, 71.22it/s]
|
| 1830 |
37%|███▋ | 683/1826 [00:10<00:16, 69.84it/s]
|
| 1831 |
38%|███▊ | 692/1826 [00:10<00:15, 73.46it/s]
|
| 1832 |
38%|███▊ | 700/1826 [00:10<00:15, 73.83it/s]
|
| 1833 |
39%|███▉ | 708/1826 [00:10<00:14, 75.07it/s]
|
| 1834 |
39%|███▉ | 718/1826 [00:10<00:13, 79.86it/s]
|
| 1835 |
40%|███▉ | 727/1826 [00:10<00:13, 78.55it/s]
|
| 1836 |
40%|████ | 735/1826 [00:10<00:14, 76.87it/s]
|
| 1837 |
41%|████ | 743/1826 [00:10<00:14, 75.47it/s]
|
| 1838 |
41%|████ | 752/1826 [00:10<00:13, 79.11it/s]
|
| 1839 |
42%|████▏ | 760/1826 [00:11<00:13, 77.59it/s]
|
| 1840 |
42%|████▏ | 769/1826 [00:11<00:13, 78.55it/s]
|
| 1841 |
43%|████▎ | 777/1826 [00:11<00:13, 77.65it/s]
|
| 1842 |
43%|████▎ | 785/1826 [00:11<00:14, 71.54it/s]
|
| 1843 |
43%|████▎ | 793/1826 [00:11<00:14, 73.07it/s]
|
| 1844 |
44%|████▍ | 801/1826 [00:11<00:15, 68.26it/s]
|
| 1845 |
44%|████▍ | 809/1826 [00:11<00:14, 69.66it/s]
|
| 1846 |
45%|████▍ | 817/1826 [00:11<00:14, 71.79it/s]
|
| 1847 |
45%|████▌ | 826/1826 [00:11<00:13, 76.14it/s]
|
| 1848 |
46%|████▌ | 834/1826 [00:12<00:14, 68.30it/s]
|
| 1849 |
46%|████▌ | 842/1826 [00:12<00:14, 69.69it/s]
|
| 1850 |
47%|████▋ | 851/1826 [00:12<00:13, 72.87it/s]
|
| 1851 |
47%|████▋ | 860/1826 [00:12<00:12, 77.24it/s]
|
| 1852 |
48%|████▊ | 868/1826 [00:12<00:13, 72.75it/s]
|
| 1853 |
48%|████▊ | 876/1826 [00:12<00:13, 72.25it/s]
|
| 1854 |
48%|████▊ | 884/1826 [00:12<00:13, 71.43it/s]
|
| 1855 |
49%|████▉ | 892/1826 [00:12<00:13, 68.24it/s]
|
| 1856 |
49%|████▉ | 902/1826 [00:13<00:12, 74.85it/s]
|
| 1857 |
50%|████▉ | 911/1826 [00:13<00:11, 77.59it/s]
|
| 1858 |
50%|█████ | 920/1826 [00:13<00:11, 80.11it/s]
|
| 1859 |
51%|█████ | 929/1826 [00:13<00:11, 79.42it/s]
|
| 1860 |
51%|█████▏ | 937/1826 [00:13<00:12, 73.54it/s]
|
| 1861 |
52%|█████▏ | 945/1826 [00:13<00:12, 68.71it/s]
|
| 1862 |
52%|█████▏ | 952/1826 [00:13<00:12, 68.31it/s]
|
| 1863 |
53%|█████▎ | 960/1826 [00:13<00:12, 69.39it/s]
|
| 1864 |
53%|█████▎ | 968/1826 [00:13<00:11, 71.85it/s]
|
| 1865 |
53%|█████▎ | 976/1826 [00:14<00:12, 67.88it/s]
|
| 1866 |
54%|█████▍ | 985/1826 [00:14<00:11, 71.98it/s]
|
| 1867 |
54%|█████▍ | 993/1826 [00:14<00:11, 69.95it/s]
|
| 1868 |
55%|█████▍ | 1001/1826 [00:14<00:12, 67.60it/s]
|
| 1869 |
55%|█████▌ | 1008/1826 [00:14<00:12, 68.06it/s]
|
| 1870 |
56%|█████▌ | 1016/1826 [00:14<00:11, 70.58it/s]
|
| 1871 |
56%|█████▌ | 1025/1826 [00:14<00:10, 74.99it/s]
|
| 1872 |
57%|█████▋ | 1033/1826 [00:14<00:10, 73.75it/s]
|
| 1873 |
57%|█████▋ | 1041/1826 [00:14<00:10, 71.80it/s]
|
| 1874 |
57%|█████▋ | 1049/1826 [00:15<00:11, 69.86it/s]
|
| 1875 |
58%|█████▊ | 1057/1826 [00:15<00:11, 68.46it/s]
|
| 1876 |
58%|█████▊ | 1066/1826 [00:15<00:10, 73.61it/s]
|
| 1877 |
59%|█████▉ | 1074/1826 [00:15<00:10, 74.53it/s]
|
| 1878 |
59%|█████▉ | 1083/1826 [00:15<00:09, 78.56it/s]
|
| 1879 |
60%|█████▉ | 1092/1826 [00:15<00:09, 79.51it/s]
|
| 1880 |
60%|██████ | 1101/1826 [00:15<00:09, 79.20it/s]
|
| 1881 |
61%|██████ | 1109/1826 [00:15<00:09, 74.39it/s]
|
| 1882 |
61%|██████ | 1117/1826 [00:15<00:09, 71.61it/s]
|
| 1883 |
62%|██████▏ | 1126/1826 [00:16<00:09, 73.71it/s]
|
| 1884 |
62%|██████▏ | 1134/1826 [00:16<00:09, 75.14it/s]
|
| 1885 |
63%|██████▎ | 1142/1826 [00:16<00:09, 72.85it/s]
|
| 1886 |
63%|██████▎ | 1150/1826 [00:16<00:09, 71.67it/s]
|
| 1887 |
63%|██████▎ | 1158/1826 [00:16<00:09, 71.29it/s]
|
| 1888 |
64%|██████▍ | 1166/1826 [00:16<00:09, 67.84it/s]
|
| 1889 |
64%|██████▍ | 1174/1826 [00:16<00:09, 69.65it/s]
|
| 1890 |
65%|██████▍ | 1182/1826 [00:16<00:09, 67.30it/s]
|
| 1891 |
65%|██████▌ | 1190/1826 [00:17<00:09, 68.54it/s]
|
| 1892 |
66%|██████▌ | 1198/1826 [00:17<00:08, 70.13it/s]
|
| 1893 |
66%|██████▌ | 1206/1826 [00:17<00:08, 70.01it/s]
|
| 1894 |
66%|██████▋ | 1214/1826 [00:17<00:09, 64.04it/s]
|
| 1895 |
67%|██████▋ | 1221/1826 [00:17<00:09, 62.76it/s]
|
| 1896 |
67%|██████▋ | 1230/1826 [00:17<00:08, 68.49it/s]
|
| 1897 |
68%|██████▊ | 1238/1826 [00:17<00:08, 69.88it/s]
|
| 1898 |
68%|██████▊ | 1246/1826 [00:17<00:08, 70.54it/s]
|
| 1899 |
69%|██████▊ | 1254/1826 [00:17<00:08, 64.45it/s]
|
| 1900 |
69%|██████▉ | 1262/1826 [00:18<00:08, 66.57it/s]
|
| 1901 |
70%|██████▉ | 1270/1826 [00:18<00:08, 69.08it/s]
|
| 1902 |
70%|██████▉ | 1278/1826 [00:18<00:07, 71.87it/s]
|
| 1903 |
70%|███████ | 1286/1826 [00:18<00:07, 72.39it/s]
|
| 1904 |
71%|███████ | 1295/1826 [00:18<00:06, 76.32it/s]
|
| 1905 |
71%|███████▏ | 1304/1826 [00:18<00:06, 79.67it/s]
|
| 1906 |
72%|███████▏ | 1313/1826 [00:18<00:06, 79.61it/s]
|
| 1907 |
72%|███████▏ | 1322/1826 [00:18<00:06, 78.03it/s]
|
| 1908 |
73%|███████▎ | 1330/1826 [00:18<00:06, 75.63it/s]
|
| 1909 |
73%|███████▎ | 1338/1826 [00:19<00:06, 73.90it/s]
|
| 1910 |
74%|███████▎ | 1346/1826 [00:19<00:06, 69.31it/s]
|
| 1911 |
74%|███████▍ | 1354/1826 [00:19<00:06, 70.19it/s]
|
| 1912 |
75%|███████▍ | 1362/1826 [00:19<00:06, 71.84it/s]
|
| 1913 |
75%|███████▌ | 1370/1826 [00:19<00:06, 67.49it/s]
|
| 1914 |
76%|███████▌ | 1379/1826 [00:19<00:06, 72.42it/s]
|
| 1915 |
76%|███████▌ | 1387/1826 [00:19<00:05, 74.47it/s]
|
| 1916 |
76%|███████▋ | 1395/1826 [00:19<00:05, 75.32it/s]
|
| 1917 |
77%|███████▋ | 1404/1826 [00:19<00:05, 79.20it/s]
|
| 1918 |
77%|███████▋ | 1413/1826 [00:20<00:05, 80.03it/s]
|
| 1919 |
78%|███████▊ | 1422/1826 [00:20<00:05, 76.15it/s]
|
| 1920 |
78%|███████▊ | 1430/1826 [00:20<00:05, 72.26it/s]
|
| 1921 |
79%|███████▉ | 1438/1826 [00:20<00:05, 68.01it/s]
|
| 1922 |
79%|███████▉ | 1446/1826 [00:20<00:05, 69.61it/s]
|
| 1923 |
80%|███████▉ | 1454/1826 [00:20<00:05, 71.49it/s]
|
| 1924 |
80%|████████ | 1462/1826 [00:20<00:05, 72.56it/s]
|
| 1925 |
81%|████████ | 1470/1826 [00:20<00:05, 66.00it/s]
|
| 1926 |
81%|████████ | 1477/1826 [00:21<00:05, 65.05it/s]
|
| 1927 |
81%|████████▏ | 1484/1826 [00:21<00:05, 65.40it/s]
|
| 1928 |
82%|████████▏ | 1491/1826 [00:21<00:05, 60.28it/s]
|
| 1929 |
82%|████████▏ | 1498/1826 [00:21<00:05, 61.97it/s]
|
| 1930 |
82%|████████▏ | 1505/1826 [00:21<00:05, 60.61it/s]
|
| 1931 |
83%|████████▎ | 1513/1826 [00:21<00:04, 64.51it/s]
|
| 1932 |
83%|████████▎ | 1520/1826 [00:21<00:04, 65.43it/s]
|
| 1933 |
84%|████████▎ | 1527/1826 [00:21<00:04, 62.45it/s]
|
| 1934 |
84%|████████▍ | 1534/1826 [00:21<00:04, 64.07it/s]
|
| 1935 |
84%|████████▍ | 1541/1826 [00:22<00:04, 63.72it/s]
|
| 1936 |
85%|████████▍ | 1548/1826 [00:22<00:04, 64.27it/s]
|
| 1937 |
85%|████████▌ | 1555/1826 [00:22<00:04, 65.72it/s]
|
| 1938 |
86%|████████▌ | 1562/1826 [00:22<00:03, 66.77it/s]
|
| 1939 |
86%|████████▌ | 1569/1826 [00:22<00:04, 63.11it/s]
|
| 1940 |
86%|████████▋ | 1576/1826 [00:22<00:03, 62.95it/s]
|
| 1941 |
87%|████████▋ | 1584/1826 [00:22<00:03, 65.44it/s]
|
| 1942 |
87%|████████▋ | 1591/1826 [00:22<00:03, 63.71it/s]
|
| 1943 |
88%|████████▊ | 1598/1826 [00:22<00:03, 63.28it/s]
|
| 1944 |
88%|████████▊ | 1607/1826 [00:23<00:03, 69.37it/s]
|
| 1945 |
88%|████████▊ | 1614/1826 [00:23<00:03, 68.35it/s]
|
| 1946 |
89%|████████▉ | 1621/1826 [00:23<00:03, 61.61it/s]
|
| 1947 |
89%|████████▉ | 1628/1826 [00:23<00:03, 56.69it/s]
|
| 1948 |
90%|████████▉ | 1636/1826 [00:23<00:03, 60.23it/s]
|
| 1949 |
90%|████████▉ | 1643/1826 [00:23<00:03, 60.95it/s]
|
| 1950 |
90%|█████████ | 1651/1826 [00:23<00:02, 65.46it/s]
|
| 1951 |
91%|█████████ | 1658/1826 [00:23<00:02, 64.85it/s]
|
| 1952 |
91%|█████████ | 1665/1826 [00:24<00:02, 65.01it/s]
|
| 1953 |
92%|█████████▏| 1673/1826 [00:24<00:02, 68.49it/s]
|
| 1954 |
92%|█████████▏| 1681/1826 [00:24<00:02, 70.31it/s]
|
| 1955 |
93%|█████████▎| 1690/1826 [00:24<00:01, 73.05it/s]
|
| 1956 |
93%|█████████▎| 1698/1826 [00:24<00:02, 63.59it/s]
|
| 1957 |
93%|█████████▎| 1706/1826 [00:24<00:01, 65.52it/s]
|
| 1958 |
94%|█████████▍| 1714/1826 [00:24<00:01, 68.18it/s]
|
| 1959 |
94%|█████████▍| 1721/1826 [00:24<00:01, 61.69it/s]
|
| 1960 |
95%|█████████▍| 1729/1826 [00:24<00:01, 65.81it/s]
|
| 1961 |
95%|█████████▌| 1737/1826 [00:25<00:01, 67.43it/s]
|
| 1962 |
96%|█████████▌| 1746/1826 [00:25<00:01, 71.34it/s]
|
| 1963 |
96%|█████████▌| 1754/1826 [00:25<00:01, 69.71it/s]
|
| 1964 |
96%|█████████▋| 1762/1826 [00:25<00:00, 68.18it/s]
|
| 1965 |
97%|█████████▋| 1769/1826 [00:25<00:00, 66.79it/s]
|
| 1966 |
97%|█████████▋| 1776/1826 [00:25<00:00, 65.37it/s]
|
| 1967 |
98%|█████████▊| 1784/1826 [00:25<00:00, 69.28it/s]
|
| 1968 |
98%|█████████▊| 1791/1826 [00:25<00:00, 64.98it/s]
|
| 1969 |
98%|█████████▊| 1798/1826 [00:26<00:00, 60.14it/s]
|
| 1970 |
99%|█████████▉| 1805/1826 [00:26<00:00, 62.25it/s]
|
| 1971 |
99%|█████████▉| 1813/1826 [00:26<00:00, 66.07it/s]
|
| 1972 |
+
[INFO|trainer.py:3503] 2024-09-09 16:19:03,621 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
|
| 1973 |
+
[INFO|configuration_utils.py:472] 2024-09-09 16:19:03,622 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
|
| 1974 |
+
[INFO|modeling_utils.py:2799] 2024-09-09 16:19:04,830 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
|
| 1975 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 16:19:04,831 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
| 1976 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 16:19:04,832 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
| 1977 |
+
***** predict metrics *****
|
| 1978 |
+
predict_accuracy = 0.9982
|
| 1979 |
+
predict_f1 = 0.8965
|
| 1980 |
+
predict_loss = 0.0127
|
| 1981 |
+
predict_precision = 0.8773
|
| 1982 |
+
predict_recall = 0.9166
|
| 1983 |
+
predict_runtime = 0:00:33.03
|
| 1984 |
+
predict_samples_per_second = 442.053
|
| 1985 |
+
predict_steps_per_second = 55.268
|
| 1986 |
+
|
train_results.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch": 9.
|
| 3 |
-
"total_flos": 1.
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
-
"train_samples":
|
| 7 |
-
"train_samples_per_second":
|
| 8 |
-
"train_steps_per_second":
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 9.989701338825952,
|
| 3 |
+
"total_flos": 1.7736053837017554e+16,
|
| 4 |
+
"train_loss": 0.002068036902580679,
|
| 5 |
+
"train_runtime": 2701.3395,
|
| 6 |
+
"train_samples": 31053,
|
| 7 |
+
"train_samples_per_second": 114.954,
|
| 8 |
+
"train_steps_per_second": 1.795
|
| 9 |
}
|
trainer_state.json
CHANGED
|
@@ -1,201 +1,208 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-
|
| 4 |
-
"epoch": 9.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch": 0.
|
| 13 |
-
"eval_accuracy": 0.
|
| 14 |
-
"eval_f1": 0.
|
| 15 |
-
"eval_loss": 0.
|
| 16 |
-
"eval_precision": 0.
|
| 17 |
-
"eval_recall": 0.
|
| 18 |
-
"eval_runtime":
|
| 19 |
-
"eval_samples_per_second":
|
| 20 |
-
"eval_steps_per_second":
|
| 21 |
-
"step":
|
| 22 |
-
},
|
| 23 |
-
{
|
| 24 |
-
"epoch": 1.
|
| 25 |
-
"grad_norm": 0.
|
| 26 |
-
"learning_rate": 4.
|
| 27 |
-
"loss": 0.
|
| 28 |
"step": 500
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 2.0,
|
| 32 |
-
"eval_accuracy": 0.
|
| 33 |
-
"eval_f1": 0.
|
| 34 |
-
"eval_loss": 0.
|
| 35 |
-
"eval_precision": 0.
|
| 36 |
-
"eval_recall": 0.
|
| 37 |
-
"eval_runtime":
|
| 38 |
-
"eval_samples_per_second":
|
| 39 |
-
"eval_steps_per_second":
|
| 40 |
-
"step":
|
| 41 |
-
},
|
| 42 |
-
{
|
| 43 |
-
"epoch": 2.
|
| 44 |
-
"grad_norm": 0.
|
| 45 |
-
"learning_rate": 3.
|
| 46 |
-
"loss": 0.
|
| 47 |
"step": 1000
|
| 48 |
},
|
| 49 |
{
|
| 50 |
-
"epoch": 2.
|
| 51 |
-
"eval_accuracy": 0.
|
| 52 |
-
"eval_f1": 0.
|
| 53 |
-
"eval_loss": 0.
|
| 54 |
-
"eval_precision": 0.
|
| 55 |
-
"eval_recall": 0.
|
| 56 |
-
"eval_runtime":
|
| 57 |
-
"eval_samples_per_second":
|
| 58 |
-
"eval_steps_per_second":
|
| 59 |
-
"step":
|
| 60 |
},
|
| 61 |
{
|
| 62 |
-
"epoch": 3.
|
| 63 |
-
"grad_norm": 0.
|
| 64 |
-
"learning_rate": 3.
|
| 65 |
-
"loss": 0.
|
| 66 |
"step": 1500
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"epoch": 4.0,
|
| 70 |
-
"eval_accuracy": 0.
|
| 71 |
-
"eval_f1": 0.
|
| 72 |
-
"eval_loss": 0.
|
| 73 |
-
"eval_precision": 0.
|
| 74 |
-
"eval_recall": 0.
|
| 75 |
-
"eval_runtime":
|
| 76 |
-
"eval_samples_per_second":
|
| 77 |
-
"eval_steps_per_second":
|
| 78 |
-
"step":
|
| 79 |
-
},
|
| 80 |
-
{
|
| 81 |
-
"epoch": 4.
|
| 82 |
-
"grad_norm": 0.
|
| 83 |
-
"learning_rate": 2.
|
| 84 |
-
"loss": 0.
|
| 85 |
"step": 2000
|
| 86 |
},
|
| 87 |
{
|
| 88 |
-
"epoch": 4.
|
| 89 |
-
"eval_accuracy": 0.
|
| 90 |
-
"eval_f1": 0.
|
| 91 |
-
"eval_loss": 0.
|
| 92 |
-
"eval_precision": 0.
|
| 93 |
-
"eval_recall": 0.
|
| 94 |
-
"eval_runtime":
|
| 95 |
-
"eval_samples_per_second":
|
| 96 |
-
"eval_steps_per_second":
|
| 97 |
-
"step":
|
| 98 |
},
|
| 99 |
{
|
| 100 |
-
"epoch": 5.
|
| 101 |
-
"grad_norm": 0.
|
| 102 |
-
"learning_rate": 2.
|
| 103 |
-
"loss": 0.
|
| 104 |
"step": 2500
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"epoch": 6.0,
|
| 108 |
-
"eval_accuracy": 0.
|
| 109 |
-
"eval_f1": 0.
|
| 110 |
-
"eval_loss": 0.
|
| 111 |
-
"eval_precision": 0.
|
| 112 |
-
"eval_recall": 0.
|
| 113 |
-
"eval_runtime":
|
| 114 |
-
"eval_samples_per_second":
|
| 115 |
-
"eval_steps_per_second":
|
| 116 |
-
"step":
|
| 117 |
-
},
|
| 118 |
-
{
|
| 119 |
-
"epoch": 6.
|
| 120 |
-
"grad_norm": 0.
|
| 121 |
-
"learning_rate": 1.
|
| 122 |
-
"loss": 0.
|
| 123 |
"step": 3000
|
| 124 |
},
|
| 125 |
{
|
| 126 |
-
"epoch": 6.
|
| 127 |
-
"eval_accuracy": 0.
|
| 128 |
-
"eval_f1": 0.
|
| 129 |
-
"eval_loss": 0.
|
| 130 |
-
"eval_precision": 0.
|
| 131 |
-
"eval_recall": 0.
|
| 132 |
-
"eval_runtime":
|
| 133 |
-
"eval_samples_per_second":
|
| 134 |
-
"eval_steps_per_second":
|
| 135 |
-
"step":
|
| 136 |
},
|
| 137 |
{
|
| 138 |
-
"epoch":
|
| 139 |
-
"
|
| 140 |
-
"
|
| 141 |
-
"
|
| 142 |
-
"eval_precision": 0.9266480965645311,
|
| 143 |
-
"eval_recall": 0.9301025163094129,
|
| 144 |
-
"eval_runtime": 15.0386,
|
| 145 |
-
"eval_samples_per_second": 461.877,
|
| 146 |
-
"eval_steps_per_second": 57.785,
|
| 147 |
-
"step": 3484
|
| 148 |
-
},
|
| 149 |
-
{
|
| 150 |
-
"epoch": 8.036739380022961,
|
| 151 |
-
"grad_norm": 0.0006512438994832337,
|
| 152 |
-
"learning_rate": 9.770114942528738e-06,
|
| 153 |
-
"loss": 0.0006,
|
| 154 |
"step": 3500
|
| 155 |
},
|
| 156 |
{
|
| 157 |
-
"epoch": 8.
|
| 158 |
-
"eval_accuracy": 0.
|
| 159 |
-
"eval_f1": 0.
|
| 160 |
-
"eval_loss": 0.
|
| 161 |
-
"eval_precision": 0.
|
| 162 |
-
"eval_recall": 0.
|
| 163 |
-
"eval_runtime":
|
| 164 |
-
"eval_samples_per_second":
|
| 165 |
-
"eval_steps_per_second":
|
| 166 |
-
"step":
|
| 167 |
-
},
|
| 168 |
-
{
|
| 169 |
-
"epoch":
|
| 170 |
-
"grad_norm": 0.
|
| 171 |
-
"learning_rate":
|
| 172 |
-
"loss": 0.
|
| 173 |
"step": 4000
|
| 174 |
},
|
| 175 |
{
|
| 176 |
-
"epoch":
|
| 177 |
-
"eval_accuracy": 0.
|
| 178 |
-
"eval_f1": 0.
|
| 179 |
-
"eval_loss": 0.
|
| 180 |
-
"eval_precision": 0.
|
| 181 |
-
"eval_recall": 0.
|
| 182 |
-
"eval_runtime":
|
| 183 |
-
"eval_samples_per_second":
|
| 184 |
-
"eval_steps_per_second":
|
| 185 |
-
"step":
|
| 186 |
-
},
|
| 187 |
-
{
|
| 188 |
-
"epoch": 9.
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
"
|
| 192 |
-
"
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
}
|
| 196 |
],
|
| 197 |
"logging_steps": 500,
|
| 198 |
-
"max_steps":
|
| 199 |
"num_input_tokens_seen": 0,
|
| 200 |
"num_train_epochs": 10,
|
| 201 |
"save_steps": 500,
|
|
@@ -211,7 +218,7 @@
|
|
| 211 |
"attributes": {}
|
| 212 |
}
|
| 213 |
},
|
| 214 |
-
"total_flos": 1.
|
| 215 |
"train_batch_size": 32,
|
| 216 |
"trial_name": null,
|
| 217 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.9230769230769231,
|
| 3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4850",
|
| 4 |
+
"epoch": 9.989701338825952,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 4850,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 0.9989701338825953,
|
| 13 |
+
"eval_accuracy": 0.9984030122046221,
|
| 14 |
+
"eval_f1": 0.9132331555986428,
|
| 15 |
+
"eval_loss": 0.004271956626325846,
|
| 16 |
+
"eval_precision": 0.9145631067961165,
|
| 17 |
+
"eval_recall": 0.9119070667957405,
|
| 18 |
+
"eval_runtime": 17.5218,
|
| 19 |
+
"eval_samples_per_second": 387.973,
|
| 20 |
+
"eval_steps_per_second": 48.511,
|
| 21 |
+
"step": 485
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"epoch": 1.0298661174047374,
|
| 25 |
+
"grad_norm": 0.35554030537605286,
|
| 26 |
+
"learning_rate": 4.484536082474227e-05,
|
| 27 |
+
"loss": 0.0127,
|
| 28 |
"step": 500
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 2.0,
|
| 32 |
+
"eval_accuracy": 0.9984679304076863,
|
| 33 |
+
"eval_f1": 0.9130645944633317,
|
| 34 |
+
"eval_loss": 0.005422212183475494,
|
| 35 |
+
"eval_precision": 0.9161793372319688,
|
| 36 |
+
"eval_recall": 0.9099709583736689,
|
| 37 |
+
"eval_runtime": 17.524,
|
| 38 |
+
"eval_samples_per_second": 387.925,
|
| 39 |
+
"eval_steps_per_second": 48.505,
|
| 40 |
+
"step": 971
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"epoch": 2.059732234809475,
|
| 44 |
+
"grad_norm": 0.2354772835969925,
|
| 45 |
+
"learning_rate": 3.9690721649484535e-05,
|
| 46 |
+
"loss": 0.0033,
|
| 47 |
"step": 1000
|
| 48 |
},
|
| 49 |
{
|
| 50 |
+
"epoch": 2.998970133882595,
|
| 51 |
+
"eval_accuracy": 0.9985912749935082,
|
| 52 |
+
"eval_f1": 0.9173955296404277,
|
| 53 |
+
"eval_loss": 0.0059292372316122055,
|
| 54 |
+
"eval_precision": 0.9209756097560976,
|
| 55 |
+
"eval_recall": 0.9138431752178122,
|
| 56 |
+
"eval_runtime": 17.4807,
|
| 57 |
+
"eval_samples_per_second": 388.886,
|
| 58 |
+
"eval_steps_per_second": 48.625,
|
| 59 |
+
"step": 1456
|
| 60 |
},
|
| 61 |
{
|
| 62 |
+
"epoch": 3.089598352214212,
|
| 63 |
+
"grad_norm": 0.026833873242139816,
|
| 64 |
+
"learning_rate": 3.4536082474226805e-05,
|
| 65 |
+
"loss": 0.0016,
|
| 66 |
"step": 1500
|
| 67 |
},
|
| 68 |
{
|
| 69 |
"epoch": 4.0,
|
| 70 |
+
"eval_accuracy": 0.9985523240716697,
|
| 71 |
+
"eval_f1": 0.9135212613473483,
|
| 72 |
+
"eval_loss": 0.006239830516278744,
|
| 73 |
+
"eval_precision": 0.9018867924528302,
|
| 74 |
+
"eval_recall": 0.925459825750242,
|
| 75 |
+
"eval_runtime": 17.8043,
|
| 76 |
+
"eval_samples_per_second": 381.818,
|
| 77 |
+
"eval_steps_per_second": 47.741,
|
| 78 |
+
"step": 1942
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 4.11946446961895,
|
| 82 |
+
"grad_norm": 0.1513855904340744,
|
| 83 |
+
"learning_rate": 2.9381443298969075e-05,
|
| 84 |
+
"loss": 0.001,
|
| 85 |
"step": 2000
|
| 86 |
},
|
| 87 |
{
|
| 88 |
+
"epoch": 4.998970133882596,
|
| 89 |
+
"eval_accuracy": 0.9986042586341211,
|
| 90 |
+
"eval_f1": 0.920328343795268,
|
| 91 |
+
"eval_loss": 0.006359434220939875,
|
| 92 |
+
"eval_precision": 0.918111753371869,
|
| 93 |
+
"eval_recall": 0.9225556631171346,
|
| 94 |
+
"eval_runtime": 17.4628,
|
| 95 |
+
"eval_samples_per_second": 389.284,
|
| 96 |
+
"eval_steps_per_second": 48.675,
|
| 97 |
+
"step": 2427
|
| 98 |
},
|
| 99 |
{
|
| 100 |
+
"epoch": 5.1493305870236865,
|
| 101 |
+
"grad_norm": 0.007741741370409727,
|
| 102 |
+
"learning_rate": 2.422680412371134e-05,
|
| 103 |
+
"loss": 0.0006,
|
| 104 |
"step": 2500
|
| 105 |
},
|
| 106 |
{
|
| 107 |
"epoch": 6.0,
|
| 108 |
+
"eval_accuracy": 0.9985523240716697,
|
| 109 |
+
"eval_f1": 0.9180009704027171,
|
| 110 |
+
"eval_loss": 0.00812861043959856,
|
| 111 |
+
"eval_precision": 0.9202334630350194,
|
| 112 |
+
"eval_recall": 0.9157792836398838,
|
| 113 |
+
"eval_runtime": 17.5273,
|
| 114 |
+
"eval_samples_per_second": 387.852,
|
| 115 |
+
"eval_steps_per_second": 48.496,
|
| 116 |
+
"step": 2913
|
| 117 |
+
},
|
| 118 |
+
{
|
| 119 |
+
"epoch": 6.179196704428424,
|
| 120 |
+
"grad_norm": 0.06708419322967529,
|
| 121 |
+
"learning_rate": 1.9072164948453608e-05,
|
| 122 |
+
"loss": 0.0004,
|
| 123 |
"step": 3000
|
| 124 |
},
|
| 125 |
{
|
| 126 |
+
"epoch": 6.998970133882596,
|
| 127 |
+
"eval_accuracy": 0.9985653077122826,
|
| 128 |
+
"eval_f1": 0.920328343795268,
|
| 129 |
+
"eval_loss": 0.008457792922854424,
|
| 130 |
+
"eval_precision": 0.918111753371869,
|
| 131 |
+
"eval_recall": 0.9225556631171346,
|
| 132 |
+
"eval_runtime": 17.4726,
|
| 133 |
+
"eval_samples_per_second": 389.067,
|
| 134 |
+
"eval_steps_per_second": 48.648,
|
| 135 |
+
"step": 3398
|
| 136 |
},
|
| 137 |
{
|
| 138 |
+
"epoch": 7.209062821833162,
|
| 139 |
+
"grad_norm": 0.0032746351789683104,
|
| 140 |
+
"learning_rate": 1.3917525773195878e-05,
|
| 141 |
+
"loss": 0.0003,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
"step": 3500
|
| 143 |
},
|
| 144 |
{
|
| 145 |
+
"epoch": 8.0,
|
| 146 |
+
"eval_accuracy": 0.9985847831732018,
|
| 147 |
+
"eval_f1": 0.9210653753026635,
|
| 148 |
+
"eval_loss": 0.00870645884424448,
|
| 149 |
+
"eval_precision": 0.9215116279069767,
|
| 150 |
+
"eval_recall": 0.920619554695063,
|
| 151 |
+
"eval_runtime": 17.5192,
|
| 152 |
+
"eval_samples_per_second": 388.031,
|
| 153 |
+
"eval_steps_per_second": 48.518,
|
| 154 |
+
"step": 3884
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"epoch": 8.2389289392379,
|
| 158 |
+
"grad_norm": 0.0013693332439288497,
|
| 159 |
+
"learning_rate": 8.762886597938144e-06,
|
| 160 |
+
"loss": 0.0002,
|
| 161 |
"step": 4000
|
| 162 |
},
|
| 163 |
{
|
| 164 |
+
"epoch": 8.998970133882596,
|
| 165 |
+
"eval_accuracy": 0.9985198649701377,
|
| 166 |
+
"eval_f1": 0.9206500956022945,
|
| 167 |
+
"eval_loss": 0.008967469446361065,
|
| 168 |
+
"eval_precision": 0.9093484419263456,
|
| 169 |
+
"eval_recall": 0.9322362052274927,
|
| 170 |
+
"eval_runtime": 17.7573,
|
| 171 |
+
"eval_samples_per_second": 382.828,
|
| 172 |
+
"eval_steps_per_second": 47.868,
|
| 173 |
+
"step": 4369
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"epoch": 9.268795056642636,
|
| 177 |
+
"grad_norm": 0.0005869244341738522,
|
| 178 |
+
"learning_rate": 3.608247422680412e-06,
|
| 179 |
+
"loss": 0.0001,
|
| 180 |
+
"step": 4500
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 9.989701338825952,
|
| 184 |
+
"eval_accuracy": 0.9986302259153467,
|
| 185 |
+
"eval_f1": 0.9230769230769231,
|
| 186 |
+
"eval_loss": 0.009366312995553017,
|
| 187 |
+
"eval_precision": 0.9169054441260746,
|
| 188 |
+
"eval_recall": 0.9293320425943853,
|
| 189 |
+
"eval_runtime": 17.4762,
|
| 190 |
+
"eval_samples_per_second": 388.987,
|
| 191 |
+
"eval_steps_per_second": 48.638,
|
| 192 |
+
"step": 4850
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"epoch": 9.989701338825952,
|
| 196 |
+
"step": 4850,
|
| 197 |
+
"total_flos": 1.7736053837017554e+16,
|
| 198 |
+
"train_loss": 0.002068036902580679,
|
| 199 |
+
"train_runtime": 2701.3395,
|
| 200 |
+
"train_samples_per_second": 114.954,
|
| 201 |
+
"train_steps_per_second": 1.795
|
| 202 |
}
|
| 203 |
],
|
| 204 |
"logging_steps": 500,
|
| 205 |
+
"max_steps": 4850,
|
| 206 |
"num_input_tokens_seen": 0,
|
| 207 |
"num_train_epochs": 10,
|
| 208 |
"save_steps": 500,
|
|
|
|
| 218 |
"attributes": {}
|
| 219 |
}
|
| 220 |
},
|
| 221 |
+
"total_flos": 1.7736053837017554e+16,
|
| 222 |
"train_batch_size": 32,
|
| 223 |
"trial_name": null,
|
| 224 |
"trial_params": null
|