Rodrigo1771 commited on
Commit
891a14a
·
verified ·
1 Parent(s): f293e66

End of training

Browse files
README.md CHANGED
@@ -2,9 +2,10 @@
2
  library_name: transformers
3
  base_model: IVN-RIN/bioBIT
4
  tags:
 
5
  - generated_from_trainer
6
  datasets:
7
- - drugtemist-it-fasttext-75-ner
8
  metrics:
9
  - precision
10
  - recall
@@ -17,8 +18,8 @@ model-index:
17
  name: Token Classification
18
  type: token-classification
19
  dataset:
20
- name: drugtemist-it-fasttext-75-ner
21
- type: drugtemist-it-fasttext-75-ner
22
  config: DrugTEMIST Italian NER
23
  split: validation
24
  args: DrugTEMIST Italian NER
@@ -42,7 +43,7 @@ should probably proofread and complete it, then remove this comment. -->
42
 
43
  # output
44
 
45
- This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the drugtemist-it-fasttext-75-ner dataset.
46
  It achieves the following results on the evaluation set:
47
  - Loss: 0.0094
48
  - Precision: 0.9169
 
2
  library_name: transformers
3
  base_model: IVN-RIN/bioBIT
4
  tags:
5
+ - token-classification
6
  - generated_from_trainer
7
  datasets:
8
+ - Rodrigo1771/drugtemist-it-fasttext-75-ner
9
  metrics:
10
  - precision
11
  - recall
 
18
  name: Token Classification
19
  type: token-classification
20
  dataset:
21
+ name: Rodrigo1771/drugtemist-it-fasttext-75-ner
22
+ type: Rodrigo1771/drugtemist-it-fasttext-75-ner
23
  config: DrugTEMIST Italian NER
24
  split: validation
25
  args: DrugTEMIST Italian NER
 
43
 
44
  # output
45
 
46
+ This model is a fine-tuned version of [IVN-RIN/bioBIT](https://huggingface.co/IVN-RIN/bioBIT) on the Rodrigo1771/drugtemist-it-fasttext-75-ner dataset.
47
  It achieves the following results on the evaluation set:
48
  - Loss: 0.0094
49
  - Precision: 0.9169
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
- "epoch": 9.988518943742825,
3
- "eval_accuracy": 0.9986674294644602,
4
- "eval_f1": 0.9284386617100372,
5
- "eval_loss": 0.0068497806787490845,
6
- "eval_precision": 0.9258572752548656,
7
- "eval_recall": 0.9310344827586207,
8
- "eval_runtime": 15.1137,
9
- "eval_samples": 6946,
10
- "eval_samples_per_second": 459.582,
11
- "eval_steps_per_second": 57.497,
12
- "predict_accuracy": 0.9987433820748771,
13
- "predict_f1": 0.9237312163311596,
14
- "predict_loss": 0.00740663381293416,
15
- "predict_precision": 0.9024930747922437,
16
- "predict_recall": 0.945993031358885,
17
- "predict_runtime": 28.4889,
18
- "predict_samples_per_second": 516.517,
19
- "predict_steps_per_second": 64.587,
20
- "total_flos": 1.1084127968547612e+16,
21
- "train_loss": 0.0030981004957495064,
22
- "train_runtime": 1801.8371,
23
- "train_samples": 27841,
24
- "train_samples_per_second": 154.515,
25
- "train_steps_per_second": 2.414
26
  }
 
1
  {
2
+ "epoch": 9.989701338825952,
3
+ "eval_accuracy": 0.9986302259153467,
4
+ "eval_f1": 0.9230769230769231,
5
+ "eval_loss": 0.009366312995553017,
6
+ "eval_precision": 0.9169054441260746,
7
+ "eval_recall": 0.9293320425943853,
8
+ "eval_runtime": 17.51,
9
+ "eval_samples": 6798,
10
+ "eval_samples_per_second": 388.236,
11
+ "eval_steps_per_second": 48.544,
12
+ "predict_accuracy": 0.998192886032094,
13
+ "predict_f1": 0.8964950711938664,
14
+ "predict_loss": 0.012696487829089165,
15
+ "predict_precision": 0.8772775991425509,
16
+ "predict_recall": 0.9165733482642777,
17
+ "predict_runtime": 33.039,
18
+ "predict_samples_per_second": 442.053,
19
+ "predict_steps_per_second": 55.268,
20
+ "total_flos": 1.7736053837017554e+16,
21
+ "train_loss": 0.002068036902580679,
22
+ "train_runtime": 2701.3395,
23
+ "train_samples": 31053,
24
+ "train_samples_per_second": 114.954,
25
+ "train_steps_per_second": 1.795
26
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.988518943742825,
3
- "eval_accuracy": 0.9986674294644602,
4
- "eval_f1": 0.9284386617100372,
5
- "eval_loss": 0.0068497806787490845,
6
- "eval_precision": 0.9258572752548656,
7
- "eval_recall": 0.9310344827586207,
8
- "eval_runtime": 15.1137,
9
- "eval_samples": 6946,
10
- "eval_samples_per_second": 459.582,
11
- "eval_steps_per_second": 57.497
12
  }
 
1
  {
2
+ "epoch": 9.989701338825952,
3
+ "eval_accuracy": 0.9986302259153467,
4
+ "eval_f1": 0.9230769230769231,
5
+ "eval_loss": 0.009366312995553017,
6
+ "eval_precision": 0.9169054441260746,
7
+ "eval_recall": 0.9293320425943853,
8
+ "eval_runtime": 17.51,
9
+ "eval_samples": 6798,
10
+ "eval_samples_per_second": 388.236,
11
+ "eval_steps_per_second": 48.544
12
  }
predict_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "predict_accuracy": 0.9987433820748771,
3
- "predict_f1": 0.9237312163311596,
4
- "predict_loss": 0.00740663381293416,
5
- "predict_precision": 0.9024930747922437,
6
- "predict_recall": 0.945993031358885,
7
- "predict_runtime": 28.4889,
8
- "predict_samples_per_second": 516.517,
9
- "predict_steps_per_second": 64.587
10
  }
 
1
  {
2
+ "predict_accuracy": 0.998192886032094,
3
+ "predict_f1": 0.8964950711938664,
4
+ "predict_loss": 0.012696487829089165,
5
+ "predict_precision": 0.8772775991425509,
6
+ "predict_recall": 0.9165733482642777,
7
+ "predict_runtime": 33.039,
8
+ "predict_samples_per_second": 442.053,
9
+ "predict_steps_per_second": 55.268
10
  }
predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725898709.0a1c9bec2a53.65267.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64471db1189ffc9cdb3c758e8bad5d7fe51fb1f89b23f34814e661acc0324845
3
+ size 560
train.log CHANGED
@@ -1588,3 +1588,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
1588
  {'eval_loss': 0.009366312995553017, 'eval_precision': 0.9169054441260746, 'eval_recall': 0.9293320425943853, 'eval_f1': 0.9230769230769231, 'eval_accuracy': 0.9986302259153467, 'eval_runtime': 17.4762, 'eval_samples_per_second': 388.987, 'eval_steps_per_second': 48.638, 'epoch': 9.99}
1589
  {'train_runtime': 2701.3395, 'train_samples_per_second': 114.954, 'train_steps_per_second': 1.795, 'train_loss': 0.002068036902580679, 'epoch': 9.99}
1590
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1591
  0%| | 0/850 [00:00<?, ?it/s]
1592
  1%| | 9/850 [00:00<00:09, 84.85it/s]
1593
  2%|▏ | 18/850 [00:00<00:12, 69.16it/s]
1594
  3%|▎ | 26/850 [00:00<00:12, 67.03it/s]
1595
  4%|▍ | 34/850 [00:00<00:11, 69.34it/s]
1596
  5%|▍ | 42/850 [00:00<00:12, 66.38it/s]
1597
  6%|▌ | 49/850 [00:00<00:14, 55.56it/s]
1598
  7%|▋ | 57/850 [00:00<00:13, 60.68it/s]
1599
  8%|▊ | 64/850 [00:01<00:13, 59.50it/s]
1600
  8%|▊ | 71/850 [00:01<00:14, 53.38it/s]
1601
  9%|▉ | 77/850 [00:01<00:14, 54.52it/s]
1602
  10%|▉ | 83/850 [00:01<00:14, 54.60it/s]
1603
  11%|█ | 91/850 [00:01<00:12, 59.33it/s]
1604
  12%|█▏ | 98/850 [00:01<00:12, 60.45it/s]
1605
  12%|█▏ | 106/850 [00:01<00:11, 64.35it/s]
1606
  13%|█▎ | 113/850 [00:01<00:11, 63.96it/s]
1607
  14%|█▍ | 120/850 [00:01<00:11, 64.78it/s]
1608
  15%|█▍ | 127/850 [00:02<00:11, 65.12it/s]
1609
  16%|█▌ | 134/850 [00:02<00:11, 61.87it/s]
1610
  17%|█▋ | 142/850 [00:02<00:11, 64.23it/s]
1611
  18%|█▊ | 149/850 [00:02<00:11, 60.11it/s]
1612
  18%|█▊ | 156/850 [00:02<00:11, 60.66it/s]
1613
  19%|█▉ | 163/850 [00:02<00:10, 62.78it/s]
1614
  20%|██ | 170/850 [00:02<00:10, 63.49it/s]
1615
  21%|██ | 177/850 [00:02<00:10, 65.22it/s]
1616
  22%|██▏ | 184/850 [00:02<00:10, 62.35it/s]
1617
  22%|██▏ | 191/850 [00:03<00:10, 60.53it/s]
1618
  23%|██▎ | 198/850 [00:03<00:10, 62.55it/s]
1619
  24%|██▍ | 205/850 [00:03<00:10, 61.27it/s]
1620
  25%|██▍ | 212/850 [00:03<00:10, 61.68it/s]
1621
  26%|██▌ | 219/850 [00:03<00:10, 60.02it/s]
1622
  27%|██▋ | 227/850 [00:03<00:09, 63.15it/s]
1623
  28%|██▊ | 234/850 [00:03<00:09, 64.86it/s]
1624
  28%|██▊ | 241/850 [00:03<00:09, 63.81it/s]
1625
  29%|██▉ | 248/850 [00:04<00:09, 61.38it/s]
1626
  30%|███ | 256/850 [00:04<00:09, 65.97it/s]
1627
  31%|███ | 263/850 [00:04<00:09, 65.18it/s]
1628
  32%|███▏ | 271/850 [00:04<00:08, 66.94it/s]
1629
  33%|███▎ | 279/850 [00:04<00:08, 68.88it/s]
1630
  34%|███▍ | 287/850 [00:04<00:08, 69.84it/s]
1631
  35%|███▍ | 295/850 [00:04<00:08, 69.34it/s]
1632
  36%|███▌ | 302/850 [00:04<00:08, 66.76it/s]
1633
  36%|███▋ | 309/850 [00:04<00:08, 67.41it/s]
1634
  37%|███▋ | 316/850 [00:04<00:07, 67.96it/s]
1635
  38%|███▊ | 323/850 [00:05<00:07, 68.48it/s]
1636
  39%|███▉ | 330/850 [00:05<00:07, 67.10it/s]
1637
  40%|███▉ | 337/850 [00:05<00:07, 66.09it/s]
1638
  40%|████ | 344/850 [00:05<00:07, 66.02it/s]
1639
  41%|████▏ | 351/850 [00:05<00:07, 66.82it/s]
1640
  42%|████▏ | 358/850 [00:05<00:07, 64.35it/s]
1641
  43%|████▎ | 365/850 [00:05<00:07, 63.66it/s]
1642
  44%|████▍ | 372/850 [00:05<00:07, 65.24it/s]
1643
  45%|████▍ | 379/850 [00:05<00:07, 59.31it/s]
1644
  45%|████▌ | 386/850 [00:06<00:07, 59.44it/s]
1645
  46%|████▌ | 393/850 [00:06<00:07, 58.18it/s]
1646
  47%|████▋ | 399/850 [00:06<00:07, 56.50it/s]
1647
  48%|████▊ | 407/850 [00:06<00:07, 62.14it/s]
1648
  49%|████▉ | 415/850 [00:06<00:06, 66.41it/s]
1649
  50%|████▉ | 423/850 [00:06<00:06, 68.67it/s]
1650
  51%|█████ | 431/850 [00:06<00:05, 70.17it/s]
1651
  52%|█████▏ | 439/850 [00:06<00:06, 66.16it/s]
1652
  52%|█████▏ | 446/850 [00:07<00:06, 63.58it/s]
1653
  53%|█████▎ | 453/850 [00:07<00:06, 61.74it/s]
1654
  54%|█████▍ | 461/850 [00:07<00:05, 64.99it/s]
1655
  55%|█████▌ | 469/850 [00:07<00:05, 67.95it/s]
1656
  56%|█████▌ | 477/850 [00:07<00:05, 69.70it/s]
1657
  57%|█████▋ | 485/850 [00:07<00:05, 66.98it/s]
1658
  58%|█████▊ | 492/850 [00:07<00:05, 65.57it/s]
1659
  59%|█████▉ | 500/850 [00:07<00:05, 67.64it/s]
1660
  60%|█████▉ | 508/850 [00:07<00:04, 69.00it/s]
1661
  61%|██████ | 515/850 [00:08<00:04, 67.70it/s]
1662
  61%|██████▏ | 522/850 [00:08<00:04, 68.14it/s]
1663
  62%|██████▏ | 529/850 [00:08<00:05, 63.43it/s]
1664
  63%|██████▎ | 536/850 [00:08<00:05, 62.14it/s]
1665
  64%|██████▍ | 543/850 [00:08<00:05, 60.12it/s]
1666
  65%|██████▍ | 550/850 [00:08<00:05, 58.62it/s]
1667
  66%|██████▌ | 557/850 [00:08<00:04, 59.23it/s]
1668
  66%|██████▌ | 563/850 [00:08<00:05, 55.59it/s]
1669
  67%|██████▋ | 570/850 [00:08<00:04, 57.72it/s]
1670
  68%|██████▊ | 577/850 [00:09<00:04, 57.68it/s]
1671
  69%|██████▊ | 584/850 [00:09<00:04, 59.76it/s]
1672
  70%|██████▉ | 591/850 [00:09<00:04, 60.25it/s]
1673
  70%|███████ | 598/850 [00:09<00:04, 60.81it/s]
1674
  71%|███████ | 605/850 [00:09<00:04, 60.53it/s]
1675
  72%|███████▏ | 612/850 [00:09<00:03, 62.53it/s]
1676
  73%|███████▎ | 619/850 [00:09<00:03, 64.26it/s]
1677
  74%|███████▎ | 626/850 [00:09<00:03, 65.14it/s]
1678
  74%|███████▍ | 633/850 [00:09<00:03, 63.37it/s]
1679
  75%|███████▌ | 640/850 [00:10<00:03, 64.52it/s]
1680
  76%|███████▌ | 648/850 [00:10<00:02, 67.99it/s]
1681
  77%|███████▋ | 655/850 [00:10<00:02, 68.40it/s]
1682
  78%|███████▊ | 662/850 [00:10<00:02, 68.11it/s]
1683
  79%|███████▊ | 669/850 [00:10<00:02, 68.30it/s]
1684
  80%|███████▉ | 676/850 [00:10<00:02, 64.43it/s]
1685
  80%|████████ | 683/850 [00:10<00:02, 64.38it/s]
1686
  81%|████████ | 690/850 [00:10<00:02, 63.19it/s]
1687
  82%|████████▏ | 697/850 [00:10<00:02, 63.47it/s]
1688
  83%|████████▎ | 704/850 [00:11<00:02, 62.55it/s]
1689
  84%|████████▎ | 711/850 [00:11<00:02, 59.59it/s]
1690
  84%|████████▍ | 717/850 [00:11<00:02, 58.91it/s]
1691
  85%|████████▌ | 724/850 [00:11<00:02, 59.52it/s]
1692
  86%|████████▌ | 730/850 [00:11<00:02, 57.96it/s]
1693
  87%|████████▋ | 737/850 [00:11<00:01, 60.64it/s]
1694
  88%|████████▊ | 744/850 [00:11<00:01, 57.91it/s]
1695
  88%|████████▊ | 750/850 [00:11<00:01, 55.74it/s]
1696
  89%|████████▉ | 757/850 [00:12<00:01, 59.42it/s]
1697
  90%|████████▉ | 764/850 [00:12<00:01, 61.63it/s]
1698
  91%|█████████ | 771/850 [00:12<00:01, 59.98it/s]
1699
  92%|█████████▏| 778/850 [00:12<00:01, 59.34it/s]
1700
  92%|█████████▏| 784/850 [00:12<00:01, 56.57it/s]
1701
  93%|█████████▎| 790/850 [00:12<00:01, 54.23it/s]
1702
  94%|█████████▍| 797/850 [00:12<00:00, 58.26it/s]
1703
  94%|█████████▍| 803/850 [00:12<00:00, 58.70it/s]
1704
  95%|█████████▌| 809/850 [00:12<00:00, 58.22it/s]
1705
  96%|█████████▌| 815/850 [00:13<00:00, 57.34it/s]
1706
  97%|█████████▋| 823/850 [00:13<00:00, 62.38it/s]
1707
  98%|█████████▊| 830/850 [00:13<00:00, 60.31it/s]
1708
  98%|█████████▊| 837/850 [00:13<00:00, 61.46it/s]
1709
  99%|█████████▉| 844/850 [00:13<00:00, 62.52it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1710
  0%| | 0/1826 [00:00<?, ?it/s]
1711
  1%| | 10/1826 [00:00<00:19, 94.11it/s]
1712
  1%| | 20/1826 [00:00<00:26, 68.33it/s]
1713
  2%|▏ | 28/1826 [00:00<00:26, 67.41it/s]
1714
  2%|▏ | 35/1826 [00:00<00:26, 67.50it/s]
1715
  2%|▏ | 43/1826 [00:00<00:25, 70.74it/s]
1716
  3%|▎ | 51/1826 [00:00<00:24, 72.57it/s]
1717
  3%|▎ | 59/1826 [00:00<00:26, 67.19it/s]
1718
  4%|▎ | 66/1826 [00:00<00:28, 61.91it/s]
1719
  4%|▍ | 74/1826 [00:01<00:27, 64.76it/s]
1720
  4%|▍ | 82/1826 [00:01<00:25, 67.85it/s]
1721
  5%|▍ | 89/1826 [00:01<00:25, 66.96it/s]
1722
  5%|▌ | 98/1826 [00:01<00:23, 72.93it/s]
1723
  6%|▌ | 107/1826 [00:01<00:22, 76.04it/s]
1724
  6%|▋ | 115/1826 [00:01<00:22, 74.77it/s]
1725
  7%|▋ | 123/1826 [00:01<00:25, 67.64it/s]
1726
  7%|▋ | 130/1826 [00:01<00:25, 65.55it/s]
1727
  8%|▊ | 138/1826 [00:02<00:25, 66.69it/s]
1728
  8%|▊ | 146/1826 [00:02<00:24, 69.01it/s]
1729
  8%|▊ | 153/1826 [00:02<00:24, 69.21it/s]
1730
  9%|▉ | 160/1826 [00:02<00:27, 60.78it/s]
1731
  9%|▉ | 167/1826 [00:02<00:26, 61.52it/s]
1732
  10%|▉ | 176/1826 [00:02<00:24, 68.18it/s]
1733
  10%|█ | 184/1826 [00:02<00:23, 69.66it/s]
1734
  11%|█ | 192/1826 [00:02<00:23, 69.46it/s]
1735
  11%|█ | 200/1826 [00:02<00:24, 67.41it/s]
1736
  11%|█▏ | 207/1826 [00:03<00:24, 66.24it/s]
1737
  12%|█▏ | 214/1826 [00:03<00:24, 66.73it/s]
1738
  12%|█▏ | 221/1826 [00:03<00:23, 67.00it/s]
1739
  12%|█▏ | 228/1826 [00:03<00:25, 62.86it/s]
1740
  13%|█▎ | 235/1826 [00:03<00:26, 59.84it/s]
1741
  13%|█▎ | 242/1826 [00:03<00:25, 61.91it/s]
1742
  14%|█▎ | 249/1826 [00:03<00:25, 61.34it/s]
1743
  14%|█▍ | 256/1826 [00:03<00:25, 62.00it/s]
1744
  14%|█▍ | 263/1826 [00:03<00:24, 62.93it/s]
1745
  15%|█▍ | 271/1826 [00:04<00:23, 67.26it/s]
1746
  15%|█▌ | 280/1826 [00:04<00:21, 72.72it/s]
1747
  16%|█▌ | 288/1826 [00:04<00:21, 70.72it/s]
1748
  16%|█▌ | 296/1826 [00:04<00:21, 69.79it/s]
1749
  17%|█▋ | 304/1826 [00:04<00:22, 67.51it/s]
1750
  17%|█▋ | 312/1826 [00:04<00:21, 69.04it/s]
1751
  17%|█▋ | 319/1826 [00:04<00:21, 68.83it/s]
1752
  18%|█▊ | 327/1826 [00:04<00:21, 70.40it/s]
1753
  18%|█▊ | 335/1826 [00:04<00:21, 69.17it/s]
1754
  19%|█▉ | 343/1826 [00:05<00:21, 70.48it/s]
1755
  19%|█▉ | 351/1826 [00:05<00:22, 65.72it/s]
1756
  20%|█▉ | 358/1826 [00:05<00:22, 64.30it/s]
1757
  20%|██ | 366/1826 [00:05<00:21, 68.34it/s]
1758
  20%|██ | 374/1826 [00:05<00:20, 69.37it/s]
1759
  21%|██ | 382/1826 [00:05<00:20, 71.56it/s]
1760
  21%|██▏ | 390/1826 [00:05<00:22, 64.90it/s]
1761
  22%|██▏ | 398/1826 [00:05<00:21, 66.61it/s]
1762
  22%|██▏ | 405/1826 [00:06<00:21, 66.68it/s]
1763
  23%|██▎ | 412/1826 [00:06<00:22, 63.63it/s]
1764
  23%|██▎ | 420/1826 [00:06<00:21, 65.85it/s]
1765
  23%|██▎ | 427/1826 [00:06<00:22, 62.95it/s]
1766
  24%|██▍ | 434/1826 [00:06<00:21, 63.77it/s]
1767
  24%|██▍ | 441/1826 [00:06<00:22, 61.74it/s]
1768
  25%|██▍ | 450/1826 [00:06<00:20, 68.00it/s]
1769
  25%|██▌ | 457/1826 [00:06<00:20, 67.45it/s]
1770
  26%|██▌ | 466/1826 [00:06<00:19, 69.66it/s]
1771
  26%|██▌ | 473/1826 [00:07<00:21, 64.33it/s]
1772
  26%|██▋ | 480/1826 [00:07<00:21, 63.73it/s]
1773
  27%|██▋ | 487/1826 [00:07<00:21, 62.35it/s]
1774
  27%|██▋ | 494/1826 [00:07<00:21, 62.04it/s]
1775
  27%|██▋ | 502/1826 [00:07<00:20, 64.86it/s]
1776
  28%|██▊ | 510/1826 [00:07<00:19, 67.59it/s]
1777
  28%|██▊ | 518/1826 [00:07<00:18, 70.73it/s]
1778
  29%|██▉ | 527/1826 [00:07<00:17, 73.95it/s]
1779
  29%|██▉ | 535/1826 [00:07<00:17, 71.81it/s]
1780
  30%|██▉ | 543/1826 [00:08<00:17, 71.49it/s]
1781
  30%|███ | 551/1826 [00:08<00:19, 65.90it/s]
1782
  31%|███ | 558/1826 [00:08<00:19, 64.66it/s]
1783
  31%|███ | 567/1826 [00:08<00:17, 70.27it/s]
1784
  32%|███▏ | 576/1826 [00:08<00:17, 73.36it/s]
1785
  32%|███▏ | 584/1826 [00:08<00:18, 68.09it/s]
1786
  32%|███▏ | 592/1826 [00:08<00:17, 70.54it/s]
1787
  33%|███▎ | 600/1826 [00:08<00:17, 69.88it/s]
1788
  33%|███▎ | 608/1826 [00:09<00:18, 65.58it/s]
1789
  34%|███▎ | 615/1826 [00:09<00:18, 65.45it/s]
1790
  34%|███▍ | 622/1826 [00:09<00:18, 66.26it/s]
1791
  35%|███▍ | 632/1826 [00:09<00:16, 73.73it/s]
1792
  35%|███▌ | 641/1826 [00:09<00:15, 76.81it/s]
1793
  36%|███▌ | 649/1826 [00:09<00:16, 71.30it/s]
1794
  36%|███▌ | 657/1826 [00:09<00:16, 69.22it/s]
1795
  37%|███▋ | 667/1826 [00:09<00:15, 75.05it/s]
1796
  37%|███▋ | 675/1826 [00:09<00:16, 71.22it/s]
1797
  37%|███▋ | 683/1826 [00:10<00:16, 69.84it/s]
1798
  38%|███▊ | 692/1826 [00:10<00:15, 73.46it/s]
1799
  38%|███▊ | 700/1826 [00:10<00:15, 73.83it/s]
1800
  39%|███▉ | 708/1826 [00:10<00:14, 75.07it/s]
1801
  39%|███▉ | 718/1826 [00:10<00:13, 79.86it/s]
1802
  40%|███▉ | 727/1826 [00:10<00:13, 78.55it/s]
1803
  40%|████ | 735/1826 [00:10<00:14, 76.87it/s]
1804
  41%|████ | 743/1826 [00:10<00:14, 75.47it/s]
1805
  41%|████ | 752/1826 [00:10<00:13, 79.11it/s]
1806
  42%|████▏ | 760/1826 [00:11<00:13, 77.59it/s]
1807
  42%|████▏ | 769/1826 [00:11<00:13, 78.55it/s]
1808
  43%|████▎ | 777/1826 [00:11<00:13, 77.65it/s]
1809
  43%|████▎ | 785/1826 [00:11<00:14, 71.54it/s]
1810
  43%|████▎ | 793/1826 [00:11<00:14, 73.07it/s]
1811
  44%|████▍ | 801/1826 [00:11<00:15, 68.26it/s]
1812
  44%|████▍ | 809/1826 [00:11<00:14, 69.66it/s]
1813
  45%|████▍ | 817/1826 [00:11<00:14, 71.79it/s]
1814
  45%|████▌ | 826/1826 [00:11<00:13, 76.14it/s]
1815
  46%|████▌ | 834/1826 [00:12<00:14, 68.30it/s]
1816
  46%|████▌ | 842/1826 [00:12<00:14, 69.69it/s]
1817
  47%|████▋ | 851/1826 [00:12<00:13, 72.87it/s]
1818
  47%|████▋ | 860/1826 [00:12<00:12, 77.24it/s]
1819
  48%|████▊ | 868/1826 [00:12<00:13, 72.75it/s]
1820
  48%|████▊ | 876/1826 [00:12<00:13, 72.25it/s]
1821
  48%|████▊ | 884/1826 [00:12<00:13, 71.43it/s]
1822
  49%|████▉ | 892/1826 [00:12<00:13, 68.24it/s]
1823
  49%|████▉ | 902/1826 [00:13<00:12, 74.85it/s]
1824
  50%|████▉ | 911/1826 [00:13<00:11, 77.59it/s]
1825
  50%|█████ | 920/1826 [00:13<00:11, 80.11it/s]
1826
  51%|█████ | 929/1826 [00:13<00:11, 79.42it/s]
1827
  51%|█████▏ | 937/1826 [00:13<00:12, 73.54it/s]
1828
  52%|█████▏ | 945/1826 [00:13<00:12, 68.71it/s]
1829
  52%|█████▏ | 952/1826 [00:13<00:12, 68.31it/s]
1830
  53%|█████▎ | 960/1826 [00:13<00:12, 69.39it/s]
1831
  53%|█████▎ | 968/1826 [00:13<00:11, 71.85it/s]
1832
  53%|█████▎ | 976/1826 [00:14<00:12, 67.88it/s]
1833
  54%|█████▍ | 985/1826 [00:14<00:11, 71.98it/s]
1834
  54%|█████▍ | 993/1826 [00:14<00:11, 69.95it/s]
1835
  55%|█████▍ | 1001/1826 [00:14<00:12, 67.60it/s]
1836
  55%|█████▌ | 1008/1826 [00:14<00:12, 68.06it/s]
1837
  56%|█████▌ | 1016/1826 [00:14<00:11, 70.58it/s]
1838
  56%|█████▌ | 1025/1826 [00:14<00:10, 74.99it/s]
1839
  57%|█████▋ | 1033/1826 [00:14<00:10, 73.75it/s]
1840
  57%|█████▋ | 1041/1826 [00:14<00:10, 71.80it/s]
1841
  57%|█████▋ | 1049/1826 [00:15<00:11, 69.86it/s]
1842
  58%|█████▊ | 1057/1826 [00:15<00:11, 68.46it/s]
1843
  58%|█████▊ | 1066/1826 [00:15<00:10, 73.61it/s]
1844
  59%|█████▉ | 1074/1826 [00:15<00:10, 74.53it/s]
1845
  59%|█████▉ | 1083/1826 [00:15<00:09, 78.56it/s]
1846
  60%|█████▉ | 1092/1826 [00:15<00:09, 79.51it/s]
1847
  60%|██████ | 1101/1826 [00:15<00:09, 79.20it/s]
1848
  61%|██████ | 1109/1826 [00:15<00:09, 74.39it/s]
1849
  61%|██████ | 1117/1826 [00:15<00:09, 71.61it/s]
1850
  62%|██████▏ | 1126/1826 [00:16<00:09, 73.71it/s]
1851
  62%|██████▏ | 1134/1826 [00:16<00:09, 75.14it/s]
1852
  63%|██████▎ | 1142/1826 [00:16<00:09, 72.85it/s]
1853
  63%|██████▎ | 1150/1826 [00:16<00:09, 71.67it/s]
1854
  63%|██████▎ | 1158/1826 [00:16<00:09, 71.29it/s]
1855
  64%|██████▍ | 1166/1826 [00:16<00:09, 67.84it/s]
1856
  64%|██████▍ | 1174/1826 [00:16<00:09, 69.65it/s]
1857
  65%|██████▍ | 1182/1826 [00:16<00:09, 67.30it/s]
1858
  65%|██████▌ | 1190/1826 [00:17<00:09, 68.54it/s]
1859
  66%|██████▌ | 1198/1826 [00:17<00:08, 70.13it/s]
1860
  66%|██████▌ | 1206/1826 [00:17<00:08, 70.01it/s]
1861
  66%|██████▋ | 1214/1826 [00:17<00:09, 64.04it/s]
1862
  67%|██████▋ | 1221/1826 [00:17<00:09, 62.76it/s]
1863
  67%|██████▋ | 1230/1826 [00:17<00:08, 68.49it/s]
1864
  68%|██████▊ | 1238/1826 [00:17<00:08, 69.88it/s]
1865
  68%|██████▊ | 1246/1826 [00:17<00:08, 70.54it/s]
1866
  69%|██████▊ | 1254/1826 [00:17<00:08, 64.45it/s]
1867
  69%|██████▉ | 1262/1826 [00:18<00:08, 66.57it/s]
1868
  70%|██████▉ | 1270/1826 [00:18<00:08, 69.08it/s]
1869
  70%|██████▉ | 1278/1826 [00:18<00:07, 71.87it/s]
1870
  70%|███████ | 1286/1826 [00:18<00:07, 72.39it/s]
1871
  71%|███████ | 1295/1826 [00:18<00:06, 76.32it/s]
1872
  71%|███████▏ | 1304/1826 [00:18<00:06, 79.67it/s]
1873
  72%|███████▏ | 1313/1826 [00:18<00:06, 79.61it/s]
1874
  72%|███████▏ | 1322/1826 [00:18<00:06, 78.03it/s]
1875
  73%|███████▎ | 1330/1826 [00:18<00:06, 75.63it/s]
1876
  73%|███████▎ | 1338/1826 [00:19<00:06, 73.90it/s]
1877
  74%|███████▎ | 1346/1826 [00:19<00:06, 69.31it/s]
1878
  74%|███████▍ | 1354/1826 [00:19<00:06, 70.19it/s]
1879
  75%|███████▍ | 1362/1826 [00:19<00:06, 71.84it/s]
1880
  75%|███████▌ | 1370/1826 [00:19<00:06, 67.49it/s]
1881
  76%|███████▌ | 1379/1826 [00:19<00:06, 72.42it/s]
1882
  76%|███████▌ | 1387/1826 [00:19<00:05, 74.47it/s]
1883
  76%|███████▋ | 1395/1826 [00:19<00:05, 75.32it/s]
1884
  77%|███████▋ | 1404/1826 [00:19<00:05, 79.20it/s]
1885
  77%|███████▋ | 1413/1826 [00:20<00:05, 80.03it/s]
1886
  78%|███████▊ | 1422/1826 [00:20<00:05, 76.15it/s]
1887
  78%|███████▊ | 1430/1826 [00:20<00:05, 72.26it/s]
1888
  79%|███████▉ | 1438/1826 [00:20<00:05, 68.01it/s]
1889
  79%|███████▉ | 1446/1826 [00:20<00:05, 69.61it/s]
1890
  80%|███████▉ | 1454/1826 [00:20<00:05, 71.49it/s]
1891
  80%|████████ | 1462/1826 [00:20<00:05, 72.56it/s]
1892
  81%|████████ | 1470/1826 [00:20<00:05, 66.00it/s]
1893
  81%|████████ | 1477/1826 [00:21<00:05, 65.05it/s]
1894
  81%|████████▏ | 1484/1826 [00:21<00:05, 65.40it/s]
1895
  82%|████████▏ | 1491/1826 [00:21<00:05, 60.28it/s]
1896
  82%|████████▏ | 1498/1826 [00:21<00:05, 61.97it/s]
1897
  82%|████████▏ | 1505/1826 [00:21<00:05, 60.61it/s]
1898
  83%|████████▎ | 1513/1826 [00:21<00:04, 64.51it/s]
1899
  83%|████████▎ | 1520/1826 [00:21<00:04, 65.43it/s]
1900
  84%|████████▎ | 1527/1826 [00:21<00:04, 62.45it/s]
1901
  84%|████████▍ | 1534/1826 [00:21<00:04, 64.07it/s]
1902
  84%|████████▍ | 1541/1826 [00:22<00:04, 63.72it/s]
1903
  85%|████████▍ | 1548/1826 [00:22<00:04, 64.27it/s]
1904
  85%|████████▌ | 1555/1826 [00:22<00:04, 65.72it/s]
1905
  86%|████████▌ | 1562/1826 [00:22<00:03, 66.77it/s]
1906
  86%|████████▌ | 1569/1826 [00:22<00:04, 63.11it/s]
1907
  86%|████████▋ | 1576/1826 [00:22<00:03, 62.95it/s]
1908
  87%|████████▋ | 1584/1826 [00:22<00:03, 65.44it/s]
1909
  87%|████████▋ | 1591/1826 [00:22<00:03, 63.71it/s]
1910
  88%|████████▊ | 1598/1826 [00:22<00:03, 63.28it/s]
1911
  88%|████████▊ | 1607/1826 [00:23<00:03, 69.37it/s]
1912
  88%|████████▊ | 1614/1826 [00:23<00:03, 68.35it/s]
1913
  89%|████████▉ | 1621/1826 [00:23<00:03, 61.61it/s]
1914
  89%|████████▉ | 1628/1826 [00:23<00:03, 56.69it/s]
1915
  90%|████████▉ | 1636/1826 [00:23<00:03, 60.23it/s]
1916
  90%|████████▉ | 1643/1826 [00:23<00:03, 60.95it/s]
1917
  90%|█████████ | 1651/1826 [00:23<00:02, 65.46it/s]
1918
  91%|█████████ | 1658/1826 [00:23<00:02, 64.85it/s]
1919
  91%|█████████ | 1665/1826 [00:24<00:02, 65.01it/s]
1920
  92%|█████████▏| 1673/1826 [00:24<00:02, 68.49it/s]
1921
  92%|█████████▏| 1681/1826 [00:24<00:02, 70.31it/s]
1922
  93%|█████████▎| 1690/1826 [00:24<00:01, 73.05it/s]
1923
  93%|█████████▎| 1698/1826 [00:24<00:02, 63.59it/s]
1924
  93%|█████████▎| 1706/1826 [00:24<00:01, 65.52it/s]
1925
  94%|█████████▍| 1714/1826 [00:24<00:01, 68.18it/s]
1926
  94%|█████████▍| 1721/1826 [00:24<00:01, 61.69it/s]
1927
  95%|█████████▍| 1729/1826 [00:24<00:01, 65.81it/s]
1928
  95%|█████████▌| 1737/1826 [00:25<00:01, 67.43it/s]
1929
  96%|█████████▌| 1746/1826 [00:25<00:01, 71.34it/s]
1930
  96%|█████████▌| 1754/1826 [00:25<00:01, 69.71it/s]
1931
  96%|█████████▋| 1762/1826 [00:25<00:00, 68.18it/s]
1932
  97%|█████████▋| 1769/1826 [00:25<00:00, 66.79it/s]
1933
  97%|█████████▋| 1776/1826 [00:25<00:00, 65.37it/s]
1934
  98%|█████████▊| 1784/1826 [00:25<00:00, 69.28it/s]
1935
  98%|█████████▊| 1791/1826 [00:25<00:00, 64.98it/s]
1936
  98%|█████████▊| 1798/1826 [00:26<00:00, 60.14it/s]
1937
  99%|█████████▉| 1805/1826 [00:26<00:00, 62.25it/s]
1938
  99%|█████████▉| 1813/1826 [00:26<00:00, 66.07it/s]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1588
  {'eval_loss': 0.009366312995553017, 'eval_precision': 0.9169054441260746, 'eval_recall': 0.9293320425943853, 'eval_f1': 0.9230769230769231, 'eval_accuracy': 0.9986302259153467, 'eval_runtime': 17.4762, 'eval_samples_per_second': 388.987, 'eval_steps_per_second': 48.638, 'epoch': 9.99}
1589
  {'train_runtime': 2701.3395, 'train_samples_per_second': 114.954, 'train_steps_per_second': 1.795, 'train_loss': 0.002068036902580679, 'epoch': 9.99}
1590
 
1591
+ ***** train metrics *****
1592
+ epoch = 9.9897
1593
+ total_flos = 16517987GF
1594
+ train_loss = 0.0021
1595
+ train_runtime = 0:45:01.33
1596
+ train_samples = 31053
1597
+ train_samples_per_second = 114.954
1598
+ train_steps_per_second = 1.795
1599
+ 09/09/2024 16:18:12 - INFO - __main__ - *** Evaluate ***
1600
+ [INFO|trainer.py:811] 2024-09-09 16:18:12,179 >> The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
1601
+ [INFO|trainer.py:3819] 2024-09-09 16:18:12,182 >>
1602
+ ***** Running Evaluation *****
1603
+ [INFO|trainer.py:3821] 2024-09-09 16:18:12,182 >> Num examples = 6798
1604
+ [INFO|trainer.py:3824] 2024-09-09 16:18:12,182 >> Batch size = 8
1605
+
1606
  0%| | 0/850 [00:00<?, ?it/s]
1607
  1%| | 9/850 [00:00<00:09, 84.85it/s]
1608
  2%|▏ | 18/850 [00:00<00:12, 69.16it/s]
1609
  3%|▎ | 26/850 [00:00<00:12, 67.03it/s]
1610
  4%|▍ | 34/850 [00:00<00:11, 69.34it/s]
1611
  5%|▍ | 42/850 [00:00<00:12, 66.38it/s]
1612
  6%|▌ | 49/850 [00:00<00:14, 55.56it/s]
1613
  7%|▋ | 57/850 [00:00<00:13, 60.68it/s]
1614
  8%|▊ | 64/850 [00:01<00:13, 59.50it/s]
1615
  8%|▊ | 71/850 [00:01<00:14, 53.38it/s]
1616
  9%|▉ | 77/850 [00:01<00:14, 54.52it/s]
1617
  10%|▉ | 83/850 [00:01<00:14, 54.60it/s]
1618
  11%|█ | 91/850 [00:01<00:12, 59.33it/s]
1619
  12%|█▏ | 98/850 [00:01<00:12, 60.45it/s]
1620
  12%|█▏ | 106/850 [00:01<00:11, 64.35it/s]
1621
  13%|█▎ | 113/850 [00:01<00:11, 63.96it/s]
1622
  14%|█▍ | 120/850 [00:01<00:11, 64.78it/s]
1623
  15%|█▍ | 127/850 [00:02<00:11, 65.12it/s]
1624
  16%|█▌ | 134/850 [00:02<00:11, 61.87it/s]
1625
  17%|█▋ | 142/850 [00:02<00:11, 64.23it/s]
1626
  18%|█▊ | 149/850 [00:02<00:11, 60.11it/s]
1627
  18%|█▊ | 156/850 [00:02<00:11, 60.66it/s]
1628
  19%|█▉ | 163/850 [00:02<00:10, 62.78it/s]
1629
  20%|██ | 170/850 [00:02<00:10, 63.49it/s]
1630
  21%|██ | 177/850 [00:02<00:10, 65.22it/s]
1631
  22%|██▏ | 184/850 [00:02<00:10, 62.35it/s]
1632
  22%|██▏ | 191/850 [00:03<00:10, 60.53it/s]
1633
  23%|██▎ | 198/850 [00:03<00:10, 62.55it/s]
1634
  24%|██▍ | 205/850 [00:03<00:10, 61.27it/s]
1635
  25%|██▍ | 212/850 [00:03<00:10, 61.68it/s]
1636
  26%|██▌ | 219/850 [00:03<00:10, 60.02it/s]
1637
  27%|██▋ | 227/850 [00:03<00:09, 63.15it/s]
1638
  28%|██▊ | 234/850 [00:03<00:09, 64.86it/s]
1639
  28%|██▊ | 241/850 [00:03<00:09, 63.81it/s]
1640
  29%|██▉ | 248/850 [00:04<00:09, 61.38it/s]
1641
  30%|███ | 256/850 [00:04<00:09, 65.97it/s]
1642
  31%|███ | 263/850 [00:04<00:09, 65.18it/s]
1643
  32%|███▏ | 271/850 [00:04<00:08, 66.94it/s]
1644
  33%|███▎ | 279/850 [00:04<00:08, 68.88it/s]
1645
  34%|███▍ | 287/850 [00:04<00:08, 69.84it/s]
1646
  35%|███▍ | 295/850 [00:04<00:08, 69.34it/s]
1647
  36%|███▌ | 302/850 [00:04<00:08, 66.76it/s]
1648
  36%|███▋ | 309/850 [00:04<00:08, 67.41it/s]
1649
  37%|███▋ | 316/850 [00:04<00:07, 67.96it/s]
1650
  38%|███▊ | 323/850 [00:05<00:07, 68.48it/s]
1651
  39%|███▉ | 330/850 [00:05<00:07, 67.10it/s]
1652
  40%|███▉ | 337/850 [00:05<00:07, 66.09it/s]
1653
  40%|████ | 344/850 [00:05<00:07, 66.02it/s]
1654
  41%|████▏ | 351/850 [00:05<00:07, 66.82it/s]
1655
  42%|████▏ | 358/850 [00:05<00:07, 64.35it/s]
1656
  43%|████▎ | 365/850 [00:05<00:07, 63.66it/s]
1657
  44%|████▍ | 372/850 [00:05<00:07, 65.24it/s]
1658
  45%|████▍ | 379/850 [00:05<00:07, 59.31it/s]
1659
  45%|████▌ | 386/850 [00:06<00:07, 59.44it/s]
1660
  46%|████▌ | 393/850 [00:06<00:07, 58.18it/s]
1661
  47%|████▋ | 399/850 [00:06<00:07, 56.50it/s]
1662
  48%|████▊ | 407/850 [00:06<00:07, 62.14it/s]
1663
  49%|████▉ | 415/850 [00:06<00:06, 66.41it/s]
1664
  50%|████▉ | 423/850 [00:06<00:06, 68.67it/s]
1665
  51%|█████ | 431/850 [00:06<00:05, 70.17it/s]
1666
  52%|█████▏ | 439/850 [00:06<00:06, 66.16it/s]
1667
  52%|█████▏ | 446/850 [00:07<00:06, 63.58it/s]
1668
  53%|█████▎ | 453/850 [00:07<00:06, 61.74it/s]
1669
  54%|█████▍ | 461/850 [00:07<00:05, 64.99it/s]
1670
  55%|█████▌ | 469/850 [00:07<00:05, 67.95it/s]
1671
  56%|█████▌ | 477/850 [00:07<00:05, 69.70it/s]
1672
  57%|█████▋ | 485/850 [00:07<00:05, 66.98it/s]
1673
  58%|█████▊ | 492/850 [00:07<00:05, 65.57it/s]
1674
  59%|█████▉ | 500/850 [00:07<00:05, 67.64it/s]
1675
  60%|█████▉ | 508/850 [00:07<00:04, 69.00it/s]
1676
  61%|██████ | 515/850 [00:08<00:04, 67.70it/s]
1677
  61%|██████▏ | 522/850 [00:08<00:04, 68.14it/s]
1678
  62%|██████▏ | 529/850 [00:08<00:05, 63.43it/s]
1679
  63%|██████▎ | 536/850 [00:08<00:05, 62.14it/s]
1680
  64%|██████▍ | 543/850 [00:08<00:05, 60.12it/s]
1681
  65%|██████▍ | 550/850 [00:08<00:05, 58.62it/s]
1682
  66%|██████▌ | 557/850 [00:08<00:04, 59.23it/s]
1683
  66%|██████▌ | 563/850 [00:08<00:05, 55.59it/s]
1684
  67%|██████▋ | 570/850 [00:08<00:04, 57.72it/s]
1685
  68%|██████▊ | 577/850 [00:09<00:04, 57.68it/s]
1686
  69%|██████▊ | 584/850 [00:09<00:04, 59.76it/s]
1687
  70%|██████▉ | 591/850 [00:09<00:04, 60.25it/s]
1688
  70%|███████ | 598/850 [00:09<00:04, 60.81it/s]
1689
  71%|███████ | 605/850 [00:09<00:04, 60.53it/s]
1690
  72%|███████▏ | 612/850 [00:09<00:03, 62.53it/s]
1691
  73%|███████▎ | 619/850 [00:09<00:03, 64.26it/s]
1692
  74%|███████▎ | 626/850 [00:09<00:03, 65.14it/s]
1693
  74%|███████▍ | 633/850 [00:09<00:03, 63.37it/s]
1694
  75%|███████▌ | 640/850 [00:10<00:03, 64.52it/s]
1695
  76%|███████▌ | 648/850 [00:10<00:02, 67.99it/s]
1696
  77%|███████▋ | 655/850 [00:10<00:02, 68.40it/s]
1697
  78%|███████▊ | 662/850 [00:10<00:02, 68.11it/s]
1698
  79%|███████▊ | 669/850 [00:10<00:02, 68.30it/s]
1699
  80%|███████▉ | 676/850 [00:10<00:02, 64.43it/s]
1700
  80%|████████ | 683/850 [00:10<00:02, 64.38it/s]
1701
  81%|████████ | 690/850 [00:10<00:02, 63.19it/s]
1702
  82%|████████▏ | 697/850 [00:10<00:02, 63.47it/s]
1703
  83%|████████▎ | 704/850 [00:11<00:02, 62.55it/s]
1704
  84%|████████▎ | 711/850 [00:11<00:02, 59.59it/s]
1705
  84%|████████▍ | 717/850 [00:11<00:02, 58.91it/s]
1706
  85%|████████▌ | 724/850 [00:11<00:02, 59.52it/s]
1707
  86%|████████▌ | 730/850 [00:11<00:02, 57.96it/s]
1708
  87%|████████▋ | 737/850 [00:11<00:01, 60.64it/s]
1709
  88%|████████▊ | 744/850 [00:11<00:01, 57.91it/s]
1710
  88%|████████▊ | 750/850 [00:11<00:01, 55.74it/s]
1711
  89%|████████▉ | 757/850 [00:12<00:01, 59.42it/s]
1712
  90%|████████▉ | 764/850 [00:12<00:01, 61.63it/s]
1713
  91%|█████████ | 771/850 [00:12<00:01, 59.98it/s]
1714
  92%|█████████▏| 778/850 [00:12<00:01, 59.34it/s]
1715
  92%|█████████▏| 784/850 [00:12<00:01, 56.57it/s]
1716
  93%|█████████▎| 790/850 [00:12<00:01, 54.23it/s]
1717
  94%|█████████▍| 797/850 [00:12<00:00, 58.26it/s]
1718
  94%|█████████▍| 803/850 [00:12<00:00, 58.70it/s]
1719
  95%|█████████▌| 809/850 [00:12<00:00, 58.22it/s]
1720
  96%|█████████▌| 815/850 [00:13<00:00, 57.34it/s]
1721
  97%|█████████▋| 823/850 [00:13<00:00, 62.38it/s]
1722
  98%|█████████▊| 830/850 [00:13<00:00, 60.31it/s]
1723
  98%|█████████▊| 837/850 [00:13<00:00, 61.46it/s]
1724
  99%|█████████▉| 844/850 [00:13<00:00, 62.52it/s]
1725
+ ***** eval metrics *****
1726
+ epoch = 9.9897
1727
+ eval_accuracy = 0.9986
1728
+ eval_f1 = 0.9231
1729
+ eval_loss = 0.0094
1730
+ eval_precision = 0.9169
1731
+ eval_recall = 0.9293
1732
+ eval_runtime = 0:00:17.51
1733
+ eval_samples = 6798
1734
+ eval_samples_per_second = 388.236
1735
+ eval_steps_per_second = 48.544
1736
+ 09/09/2024 16:18:29 - INFO - __main__ - *** Predict ***
1737
+ [INFO|trainer.py:811] 2024-09-09 16:18:29,694 >> The following columns in the test set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `BertForTokenClassification.forward`, you can safely ignore this message.
1738
+ [INFO|trainer.py:3819] 2024-09-09 16:18:29,696 >>
1739
+ ***** Running Prediction *****
1740
+ [INFO|trainer.py:3821] 2024-09-09 16:18:29,696 >> Num examples = 14605
1741
+ [INFO|trainer.py:3824] 2024-09-09 16:18:29,696 >> Batch size = 8
1742
+
1743
  0%| | 0/1826 [00:00<?, ?it/s]
1744
  1%| | 10/1826 [00:00<00:19, 94.11it/s]
1745
  1%| | 20/1826 [00:00<00:26, 68.33it/s]
1746
  2%|▏ | 28/1826 [00:00<00:26, 67.41it/s]
1747
  2%|▏ | 35/1826 [00:00<00:26, 67.50it/s]
1748
  2%|▏ | 43/1826 [00:00<00:25, 70.74it/s]
1749
  3%|▎ | 51/1826 [00:00<00:24, 72.57it/s]
1750
  3%|▎ | 59/1826 [00:00<00:26, 67.19it/s]
1751
  4%|▎ | 66/1826 [00:00<00:28, 61.91it/s]
1752
  4%|▍ | 74/1826 [00:01<00:27, 64.76it/s]
1753
  4%|▍ | 82/1826 [00:01<00:25, 67.85it/s]
1754
  5%|▍ | 89/1826 [00:01<00:25, 66.96it/s]
1755
  5%|▌ | 98/1826 [00:01<00:23, 72.93it/s]
1756
  6%|▌ | 107/1826 [00:01<00:22, 76.04it/s]
1757
  6%|▋ | 115/1826 [00:01<00:22, 74.77it/s]
1758
  7%|▋ | 123/1826 [00:01<00:25, 67.64it/s]
1759
  7%|▋ | 130/1826 [00:01<00:25, 65.55it/s]
1760
  8%|▊ | 138/1826 [00:02<00:25, 66.69it/s]
1761
  8%|▊ | 146/1826 [00:02<00:24, 69.01it/s]
1762
  8%|▊ | 153/1826 [00:02<00:24, 69.21it/s]
1763
  9%|▉ | 160/1826 [00:02<00:27, 60.78it/s]
1764
  9%|▉ | 167/1826 [00:02<00:26, 61.52it/s]
1765
  10%|▉ | 176/1826 [00:02<00:24, 68.18it/s]
1766
  10%|█ | 184/1826 [00:02<00:23, 69.66it/s]
1767
  11%|█ | 192/1826 [00:02<00:23, 69.46it/s]
1768
  11%|█ | 200/1826 [00:02<00:24, 67.41it/s]
1769
  11%|█▏ | 207/1826 [00:03<00:24, 66.24it/s]
1770
  12%|█▏ | 214/1826 [00:03<00:24, 66.73it/s]
1771
  12%|█▏ | 221/1826 [00:03<00:23, 67.00it/s]
1772
  12%|█▏ | 228/1826 [00:03<00:25, 62.86it/s]
1773
  13%|█▎ | 235/1826 [00:03<00:26, 59.84it/s]
1774
  13%|█▎ | 242/1826 [00:03<00:25, 61.91it/s]
1775
  14%|█▎ | 249/1826 [00:03<00:25, 61.34it/s]
1776
  14%|█▍ | 256/1826 [00:03<00:25, 62.00it/s]
1777
  14%|█▍ | 263/1826 [00:03<00:24, 62.93it/s]
1778
  15%|█▍ | 271/1826 [00:04<00:23, 67.26it/s]
1779
  15%|█▌ | 280/1826 [00:04<00:21, 72.72it/s]
1780
  16%|█▌ | 288/1826 [00:04<00:21, 70.72it/s]
1781
  16%|█▌ | 296/1826 [00:04<00:21, 69.79it/s]
1782
  17%|█▋ | 304/1826 [00:04<00:22, 67.51it/s]
1783
  17%|█▋ | 312/1826 [00:04<00:21, 69.04it/s]
1784
  17%|█▋ | 319/1826 [00:04<00:21, 68.83it/s]
1785
  18%|█▊ | 327/1826 [00:04<00:21, 70.40it/s]
1786
  18%|█▊ | 335/1826 [00:04<00:21, 69.17it/s]
1787
  19%|█▉ | 343/1826 [00:05<00:21, 70.48it/s]
1788
  19%|█▉ | 351/1826 [00:05<00:22, 65.72it/s]
1789
  20%|█▉ | 358/1826 [00:05<00:22, 64.30it/s]
1790
  20%|██ | 366/1826 [00:05<00:21, 68.34it/s]
1791
  20%|██ | 374/1826 [00:05<00:20, 69.37it/s]
1792
  21%|██ | 382/1826 [00:05<00:20, 71.56it/s]
1793
  21%|██▏ | 390/1826 [00:05<00:22, 64.90it/s]
1794
  22%|██▏ | 398/1826 [00:05<00:21, 66.61it/s]
1795
  22%|██▏ | 405/1826 [00:06<00:21, 66.68it/s]
1796
  23%|██▎ | 412/1826 [00:06<00:22, 63.63it/s]
1797
  23%|██▎ | 420/1826 [00:06<00:21, 65.85it/s]
1798
  23%|██▎ | 427/1826 [00:06<00:22, 62.95it/s]
1799
  24%|██▍ | 434/1826 [00:06<00:21, 63.77it/s]
1800
  24%|██▍ | 441/1826 [00:06<00:22, 61.74it/s]
1801
  25%|██▍ | 450/1826 [00:06<00:20, 68.00it/s]
1802
  25%|██▌ | 457/1826 [00:06<00:20, 67.45it/s]
1803
  26%|██▌ | 466/1826 [00:06<00:19, 69.66it/s]
1804
  26%|██▌ | 473/1826 [00:07<00:21, 64.33it/s]
1805
  26%|██▋ | 480/1826 [00:07<00:21, 63.73it/s]
1806
  27%|██▋ | 487/1826 [00:07<00:21, 62.35it/s]
1807
  27%|██▋ | 494/1826 [00:07<00:21, 62.04it/s]
1808
  27%|██▋ | 502/1826 [00:07<00:20, 64.86it/s]
1809
  28%|██▊ | 510/1826 [00:07<00:19, 67.59it/s]
1810
  28%|██▊ | 518/1826 [00:07<00:18, 70.73it/s]
1811
  29%|██▉ | 527/1826 [00:07<00:17, 73.95it/s]
1812
  29%|██▉ | 535/1826 [00:07<00:17, 71.81it/s]
1813
  30%|██▉ | 543/1826 [00:08<00:17, 71.49it/s]
1814
  30%|███ | 551/1826 [00:08<00:19, 65.90it/s]
1815
  31%|███ | 558/1826 [00:08<00:19, 64.66it/s]
1816
  31%|███ | 567/1826 [00:08<00:17, 70.27it/s]
1817
  32%|███▏ | 576/1826 [00:08<00:17, 73.36it/s]
1818
  32%|███▏ | 584/1826 [00:08<00:18, 68.09it/s]
1819
  32%|███▏ | 592/1826 [00:08<00:17, 70.54it/s]
1820
  33%|███▎ | 600/1826 [00:08<00:17, 69.88it/s]
1821
  33%|███▎ | 608/1826 [00:09<00:18, 65.58it/s]
1822
  34%|███▎ | 615/1826 [00:09<00:18, 65.45it/s]
1823
  34%|███▍ | 622/1826 [00:09<00:18, 66.26it/s]
1824
  35%|███▍ | 632/1826 [00:09<00:16, 73.73it/s]
1825
  35%|███▌ | 641/1826 [00:09<00:15, 76.81it/s]
1826
  36%|███▌ | 649/1826 [00:09<00:16, 71.30it/s]
1827
  36%|███▌ | 657/1826 [00:09<00:16, 69.22it/s]
1828
  37%|███▋ | 667/1826 [00:09<00:15, 75.05it/s]
1829
  37%|███▋ | 675/1826 [00:09<00:16, 71.22it/s]
1830
  37%|███▋ | 683/1826 [00:10<00:16, 69.84it/s]
1831
  38%|███▊ | 692/1826 [00:10<00:15, 73.46it/s]
1832
  38%|███▊ | 700/1826 [00:10<00:15, 73.83it/s]
1833
  39%|███▉ | 708/1826 [00:10<00:14, 75.07it/s]
1834
  39%|███▉ | 718/1826 [00:10<00:13, 79.86it/s]
1835
  40%|███▉ | 727/1826 [00:10<00:13, 78.55it/s]
1836
  40%|████ | 735/1826 [00:10<00:14, 76.87it/s]
1837
  41%|████ | 743/1826 [00:10<00:14, 75.47it/s]
1838
  41%|████ | 752/1826 [00:10<00:13, 79.11it/s]
1839
  42%|████▏ | 760/1826 [00:11<00:13, 77.59it/s]
1840
  42%|████▏ | 769/1826 [00:11<00:13, 78.55it/s]
1841
  43%|████▎ | 777/1826 [00:11<00:13, 77.65it/s]
1842
  43%|████▎ | 785/1826 [00:11<00:14, 71.54it/s]
1843
  43%|████▎ | 793/1826 [00:11<00:14, 73.07it/s]
1844
  44%|████▍ | 801/1826 [00:11<00:15, 68.26it/s]
1845
  44%|████▍ | 809/1826 [00:11<00:14, 69.66it/s]
1846
  45%|████▍ | 817/1826 [00:11<00:14, 71.79it/s]
1847
  45%|████▌ | 826/1826 [00:11<00:13, 76.14it/s]
1848
  46%|████▌ | 834/1826 [00:12<00:14, 68.30it/s]
1849
  46%|████▌ | 842/1826 [00:12<00:14, 69.69it/s]
1850
  47%|████▋ | 851/1826 [00:12<00:13, 72.87it/s]
1851
  47%|████▋ | 860/1826 [00:12<00:12, 77.24it/s]
1852
  48%|████▊ | 868/1826 [00:12<00:13, 72.75it/s]
1853
  48%|████▊ | 876/1826 [00:12<00:13, 72.25it/s]
1854
  48%|████▊ | 884/1826 [00:12<00:13, 71.43it/s]
1855
  49%|████▉ | 892/1826 [00:12<00:13, 68.24it/s]
1856
  49%|████▉ | 902/1826 [00:13<00:12, 74.85it/s]
1857
  50%|████▉ | 911/1826 [00:13<00:11, 77.59it/s]
1858
  50%|█████ | 920/1826 [00:13<00:11, 80.11it/s]
1859
  51%|█████ | 929/1826 [00:13<00:11, 79.42it/s]
1860
  51%|█████▏ | 937/1826 [00:13<00:12, 73.54it/s]
1861
  52%|█████▏ | 945/1826 [00:13<00:12, 68.71it/s]
1862
  52%|█████▏ | 952/1826 [00:13<00:12, 68.31it/s]
1863
  53%|█████▎ | 960/1826 [00:13<00:12, 69.39it/s]
1864
  53%|█████▎ | 968/1826 [00:13<00:11, 71.85it/s]
1865
  53%|█████▎ | 976/1826 [00:14<00:12, 67.88it/s]
1866
  54%|█████▍ | 985/1826 [00:14<00:11, 71.98it/s]
1867
  54%|█████▍ | 993/1826 [00:14<00:11, 69.95it/s]
1868
  55%|█████▍ | 1001/1826 [00:14<00:12, 67.60it/s]
1869
  55%|█████▌ | 1008/1826 [00:14<00:12, 68.06it/s]
1870
  56%|█████▌ | 1016/1826 [00:14<00:11, 70.58it/s]
1871
  56%|█████▌ | 1025/1826 [00:14<00:10, 74.99it/s]
1872
  57%|█████▋ | 1033/1826 [00:14<00:10, 73.75it/s]
1873
  57%|█████▋ | 1041/1826 [00:14<00:10, 71.80it/s]
1874
  57%|█████▋ | 1049/1826 [00:15<00:11, 69.86it/s]
1875
  58%|█████▊ | 1057/1826 [00:15<00:11, 68.46it/s]
1876
  58%|█████▊ | 1066/1826 [00:15<00:10, 73.61it/s]
1877
  59%|█████▉ | 1074/1826 [00:15<00:10, 74.53it/s]
1878
  59%|█████▉ | 1083/1826 [00:15<00:09, 78.56it/s]
1879
  60%|█████▉ | 1092/1826 [00:15<00:09, 79.51it/s]
1880
  60%|██████ | 1101/1826 [00:15<00:09, 79.20it/s]
1881
  61%|██████ | 1109/1826 [00:15<00:09, 74.39it/s]
1882
  61%|██████ | 1117/1826 [00:15<00:09, 71.61it/s]
1883
  62%|██████▏ | 1126/1826 [00:16<00:09, 73.71it/s]
1884
  62%|██████▏ | 1134/1826 [00:16<00:09, 75.14it/s]
1885
  63%|██████▎ | 1142/1826 [00:16<00:09, 72.85it/s]
1886
  63%|██████▎ | 1150/1826 [00:16<00:09, 71.67it/s]
1887
  63%|██████▎ | 1158/1826 [00:16<00:09, 71.29it/s]
1888
  64%|██████▍ | 1166/1826 [00:16<00:09, 67.84it/s]
1889
  64%|██████▍ | 1174/1826 [00:16<00:09, 69.65it/s]
1890
  65%|██████▍ | 1182/1826 [00:16<00:09, 67.30it/s]
1891
  65%|██████▌ | 1190/1826 [00:17<00:09, 68.54it/s]
1892
  66%|██████▌ | 1198/1826 [00:17<00:08, 70.13it/s]
1893
  66%|██████▌ | 1206/1826 [00:17<00:08, 70.01it/s]
1894
  66%|██████▋ | 1214/1826 [00:17<00:09, 64.04it/s]
1895
  67%|██████▋ | 1221/1826 [00:17<00:09, 62.76it/s]
1896
  67%|██████▋ | 1230/1826 [00:17<00:08, 68.49it/s]
1897
  68%|██████▊ | 1238/1826 [00:17<00:08, 69.88it/s]
1898
  68%|██████▊ | 1246/1826 [00:17<00:08, 70.54it/s]
1899
  69%|██████▊ | 1254/1826 [00:17<00:08, 64.45it/s]
1900
  69%|██████▉ | 1262/1826 [00:18<00:08, 66.57it/s]
1901
  70%|██████▉ | 1270/1826 [00:18<00:08, 69.08it/s]
1902
  70%|██████▉ | 1278/1826 [00:18<00:07, 71.87it/s]
1903
  70%|███████ | 1286/1826 [00:18<00:07, 72.39it/s]
1904
  71%|███████ | 1295/1826 [00:18<00:06, 76.32it/s]
1905
  71%|███████▏ | 1304/1826 [00:18<00:06, 79.67it/s]
1906
  72%|███████▏ | 1313/1826 [00:18<00:06, 79.61it/s]
1907
  72%|███████▏ | 1322/1826 [00:18<00:06, 78.03it/s]
1908
  73%|███████▎ | 1330/1826 [00:18<00:06, 75.63it/s]
1909
  73%|███████▎ | 1338/1826 [00:19<00:06, 73.90it/s]
1910
  74%|███████▎ | 1346/1826 [00:19<00:06, 69.31it/s]
1911
  74%|███████▍ | 1354/1826 [00:19<00:06, 70.19it/s]
1912
  75%|███████▍ | 1362/1826 [00:19<00:06, 71.84it/s]
1913
  75%|███████▌ | 1370/1826 [00:19<00:06, 67.49it/s]
1914
  76%|███████▌ | 1379/1826 [00:19<00:06, 72.42it/s]
1915
  76%|███████▌ | 1387/1826 [00:19<00:05, 74.47it/s]
1916
  76%|███████▋ | 1395/1826 [00:19<00:05, 75.32it/s]
1917
  77%|███████▋ | 1404/1826 [00:19<00:05, 79.20it/s]
1918
  77%|███████▋ | 1413/1826 [00:20<00:05, 80.03it/s]
1919
  78%|███████▊ | 1422/1826 [00:20<00:05, 76.15it/s]
1920
  78%|███████▊ | 1430/1826 [00:20<00:05, 72.26it/s]
1921
  79%|███████▉ | 1438/1826 [00:20<00:05, 68.01it/s]
1922
  79%|███████▉ | 1446/1826 [00:20<00:05, 69.61it/s]
1923
  80%|███████▉ | 1454/1826 [00:20<00:05, 71.49it/s]
1924
  80%|████████ | 1462/1826 [00:20<00:05, 72.56it/s]
1925
  81%|████████ | 1470/1826 [00:20<00:05, 66.00it/s]
1926
  81%|████████ | 1477/1826 [00:21<00:05, 65.05it/s]
1927
  81%|████████▏ | 1484/1826 [00:21<00:05, 65.40it/s]
1928
  82%|████████▏ | 1491/1826 [00:21<00:05, 60.28it/s]
1929
  82%|████████▏ | 1498/1826 [00:21<00:05, 61.97it/s]
1930
  82%|████████▏ | 1505/1826 [00:21<00:05, 60.61it/s]
1931
  83%|████████▎ | 1513/1826 [00:21<00:04, 64.51it/s]
1932
  83%|████████▎ | 1520/1826 [00:21<00:04, 65.43it/s]
1933
  84%|████████▎ | 1527/1826 [00:21<00:04, 62.45it/s]
1934
  84%|████████▍ | 1534/1826 [00:21<00:04, 64.07it/s]
1935
  84%|████████▍ | 1541/1826 [00:22<00:04, 63.72it/s]
1936
  85%|████████▍ | 1548/1826 [00:22<00:04, 64.27it/s]
1937
  85%|████████▌ | 1555/1826 [00:22<00:04, 65.72it/s]
1938
  86%|████████▌ | 1562/1826 [00:22<00:03, 66.77it/s]
1939
  86%|████████▌ | 1569/1826 [00:22<00:04, 63.11it/s]
1940
  86%|████████▋ | 1576/1826 [00:22<00:03, 62.95it/s]
1941
  87%|████████▋ | 1584/1826 [00:22<00:03, 65.44it/s]
1942
  87%|████████▋ | 1591/1826 [00:22<00:03, 63.71it/s]
1943
  88%|████████▊ | 1598/1826 [00:22<00:03, 63.28it/s]
1944
  88%|████████▊ | 1607/1826 [00:23<00:03, 69.37it/s]
1945
  88%|████████▊ | 1614/1826 [00:23<00:03, 68.35it/s]
1946
  89%|████████▉ | 1621/1826 [00:23<00:03, 61.61it/s]
1947
  89%|████████▉ | 1628/1826 [00:23<00:03, 56.69it/s]
1948
  90%|████████▉ | 1636/1826 [00:23<00:03, 60.23it/s]
1949
  90%|████████▉ | 1643/1826 [00:23<00:03, 60.95it/s]
1950
  90%|█████████ | 1651/1826 [00:23<00:02, 65.46it/s]
1951
  91%|█████████ | 1658/1826 [00:23<00:02, 64.85it/s]
1952
  91%|█████████ | 1665/1826 [00:24<00:02, 65.01it/s]
1953
  92%|█████████▏| 1673/1826 [00:24<00:02, 68.49it/s]
1954
  92%|█████████▏| 1681/1826 [00:24<00:02, 70.31it/s]
1955
  93%|█████████▎| 1690/1826 [00:24<00:01, 73.05it/s]
1956
  93%|█████████▎| 1698/1826 [00:24<00:02, 63.59it/s]
1957
  93%|█████████▎| 1706/1826 [00:24<00:01, 65.52it/s]
1958
  94%|█████████▍| 1714/1826 [00:24<00:01, 68.18it/s]
1959
  94%|█████████▍| 1721/1826 [00:24<00:01, 61.69it/s]
1960
  95%|█████████▍| 1729/1826 [00:24<00:01, 65.81it/s]
1961
  95%|█████████▌| 1737/1826 [00:25<00:01, 67.43it/s]
1962
  96%|█████████▌| 1746/1826 [00:25<00:01, 71.34it/s]
1963
  96%|█████████▌| 1754/1826 [00:25<00:01, 69.71it/s]
1964
  96%|█████████▋| 1762/1826 [00:25<00:00, 68.18it/s]
1965
  97%|█████████▋| 1769/1826 [00:25<00:00, 66.79it/s]
1966
  97%|█████████▋| 1776/1826 [00:25<00:00, 65.37it/s]
1967
  98%|█████████▊| 1784/1826 [00:25<00:00, 69.28it/s]
1968
  98%|█████████▊| 1791/1826 [00:25<00:00, 64.98it/s]
1969
  98%|█████████▊| 1798/1826 [00:26<00:00, 60.14it/s]
1970
  99%|█████████▉| 1805/1826 [00:26<00:00, 62.25it/s]
1971
  99%|█████████▉| 1813/1826 [00:26<00:00, 66.07it/s]
1972
+ [INFO|trainer.py:3503] 2024-09-09 16:19:03,621 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
1973
+ [INFO|configuration_utils.py:472] 2024-09-09 16:19:03,622 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
1974
+ [INFO|modeling_utils.py:2799] 2024-09-09 16:19:04,830 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
1975
+ [INFO|tokenization_utils_base.py:2684] 2024-09-09 16:19:04,831 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
1976
+ [INFO|tokenization_utils_base.py:2693] 2024-09-09 16:19:04,832 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
1977
+ ***** predict metrics *****
1978
+ predict_accuracy = 0.9982
1979
+ predict_f1 = 0.8965
1980
+ predict_loss = 0.0127
1981
+ predict_precision = 0.8773
1982
+ predict_recall = 0.9166
1983
+ predict_runtime = 0:00:33.03
1984
+ predict_samples_per_second = 442.053
1985
+ predict_steps_per_second = 55.268
1986
+
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 9.988518943742825,
3
- "total_flos": 1.1084127968547612e+16,
4
- "train_loss": 0.0030981004957495064,
5
- "train_runtime": 1801.8371,
6
- "train_samples": 27841,
7
- "train_samples_per_second": 154.515,
8
- "train_steps_per_second": 2.414
9
  }
 
1
  {
2
+ "epoch": 9.989701338825952,
3
+ "total_flos": 1.7736053837017554e+16,
4
+ "train_loss": 0.002068036902580679,
5
+ "train_runtime": 2701.3395,
6
+ "train_samples": 31053,
7
+ "train_samples_per_second": 114.954,
8
+ "train_steps_per_second": 1.795
9
  }
trainer_state.json CHANGED
@@ -1,201 +1,208 @@
1
  {
2
- "best_metric": 0.9284386617100372,
3
- "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4350",
4
- "epoch": 9.988518943742825,
5
  "eval_steps": 500,
6
- "global_step": 4350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.9988518943742825,
13
- "eval_accuracy": 0.9981371919738788,
14
- "eval_f1": 0.895427795382526,
15
- "eval_loss": 0.006041484884917736,
16
- "eval_precision": 0.8705985915492958,
17
- "eval_recall": 0.9217148182665424,
18
- "eval_runtime": 15.0476,
19
- "eval_samples_per_second": 461.603,
20
- "eval_steps_per_second": 57.75,
21
- "step": 435
22
- },
23
- {
24
- "epoch": 1.148105625717566,
25
- "grad_norm": 0.025844832882285118,
26
- "learning_rate": 4.4252873563218394e-05,
27
- "loss": 0.0156,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
- "eval_accuracy": 0.9984232411464292,
33
- "eval_f1": 0.9032855159648312,
34
- "eval_loss": 0.004733391106128693,
35
- "eval_precision": 0.8970588235294118,
36
- "eval_recall": 0.9095992544268406,
37
- "eval_runtime": 15.0374,
38
- "eval_samples_per_second": 461.915,
39
- "eval_steps_per_second": 57.789,
40
- "step": 871
41
- },
42
- {
43
- "epoch": 2.296211251435132,
44
- "grad_norm": 0.013152423314750195,
45
- "learning_rate": 3.850574712643678e-05,
46
- "loss": 0.0038,
47
  "step": 1000
48
  },
49
  {
50
- "epoch": 2.9988518943742823,
51
- "eval_accuracy": 0.9985348700918149,
52
- "eval_f1": 0.8998178506375226,
53
- "eval_loss": 0.004836090374737978,
54
- "eval_precision": 0.8797862867319679,
55
- "eval_recall": 0.9207828518173345,
56
- "eval_runtime": 15.0312,
57
- "eval_samples_per_second": 462.105,
58
- "eval_steps_per_second": 57.813,
59
- "step": 1306
60
  },
61
  {
62
- "epoch": 3.444316877152698,
63
- "grad_norm": 0.13433514535427094,
64
- "learning_rate": 3.275862068965517e-05,
65
- "loss": 0.0026,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
- "eval_accuracy": 0.9986464990372004,
71
- "eval_f1": 0.9222988505747127,
72
- "eval_loss": 0.004586596507579088,
73
- "eval_precision": 0.9101633393829401,
74
- "eval_recall": 0.934762348555452,
75
- "eval_runtime": 15.3559,
76
- "eval_samples_per_second": 452.335,
77
- "eval_steps_per_second": 56.591,
78
- "step": 1742
79
- },
80
- {
81
- "epoch": 4.592422502870264,
82
- "grad_norm": 0.04434504359960556,
83
- "learning_rate": 2.7011494252873566e-05,
84
- "loss": 0.0016,
85
  "step": 2000
86
  },
87
  {
88
- "epoch": 4.998851894374282,
89
- "eval_accuracy": 0.9986395222281137,
90
- "eval_f1": 0.9255269320843091,
91
- "eval_loss": 0.005914350505918264,
92
- "eval_precision": 0.9303201506591338,
93
- "eval_recall": 0.9207828518173345,
94
- "eval_runtime": 15.1878,
95
- "eval_samples_per_second": 457.342,
96
- "eval_steps_per_second": 57.217,
97
- "step": 2177
98
  },
99
  {
100
- "epoch": 5.7405281285878305,
101
- "grad_norm": 0.004046172834932804,
102
- "learning_rate": 2.1264367816091954e-05,
103
- "loss": 0.0012,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
- "eval_accuracy": 0.9986883598917199,
109
- "eval_f1": 0.9282385834109972,
110
- "eval_loss": 0.005369984544813633,
111
- "eval_precision": 0.9282385834109972,
112
- "eval_recall": 0.9282385834109972,
113
- "eval_runtime": 15.0665,
114
- "eval_samples_per_second": 461.023,
115
- "eval_steps_per_second": 57.678,
116
- "step": 2613
117
- },
118
- {
119
- "epoch": 6.888633754305396,
120
- "grad_norm": 0.20072227716445923,
121
- "learning_rate": 1.5517241379310346e-05,
122
- "loss": 0.001,
123
  "step": 3000
124
  },
125
  {
126
- "epoch": 6.998851894374282,
127
- "eval_accuracy": 0.9983325426283035,
128
- "eval_f1": 0.9087640449438201,
129
- "eval_loss": 0.006764214485883713,
130
- "eval_precision": 0.8776041666666666,
131
- "eval_recall": 0.9422180801491147,
132
- "eval_runtime": 15.0682,
133
- "eval_samples_per_second": 460.97,
134
- "eval_steps_per_second": 57.671,
135
- "step": 3048
136
  },
137
  {
138
- "epoch": 8.0,
139
- "eval_accuracy": 0.9986744062735468,
140
- "eval_f1": 0.9283720930232559,
141
- "eval_loss": 0.0063699171878397465,
142
- "eval_precision": 0.9266480965645311,
143
- "eval_recall": 0.9301025163094129,
144
- "eval_runtime": 15.0386,
145
- "eval_samples_per_second": 461.877,
146
- "eval_steps_per_second": 57.785,
147
- "step": 3484
148
- },
149
- {
150
- "epoch": 8.036739380022961,
151
- "grad_norm": 0.0006512438994832337,
152
- "learning_rate": 9.770114942528738e-06,
153
- "loss": 0.0006,
154
  "step": 3500
155
  },
156
  {
157
- "epoch": 8.998851894374283,
158
- "eval_accuracy": 0.9987441743644127,
159
- "eval_f1": 0.928273947246645,
160
- "eval_loss": 0.006663947831839323,
161
- "eval_precision": 0.921875,
162
- "eval_recall": 0.934762348555452,
163
- "eval_runtime": 15.3138,
164
- "eval_samples_per_second": 453.576,
165
- "eval_steps_per_second": 56.746,
166
- "step": 3919
167
- },
168
- {
169
- "epoch": 9.184845005740527,
170
- "grad_norm": 0.007138502784073353,
171
- "learning_rate": 4.022988505747127e-06,
172
- "loss": 0.0004,
173
  "step": 4000
174
  },
175
  {
176
- "epoch": 9.988518943742825,
177
- "eval_accuracy": 0.9986674294644602,
178
- "eval_f1": 0.9284386617100372,
179
- "eval_loss": 0.0068497806787490845,
180
- "eval_precision": 0.9258572752548656,
181
- "eval_recall": 0.9310344827586207,
182
- "eval_runtime": 15.1753,
183
- "eval_samples_per_second": 457.717,
184
- "eval_steps_per_second": 57.264,
185
- "step": 4350
186
- },
187
- {
188
- "epoch": 9.988518943742825,
189
- "step": 4350,
190
- "total_flos": 1.1084127968547612e+16,
191
- "train_loss": 0.0030981004957495064,
192
- "train_runtime": 1801.8371,
193
- "train_samples_per_second": 154.515,
194
- "train_steps_per_second": 2.414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
196
  ],
197
  "logging_steps": 500,
198
- "max_steps": 4350,
199
  "num_input_tokens_seen": 0,
200
  "num_train_epochs": 10,
201
  "save_steps": 500,
@@ -211,7 +218,7 @@
211
  "attributes": {}
212
  }
213
  },
214
- "total_flos": 1.1084127968547612e+16,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9230769230769231,
3
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4850",
4
+ "epoch": 9.989701338825952,
5
  "eval_steps": 500,
6
+ "global_step": 4850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.9989701338825953,
13
+ "eval_accuracy": 0.9984030122046221,
14
+ "eval_f1": 0.9132331555986428,
15
+ "eval_loss": 0.004271956626325846,
16
+ "eval_precision": 0.9145631067961165,
17
+ "eval_recall": 0.9119070667957405,
18
+ "eval_runtime": 17.5218,
19
+ "eval_samples_per_second": 387.973,
20
+ "eval_steps_per_second": 48.511,
21
+ "step": 485
22
+ },
23
+ {
24
+ "epoch": 1.0298661174047374,
25
+ "grad_norm": 0.35554030537605286,
26
+ "learning_rate": 4.484536082474227e-05,
27
+ "loss": 0.0127,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "eval_accuracy": 0.9984679304076863,
33
+ "eval_f1": 0.9130645944633317,
34
+ "eval_loss": 0.005422212183475494,
35
+ "eval_precision": 0.9161793372319688,
36
+ "eval_recall": 0.9099709583736689,
37
+ "eval_runtime": 17.524,
38
+ "eval_samples_per_second": 387.925,
39
+ "eval_steps_per_second": 48.505,
40
+ "step": 971
41
+ },
42
+ {
43
+ "epoch": 2.059732234809475,
44
+ "grad_norm": 0.2354772835969925,
45
+ "learning_rate": 3.9690721649484535e-05,
46
+ "loss": 0.0033,
47
  "step": 1000
48
  },
49
  {
50
+ "epoch": 2.998970133882595,
51
+ "eval_accuracy": 0.9985912749935082,
52
+ "eval_f1": 0.9173955296404277,
53
+ "eval_loss": 0.0059292372316122055,
54
+ "eval_precision": 0.9209756097560976,
55
+ "eval_recall": 0.9138431752178122,
56
+ "eval_runtime": 17.4807,
57
+ "eval_samples_per_second": 388.886,
58
+ "eval_steps_per_second": 48.625,
59
+ "step": 1456
60
  },
61
  {
62
+ "epoch": 3.089598352214212,
63
+ "grad_norm": 0.026833873242139816,
64
+ "learning_rate": 3.4536082474226805e-05,
65
+ "loss": 0.0016,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "eval_accuracy": 0.9985523240716697,
71
+ "eval_f1": 0.9135212613473483,
72
+ "eval_loss": 0.006239830516278744,
73
+ "eval_precision": 0.9018867924528302,
74
+ "eval_recall": 0.925459825750242,
75
+ "eval_runtime": 17.8043,
76
+ "eval_samples_per_second": 381.818,
77
+ "eval_steps_per_second": 47.741,
78
+ "step": 1942
79
+ },
80
+ {
81
+ "epoch": 4.11946446961895,
82
+ "grad_norm": 0.1513855904340744,
83
+ "learning_rate": 2.9381443298969075e-05,
84
+ "loss": 0.001,
85
  "step": 2000
86
  },
87
  {
88
+ "epoch": 4.998970133882596,
89
+ "eval_accuracy": 0.9986042586341211,
90
+ "eval_f1": 0.920328343795268,
91
+ "eval_loss": 0.006359434220939875,
92
+ "eval_precision": 0.918111753371869,
93
+ "eval_recall": 0.9225556631171346,
94
+ "eval_runtime": 17.4628,
95
+ "eval_samples_per_second": 389.284,
96
+ "eval_steps_per_second": 48.675,
97
+ "step": 2427
98
  },
99
  {
100
+ "epoch": 5.1493305870236865,
101
+ "grad_norm": 0.007741741370409727,
102
+ "learning_rate": 2.422680412371134e-05,
103
+ "loss": 0.0006,
104
  "step": 2500
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "eval_accuracy": 0.9985523240716697,
109
+ "eval_f1": 0.9180009704027171,
110
+ "eval_loss": 0.00812861043959856,
111
+ "eval_precision": 0.9202334630350194,
112
+ "eval_recall": 0.9157792836398838,
113
+ "eval_runtime": 17.5273,
114
+ "eval_samples_per_second": 387.852,
115
+ "eval_steps_per_second": 48.496,
116
+ "step": 2913
117
+ },
118
+ {
119
+ "epoch": 6.179196704428424,
120
+ "grad_norm": 0.06708419322967529,
121
+ "learning_rate": 1.9072164948453608e-05,
122
+ "loss": 0.0004,
123
  "step": 3000
124
  },
125
  {
126
+ "epoch": 6.998970133882596,
127
+ "eval_accuracy": 0.9985653077122826,
128
+ "eval_f1": 0.920328343795268,
129
+ "eval_loss": 0.008457792922854424,
130
+ "eval_precision": 0.918111753371869,
131
+ "eval_recall": 0.9225556631171346,
132
+ "eval_runtime": 17.4726,
133
+ "eval_samples_per_second": 389.067,
134
+ "eval_steps_per_second": 48.648,
135
+ "step": 3398
136
  },
137
  {
138
+ "epoch": 7.209062821833162,
139
+ "grad_norm": 0.0032746351789683104,
140
+ "learning_rate": 1.3917525773195878e-05,
141
+ "loss": 0.0003,
 
 
 
 
 
 
 
 
 
 
 
 
142
  "step": 3500
143
  },
144
  {
145
+ "epoch": 8.0,
146
+ "eval_accuracy": 0.9985847831732018,
147
+ "eval_f1": 0.9210653753026635,
148
+ "eval_loss": 0.00870645884424448,
149
+ "eval_precision": 0.9215116279069767,
150
+ "eval_recall": 0.920619554695063,
151
+ "eval_runtime": 17.5192,
152
+ "eval_samples_per_second": 388.031,
153
+ "eval_steps_per_second": 48.518,
154
+ "step": 3884
155
+ },
156
+ {
157
+ "epoch": 8.2389289392379,
158
+ "grad_norm": 0.0013693332439288497,
159
+ "learning_rate": 8.762886597938144e-06,
160
+ "loss": 0.0002,
161
  "step": 4000
162
  },
163
  {
164
+ "epoch": 8.998970133882596,
165
+ "eval_accuracy": 0.9985198649701377,
166
+ "eval_f1": 0.9206500956022945,
167
+ "eval_loss": 0.008967469446361065,
168
+ "eval_precision": 0.9093484419263456,
169
+ "eval_recall": 0.9322362052274927,
170
+ "eval_runtime": 17.7573,
171
+ "eval_samples_per_second": 382.828,
172
+ "eval_steps_per_second": 47.868,
173
+ "step": 4369
174
+ },
175
+ {
176
+ "epoch": 9.268795056642636,
177
+ "grad_norm": 0.0005869244341738522,
178
+ "learning_rate": 3.608247422680412e-06,
179
+ "loss": 0.0001,
180
+ "step": 4500
181
+ },
182
+ {
183
+ "epoch": 9.989701338825952,
184
+ "eval_accuracy": 0.9986302259153467,
185
+ "eval_f1": 0.9230769230769231,
186
+ "eval_loss": 0.009366312995553017,
187
+ "eval_precision": 0.9169054441260746,
188
+ "eval_recall": 0.9293320425943853,
189
+ "eval_runtime": 17.4762,
190
+ "eval_samples_per_second": 388.987,
191
+ "eval_steps_per_second": 48.638,
192
+ "step": 4850
193
+ },
194
+ {
195
+ "epoch": 9.989701338825952,
196
+ "step": 4850,
197
+ "total_flos": 1.7736053837017554e+16,
198
+ "train_loss": 0.002068036902580679,
199
+ "train_runtime": 2701.3395,
200
+ "train_samples_per_second": 114.954,
201
+ "train_steps_per_second": 1.795
202
  }
203
  ],
204
  "logging_steps": 500,
205
+ "max_steps": 4850,
206
  "num_input_tokens_seen": 0,
207
  "num_train_epochs": 10,
208
  "save_steps": 500,
 
218
  "attributes": {}
219
  }
220
  },
221
+ "total_flos": 1.7736053837017554e+16,
222
  "train_batch_size": 32,
223
  "trial_name": null,
224
  "trial_params": null