joneauxedgar commited on
Commit
fa8f64f
·
verified ·
1 Parent(s): c3c5153

upload pii model

Browse files
checkpoint-30000/config.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForTokenClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": null,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "O",
25
+ "1": "B-ACCOUNTNUM",
26
+ "2": "I-ACCOUNTNUM",
27
+ "3": "B-BUILDINGNUM",
28
+ "4": "I-BUILDINGNUM",
29
+ "5": "B-CITY",
30
+ "6": "I-CITY",
31
+ "7": "B-CREDITCARDNUMBER",
32
+ "8": "I-CREDITCARDNUMBER",
33
+ "9": "B-DATEOFBIRTH",
34
+ "10": "I-DATEOFBIRTH",
35
+ "11": "B-DRIVERLICENSENUM",
36
+ "12": "I-DRIVERLICENSENUM",
37
+ "13": "B-EMAIL",
38
+ "14": "I-EMAIL",
39
+ "15": "B-GIVENNAME",
40
+ "16": "I-GIVENNAME",
41
+ "17": "B-IDCARDNUM",
42
+ "18": "I-IDCARDNUM",
43
+ "19": "B-PASSWORD",
44
+ "20": "I-PASSWORD",
45
+ "21": "B-SOCIALNUM",
46
+ "22": "I-SOCIALNUM",
47
+ "23": "B-STREET",
48
+ "24": "I-STREET",
49
+ "25": "B-SURNAME",
50
+ "26": "I-SURNAME",
51
+ "27": "B-TAXNUM",
52
+ "28": "I-TAXNUM",
53
+ "29": "B-TELEPHONENUM",
54
+ "30": "I-TELEPHONENUM",
55
+ "31": "B-USERNAME",
56
+ "32": "I-USERNAME",
57
+ "33": "B-ZIPCODE",
58
+ "34": "I-ZIPCODE"
59
+ },
60
+ "initializer_cutoff_factor": 2.0,
61
+ "initializer_range": 0.02,
62
+ "intermediate_size": 1152,
63
+ "label2id": {
64
+ "B-ACCOUNTNUM": 1,
65
+ "B-BUILDINGNUM": 3,
66
+ "B-CITY": 5,
67
+ "B-CREDITCARDNUMBER": 7,
68
+ "B-DATEOFBIRTH": 9,
69
+ "B-DRIVERLICENSENUM": 11,
70
+ "B-EMAIL": 13,
71
+ "B-GIVENNAME": 15,
72
+ "B-IDCARDNUM": 17,
73
+ "B-PASSWORD": 19,
74
+ "B-SOCIALNUM": 21,
75
+ "B-STREET": 23,
76
+ "B-SURNAME": 25,
77
+ "B-TAXNUM": 27,
78
+ "B-TELEPHONENUM": 29,
79
+ "B-USERNAME": 31,
80
+ "B-ZIPCODE": 33,
81
+ "I-ACCOUNTNUM": 2,
82
+ "I-BUILDINGNUM": 4,
83
+ "I-CITY": 6,
84
+ "I-CREDITCARDNUMBER": 8,
85
+ "I-DATEOFBIRTH": 10,
86
+ "I-DRIVERLICENSENUM": 12,
87
+ "I-EMAIL": 14,
88
+ "I-GIVENNAME": 16,
89
+ "I-IDCARDNUM": 18,
90
+ "I-PASSWORD": 20,
91
+ "I-SOCIALNUM": 22,
92
+ "I-STREET": 24,
93
+ "I-SURNAME": 26,
94
+ "I-TAXNUM": 28,
95
+ "I-TELEPHONENUM": 30,
96
+ "I-USERNAME": 32,
97
+ "I-ZIPCODE": 34,
98
+ "O": 0
99
+ },
100
+ "layer_norm_eps": 1e-05,
101
+ "local_attention": 128,
102
+ "local_rope_theta": 10000.0,
103
+ "max_position_embeddings": 8192,
104
+ "mlp_bias": false,
105
+ "mlp_dropout": 0.0,
106
+ "model_type": "modernbert",
107
+ "norm_bias": false,
108
+ "norm_eps": 1e-05,
109
+ "num_attention_heads": 12,
110
+ "num_hidden_layers": 22,
111
+ "pad_token_id": 50283,
112
+ "position_embedding_type": "absolute",
113
+ "repad_logits_with_grad": false,
114
+ "sep_token_id": 50282,
115
+ "sparse_pred_ignore_index": -100,
116
+ "sparse_prediction": false,
117
+ "transformers_version": "4.57.3",
118
+ "vocab_size": 50368
119
+ }
checkpoint-30000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801f79e7fee96cd8a2a58809cdb5d8841a8812d4de65368a9186e0c86a249515
3
+ size 598541300
checkpoint-30000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6823c5a7dc57341bd6aa30e179856f6d6ae31f7f743c37a9112de847d46f81c2
3
+ size 1197172811
checkpoint-30000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9445552595536daf5bd8731be4eabb308bd26e76a3f4f0c20c4aa55fcf9ea202
3
+ size 14645
checkpoint-30000/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60ad25cf3adf322a66114b0f30428835f3ff6a4254f11ffb365e95da9a3ecb1
3
+ size 1383
checkpoint-30000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b80b44612112b0efd15906b6c2a54bd738858bc1a2362297094c97b34a2435c
3
+ size 1465
checkpoint-30000/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-30000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-30000/tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "|||IP_ADDRESS|||",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<|padding|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "50254": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "50255": {
28
+ "content": " ",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "50256": {
36
+ "content": " ",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "50257": {
44
+ "content": " ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "50258": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50259": {
60
+ "content": " ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "50260": {
68
+ "content": " ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "50261": {
76
+ "content": " ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "50262": {
84
+ "content": " ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "50263": {
92
+ "content": " ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "50264": {
100
+ "content": " ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "50265": {
108
+ "content": " ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "50266": {
116
+ "content": " ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "50267": {
124
+ "content": " ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "50268": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "50269": {
140
+ "content": " ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "50270": {
148
+ "content": " ",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "50271": {
156
+ "content": " ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "50272": {
164
+ "content": " ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "50273": {
172
+ "content": " ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "50274": {
180
+ "content": " ",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "50275": {
188
+ "content": " ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "50276": {
196
+ "content": " ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "50277": {
204
+ "content": "|||EMAIL_ADDRESS|||",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "50278": {
212
+ "content": "|||PHONE_NUMBER|||",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "50279": {
220
+ "content": "<|endoftext|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "50280": {
228
+ "content": "[UNK]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "50281": {
236
+ "content": "[CLS]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "50282": {
244
+ "content": "[SEP]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "50283": {
252
+ "content": "[PAD]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "50284": {
260
+ "content": "[MASK]",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "50285": {
268
+ "content": "[unused0]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "50286": {
276
+ "content": "[unused1]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "50287": {
284
+ "content": "[unused2]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "50288": {
292
+ "content": "[unused3]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "50289": {
300
+ "content": "[unused4]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "50290": {
308
+ "content": "[unused5]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "50291": {
316
+ "content": "[unused6]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "50292": {
324
+ "content": "[unused7]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "50293": {
332
+ "content": "[unused8]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "50294": {
340
+ "content": "[unused9]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "50295": {
348
+ "content": "[unused10]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "50296": {
356
+ "content": "[unused11]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "50297": {
364
+ "content": "[unused12]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "50298": {
372
+ "content": "[unused13]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "50299": {
380
+ "content": "[unused14]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "50300": {
388
+ "content": "[unused15]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "50301": {
396
+ "content": "[unused16]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "50302": {
404
+ "content": "[unused17]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "50303": {
412
+ "content": "[unused18]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "50304": {
420
+ "content": "[unused19]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "50305": {
428
+ "content": "[unused20]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "50306": {
436
+ "content": "[unused21]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "50307": {
444
+ "content": "[unused22]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "50308": {
452
+ "content": "[unused23]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "50309": {
460
+ "content": "[unused24]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "50310": {
468
+ "content": "[unused25]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "50311": {
476
+ "content": "[unused26]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "50312": {
484
+ "content": "[unused27]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "50313": {
492
+ "content": "[unused28]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "50314": {
500
+ "content": "[unused29]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "50315": {
508
+ "content": "[unused30]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "50316": {
516
+ "content": "[unused31]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "50317": {
524
+ "content": "[unused32]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "50318": {
532
+ "content": "[unused33]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "50319": {
540
+ "content": "[unused34]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "50320": {
548
+ "content": "[unused35]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "50321": {
556
+ "content": "[unused36]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "50322": {
564
+ "content": "[unused37]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "50323": {
572
+ "content": "[unused38]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "50324": {
580
+ "content": "[unused39]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "50325": {
588
+ "content": "[unused40]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "50326": {
596
+ "content": "[unused41]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "50327": {
604
+ "content": "[unused42]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "50328": {
612
+ "content": "[unused43]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "50329": {
620
+ "content": "[unused44]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "50330": {
628
+ "content": "[unused45]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "50331": {
636
+ "content": "[unused46]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "50332": {
644
+ "content": "[unused47]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "50333": {
652
+ "content": "[unused48]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "50334": {
660
+ "content": "[unused49]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "50335": {
668
+ "content": "[unused50]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "50336": {
676
+ "content": "[unused51]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "50337": {
684
+ "content": "[unused52]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "50338": {
692
+ "content": "[unused53]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "50339": {
700
+ "content": "[unused54]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "50340": {
708
+ "content": "[unused55]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "50341": {
716
+ "content": "[unused56]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "50342": {
724
+ "content": "[unused57]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "50343": {
732
+ "content": "[unused58]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "50344": {
740
+ "content": "[unused59]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "50345": {
748
+ "content": "[unused60]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "50346": {
756
+ "content": "[unused61]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "50347": {
764
+ "content": "[unused62]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "50348": {
772
+ "content": "[unused63]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "50349": {
780
+ "content": "[unused64]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "50350": {
788
+ "content": "[unused65]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "50351": {
796
+ "content": "[unused66]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "50352": {
804
+ "content": "[unused67]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "50353": {
812
+ "content": "[unused68]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "50354": {
820
+ "content": "[unused69]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "50355": {
828
+ "content": "[unused70]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "50356": {
836
+ "content": "[unused71]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "50357": {
844
+ "content": "[unused72]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "50358": {
852
+ "content": "[unused73]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "50359": {
860
+ "content": "[unused74]",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "50360": {
868
+ "content": "[unused75]",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "50361": {
876
+ "content": "[unused76]",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "50362": {
884
+ "content": "[unused77]",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "50363": {
892
+ "content": "[unused78]",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "50364": {
900
+ "content": "[unused79]",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "50365": {
908
+ "content": "[unused80]",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "50366": {
916
+ "content": "[unused81]",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "50367": {
924
+ "content": "[unused82]",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ }
931
+ },
932
+ "clean_up_tokenization_spaces": true,
933
+ "cls_token": "[CLS]",
934
+ "extra_special_tokens": {},
935
+ "mask_token": "[MASK]",
936
+ "model_input_names": [
937
+ "input_ids",
938
+ "attention_mask"
939
+ ],
940
+ "model_max_length": 8192,
941
+ "pad_token": "[PAD]",
942
+ "sep_token": "[SEP]",
943
+ "tokenizer_class": "PreTrainedTokenizerFast",
944
+ "unk_token": "[UNK]"
945
+ }
checkpoint-30000/trainer_state.json ADDED
@@ -0,0 +1,2299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 30000,
3
+ "best_metric": 0.919165725910112,
4
+ "best_model_checkpoint": "./pii-detector-modernbert/checkpoint-30000",
5
+ "epoch": 2.948982601002654,
6
+ "eval_steps": 2000,
7
+ "global_step": 30000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00982994200334218,
14
+ "grad_norm": 2.1159424781799316,
15
+ "learning_rate": 1.6218872870249017e-06,
16
+ "loss": 1.968,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.01965988400668436,
21
+ "grad_norm": 0.7022971510887146,
22
+ "learning_rate": 3.2601572739187415e-06,
23
+ "loss": 0.2013,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.029489826010026542,
28
+ "grad_norm": 0.7267266511917114,
29
+ "learning_rate": 4.898427260812582e-06,
30
+ "loss": 0.1417,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.03931976801336872,
35
+ "grad_norm": 0.9718811511993408,
36
+ "learning_rate": 6.536697247706422e-06,
37
+ "loss": 0.1007,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.0491497100167109,
42
+ "grad_norm": 0.38591882586479187,
43
+ "learning_rate": 8.174967234600263e-06,
44
+ "loss": 0.0696,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.058979652020053085,
49
+ "grad_norm": 0.6200196743011475,
50
+ "learning_rate": 9.813237221494102e-06,
51
+ "loss": 0.0548,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.06880959402339526,
56
+ "grad_norm": 0.46839049458503723,
57
+ "learning_rate": 1.1451507208387943e-05,
58
+ "loss": 0.0447,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.07863953602673744,
63
+ "grad_norm": 0.5770799517631531,
64
+ "learning_rate": 1.3089777195281782e-05,
65
+ "loss": 0.0415,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.08846947803007962,
70
+ "grad_norm": 0.4535345137119293,
71
+ "learning_rate": 1.4728047182175622e-05,
72
+ "loss": 0.035,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.0982994200334218,
77
+ "grad_norm": 0.5041764378547668,
78
+ "learning_rate": 1.6366317169069463e-05,
79
+ "loss": 0.0305,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.10812936203676399,
84
+ "grad_norm": 0.44773876667022705,
85
+ "learning_rate": 1.8004587155963304e-05,
86
+ "loss": 0.0289,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.11795930404010617,
91
+ "grad_norm": 0.235533207654953,
92
+ "learning_rate": 1.9642857142857145e-05,
93
+ "loss": 0.023,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.12778924604344835,
98
+ "grad_norm": 0.4770767092704773,
99
+ "learning_rate": 2.1281127129750983e-05,
100
+ "loss": 0.0243,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.13761918804679052,
105
+ "grad_norm": 0.3569801151752472,
106
+ "learning_rate": 2.2919397116644824e-05,
107
+ "loss": 0.0204,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.14744913005013272,
112
+ "grad_norm": 0.17573118209838867,
113
+ "learning_rate": 2.4557667103538665e-05,
114
+ "loss": 0.0193,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.15727907205347488,
119
+ "grad_norm": 0.4183891713619232,
120
+ "learning_rate": 2.6195937090432503e-05,
121
+ "loss": 0.0183,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.16710901405681705,
126
+ "grad_norm": 0.2571297287940979,
127
+ "learning_rate": 2.7834207077326347e-05,
128
+ "loss": 0.0161,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.17693895606015925,
133
+ "grad_norm": 0.20306524634361267,
134
+ "learning_rate": 2.9472477064220185e-05,
135
+ "loss": 0.0149,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.18676889806350142,
140
+ "grad_norm": 0.2766953110694885,
141
+ "learning_rate": 3.111074705111402e-05,
142
+ "loss": 0.0163,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.1965988400668436,
147
+ "grad_norm": 0.4682016372680664,
148
+ "learning_rate": 3.274901703800787e-05,
149
+ "loss": 0.0148,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.1965988400668436,
154
+ "eval_f1": 0.729076995389466,
155
+ "eval_loss": 0.0456073060631752,
156
+ "eval_precision": 0.6865494982046768,
157
+ "eval_recall": 0.7772210336673798,
158
+ "eval_runtime": 416.6538,
159
+ "eval_samples_per_second": 195.316,
160
+ "eval_steps_per_second": 3.053,
161
+ "step": 2000
162
+ },
163
+ {
164
+ "epoch": 0.20642878207018578,
165
+ "grad_norm": 0.26377245783805847,
166
+ "learning_rate": 3.4387287024901704e-05,
167
+ "loss": 0.015,
168
+ "step": 2100
169
+ },
170
+ {
171
+ "epoch": 0.21625872407352797,
172
+ "grad_norm": 0.20288439095020294,
173
+ "learning_rate": 3.602555701179555e-05,
174
+ "loss": 0.0138,
175
+ "step": 2200
176
+ },
177
+ {
178
+ "epoch": 0.22608866607687014,
179
+ "grad_norm": 0.3629586398601532,
180
+ "learning_rate": 3.7663826998689387e-05,
181
+ "loss": 0.0145,
182
+ "step": 2300
183
+ },
184
+ {
185
+ "epoch": 0.23591860808021234,
186
+ "grad_norm": 0.22589775919914246,
187
+ "learning_rate": 3.9302096985583224e-05,
188
+ "loss": 0.013,
189
+ "step": 2400
190
+ },
191
+ {
192
+ "epoch": 0.2457485500835545,
193
+ "grad_norm": 0.12344377487897873,
194
+ "learning_rate": 4.094036697247707e-05,
195
+ "loss": 0.0122,
196
+ "step": 2500
197
+ },
198
+ {
199
+ "epoch": 0.2555784920868967,
200
+ "grad_norm": 0.12900730967521667,
201
+ "learning_rate": 4.2578636959370906e-05,
202
+ "loss": 0.0129,
203
+ "step": 2600
204
+ },
205
+ {
206
+ "epoch": 0.26540843409023884,
207
+ "grad_norm": 0.15024515986442566,
208
+ "learning_rate": 4.4216906946264744e-05,
209
+ "loss": 0.0118,
210
+ "step": 2700
211
+ },
212
+ {
213
+ "epoch": 0.27523837609358104,
214
+ "grad_norm": 0.17616596817970276,
215
+ "learning_rate": 4.585517693315859e-05,
216
+ "loss": 0.0116,
217
+ "step": 2800
218
+ },
219
+ {
220
+ "epoch": 0.28506831809692323,
221
+ "grad_norm": 0.2715315520763397,
222
+ "learning_rate": 4.7493446920052426e-05,
223
+ "loss": 0.0128,
224
+ "step": 2900
225
+ },
226
+ {
227
+ "epoch": 0.29489826010026543,
228
+ "grad_norm": 0.14540570974349976,
229
+ "learning_rate": 4.913171690694627e-05,
230
+ "loss": 0.0114,
231
+ "step": 3000
232
+ },
233
+ {
234
+ "epoch": 0.30472820210360757,
235
+ "grad_norm": 0.2312837392091751,
236
+ "learning_rate": 4.991444278588852e-05,
237
+ "loss": 0.0127,
238
+ "step": 3100
239
+ },
240
+ {
241
+ "epoch": 0.31455814410694977,
242
+ "grad_norm": 0.08322272449731827,
243
+ "learning_rate": 4.973240616011942e-05,
244
+ "loss": 0.013,
245
+ "step": 3200
246
+ },
247
+ {
248
+ "epoch": 0.32438808611029196,
249
+ "grad_norm": 0.1441478580236435,
250
+ "learning_rate": 4.9550369534350316e-05,
251
+ "loss": 0.011,
252
+ "step": 3300
253
+ },
254
+ {
255
+ "epoch": 0.3342180281136341,
256
+ "grad_norm": 0.2058216631412506,
257
+ "learning_rate": 4.936833290858121e-05,
258
+ "loss": 0.0108,
259
+ "step": 3400
260
+ },
261
+ {
262
+ "epoch": 0.3440479701169763,
263
+ "grad_norm": 0.08180283010005951,
264
+ "learning_rate": 4.9186296282812107e-05,
265
+ "loss": 0.0112,
266
+ "step": 3500
267
+ },
268
+ {
269
+ "epoch": 0.3538779121203185,
270
+ "grad_norm": 0.1268218606710434,
271
+ "learning_rate": 4.9004259657043e-05,
272
+ "loss": 0.0114,
273
+ "step": 3600
274
+ },
275
+ {
276
+ "epoch": 0.3637078541236607,
277
+ "grad_norm": 0.08831259608268738,
278
+ "learning_rate": 4.88222230312739e-05,
279
+ "loss": 0.0106,
280
+ "step": 3700
281
+ },
282
+ {
283
+ "epoch": 0.37353779612700283,
284
+ "grad_norm": 0.12895138561725616,
285
+ "learning_rate": 4.864018640550479e-05,
286
+ "loss": 0.0105,
287
+ "step": 3800
288
+ },
289
+ {
290
+ "epoch": 0.383367738130345,
291
+ "grad_norm": 0.06203186884522438,
292
+ "learning_rate": 4.845814977973568e-05,
293
+ "loss": 0.0117,
294
+ "step": 3900
295
+ },
296
+ {
297
+ "epoch": 0.3931976801336872,
298
+ "grad_norm": 0.08485294133424759,
299
+ "learning_rate": 4.827611315396658e-05,
300
+ "loss": 0.0105,
301
+ "step": 4000
302
+ },
303
+ {
304
+ "epoch": 0.3931976801336872,
305
+ "eval_f1": 0.8286469466592665,
306
+ "eval_loss": 0.032480597496032715,
307
+ "eval_precision": 0.8092539524100847,
308
+ "eval_recall": 0.8489922306046617,
309
+ "eval_runtime": 368.0275,
310
+ "eval_samples_per_second": 221.122,
311
+ "eval_steps_per_second": 3.456,
312
+ "step": 4000
313
+ },
314
+ {
315
+ "epoch": 0.4030276221370294,
316
+ "grad_norm": 0.4025856554508209,
317
+ "learning_rate": 4.809407652819747e-05,
318
+ "loss": 0.0095,
319
+ "step": 4100
320
+ },
321
+ {
322
+ "epoch": 0.41285756414037156,
323
+ "grad_norm": 0.12447871267795563,
324
+ "learning_rate": 4.791203990242837e-05,
325
+ "loss": 0.0098,
326
+ "step": 4200
327
+ },
328
+ {
329
+ "epoch": 0.42268750614371375,
330
+ "grad_norm": 0.16537797451019287,
331
+ "learning_rate": 4.773000327665927e-05,
332
+ "loss": 0.0094,
333
+ "step": 4300
334
+ },
335
+ {
336
+ "epoch": 0.43251744814705595,
337
+ "grad_norm": 0.14634715020656586,
338
+ "learning_rate": 4.754796665089016e-05,
339
+ "loss": 0.0086,
340
+ "step": 4400
341
+ },
342
+ {
343
+ "epoch": 0.4423473901503981,
344
+ "grad_norm": 0.18309462070465088,
345
+ "learning_rate": 4.736593002512106e-05,
346
+ "loss": 0.0096,
347
+ "step": 4500
348
+ },
349
+ {
350
+ "epoch": 0.4521773321537403,
351
+ "grad_norm": 0.07428343594074249,
352
+ "learning_rate": 4.718389339935195e-05,
353
+ "loss": 0.0096,
354
+ "step": 4600
355
+ },
356
+ {
357
+ "epoch": 0.4620072741570825,
358
+ "grad_norm": 0.11093997955322266,
359
+ "learning_rate": 4.7001856773582845e-05,
360
+ "loss": 0.0097,
361
+ "step": 4700
362
+ },
363
+ {
364
+ "epoch": 0.4718372161604247,
365
+ "grad_norm": 0.035003211349248886,
366
+ "learning_rate": 4.6819820147813744e-05,
367
+ "loss": 0.0096,
368
+ "step": 4800
369
+ },
370
+ {
371
+ "epoch": 0.4816671581637668,
372
+ "grad_norm": 0.1413133144378662,
373
+ "learning_rate": 4.6637783522044636e-05,
374
+ "loss": 0.0086,
375
+ "step": 4900
376
+ },
377
+ {
378
+ "epoch": 0.491497100167109,
379
+ "grad_norm": 0.16881492733955383,
380
+ "learning_rate": 4.645574689627553e-05,
381
+ "loss": 0.0091,
382
+ "step": 5000
383
+ },
384
+ {
385
+ "epoch": 0.5013270421704512,
386
+ "grad_norm": 0.25650179386138916,
387
+ "learning_rate": 4.627371027050643e-05,
388
+ "loss": 0.0089,
389
+ "step": 5100
390
+ },
391
+ {
392
+ "epoch": 0.5111569841737934,
393
+ "grad_norm": 0.07248776406049728,
394
+ "learning_rate": 4.609167364473732e-05,
395
+ "loss": 0.0094,
396
+ "step": 5200
397
+ },
398
+ {
399
+ "epoch": 0.5209869261771356,
400
+ "grad_norm": 0.050960972905159,
401
+ "learning_rate": 4.590963701896822e-05,
402
+ "loss": 0.0083,
403
+ "step": 5300
404
+ },
405
+ {
406
+ "epoch": 0.5308168681804777,
407
+ "grad_norm": 0.10053224116563797,
408
+ "learning_rate": 4.572760039319912e-05,
409
+ "loss": 0.0083,
410
+ "step": 5400
411
+ },
412
+ {
413
+ "epoch": 0.5406468101838199,
414
+ "grad_norm": 0.13864193856716156,
415
+ "learning_rate": 4.554556376743001e-05,
416
+ "loss": 0.0088,
417
+ "step": 5500
418
+ },
419
+ {
420
+ "epoch": 0.5504767521871621,
421
+ "grad_norm": 0.10522215813398361,
422
+ "learning_rate": 4.536352714166091e-05,
423
+ "loss": 0.008,
424
+ "step": 5600
425
+ },
426
+ {
427
+ "epoch": 0.5603066941905043,
428
+ "grad_norm": 0.05200352147221565,
429
+ "learning_rate": 4.51814905158918e-05,
430
+ "loss": 0.0083,
431
+ "step": 5700
432
+ },
433
+ {
434
+ "epoch": 0.5701366361938465,
435
+ "grad_norm": 0.08757878094911575,
436
+ "learning_rate": 4.499945389012269e-05,
437
+ "loss": 0.008,
438
+ "step": 5800
439
+ },
440
+ {
441
+ "epoch": 0.5799665781971887,
442
+ "grad_norm": 0.08500000834465027,
443
+ "learning_rate": 4.481741726435359e-05,
444
+ "loss": 0.008,
445
+ "step": 5900
446
+ },
447
+ {
448
+ "epoch": 0.5897965202005309,
449
+ "grad_norm": 0.3124063313007355,
450
+ "learning_rate": 4.463538063858448e-05,
451
+ "loss": 0.0078,
452
+ "step": 6000
453
+ },
454
+ {
455
+ "epoch": 0.5897965202005309,
456
+ "eval_f1": 0.8566332444324409,
457
+ "eval_loss": 0.026570068672299385,
458
+ "eval_precision": 0.8561226325970579,
459
+ "eval_recall": 0.8571444657133206,
460
+ "eval_runtime": 368.8056,
461
+ "eval_samples_per_second": 220.656,
462
+ "eval_steps_per_second": 3.449,
463
+ "step": 6000
464
+ },
465
+ {
466
+ "epoch": 0.599626462203873,
467
+ "grad_norm": 0.05959346517920494,
468
+ "learning_rate": 4.445334401281538e-05,
469
+ "loss": 0.0083,
470
+ "step": 6100
471
+ },
472
+ {
473
+ "epoch": 0.6094564042072151,
474
+ "grad_norm": 0.07874953001737595,
475
+ "learning_rate": 4.4271307387046274e-05,
476
+ "loss": 0.0076,
477
+ "step": 6200
478
+ },
479
+ {
480
+ "epoch": 0.6192863462105573,
481
+ "grad_norm": 0.10230255872011185,
482
+ "learning_rate": 4.408927076127717e-05,
483
+ "loss": 0.008,
484
+ "step": 6300
485
+ },
486
+ {
487
+ "epoch": 0.6291162882138995,
488
+ "grad_norm": 0.08698707818984985,
489
+ "learning_rate": 4.390723413550807e-05,
490
+ "loss": 0.0076,
491
+ "step": 6400
492
+ },
493
+ {
494
+ "epoch": 0.6389462302172417,
495
+ "grad_norm": 0.050512004643678665,
496
+ "learning_rate": 4.3725197509738964e-05,
497
+ "loss": 0.0087,
498
+ "step": 6500
499
+ },
500
+ {
501
+ "epoch": 0.6487761722205839,
502
+ "grad_norm": 0.1262696236371994,
503
+ "learning_rate": 4.3543160883969856e-05,
504
+ "loss": 0.0091,
505
+ "step": 6600
506
+ },
507
+ {
508
+ "epoch": 0.6586061142239261,
509
+ "grad_norm": 0.10209453850984573,
510
+ "learning_rate": 4.3361124258200755e-05,
511
+ "loss": 0.0072,
512
+ "step": 6700
513
+ },
514
+ {
515
+ "epoch": 0.6684360562272682,
516
+ "grad_norm": 0.13946746289730072,
517
+ "learning_rate": 4.317908763243165e-05,
518
+ "loss": 0.0077,
519
+ "step": 6800
520
+ },
521
+ {
522
+ "epoch": 0.6782659982306104,
523
+ "grad_norm": 0.15709851682186127,
524
+ "learning_rate": 4.299705100666254e-05,
525
+ "loss": 0.0077,
526
+ "step": 6900
527
+ },
528
+ {
529
+ "epoch": 0.6880959402339526,
530
+ "grad_norm": 0.072502002120018,
531
+ "learning_rate": 4.281501438089344e-05,
532
+ "loss": 0.0079,
533
+ "step": 7000
534
+ },
535
+ {
536
+ "epoch": 0.6979258822372948,
537
+ "grad_norm": 0.17354685068130493,
538
+ "learning_rate": 4.263297775512433e-05,
539
+ "loss": 0.0082,
540
+ "step": 7100
541
+ },
542
+ {
543
+ "epoch": 0.707755824240637,
544
+ "grad_norm": 0.14260995388031006,
545
+ "learning_rate": 4.245094112935523e-05,
546
+ "loss": 0.0077,
547
+ "step": 7200
548
+ },
549
+ {
550
+ "epoch": 0.7175857662439792,
551
+ "grad_norm": 0.04445793479681015,
552
+ "learning_rate": 4.226890450358612e-05,
553
+ "loss": 0.0068,
554
+ "step": 7300
555
+ },
556
+ {
557
+ "epoch": 0.7274157082473214,
558
+ "grad_norm": 0.1454411894083023,
559
+ "learning_rate": 4.208686787781702e-05,
560
+ "loss": 0.0072,
561
+ "step": 7400
562
+ },
563
+ {
564
+ "epoch": 0.7372456502506636,
565
+ "grad_norm": 0.0707961767911911,
566
+ "learning_rate": 4.190483125204792e-05,
567
+ "loss": 0.0073,
568
+ "step": 7500
569
+ },
570
+ {
571
+ "epoch": 0.7470755922540057,
572
+ "grad_norm": 0.1078498363494873,
573
+ "learning_rate": 4.172279462627881e-05,
574
+ "loss": 0.0079,
575
+ "step": 7600
576
+ },
577
+ {
578
+ "epoch": 0.7569055342573479,
579
+ "grad_norm": 0.044271912425756454,
580
+ "learning_rate": 4.15407580005097e-05,
581
+ "loss": 0.0079,
582
+ "step": 7700
583
+ },
584
+ {
585
+ "epoch": 0.76673547626069,
586
+ "grad_norm": 0.04950639605522156,
587
+ "learning_rate": 4.13587213747406e-05,
588
+ "loss": 0.0067,
589
+ "step": 7800
590
+ },
591
+ {
592
+ "epoch": 0.7765654182640322,
593
+ "grad_norm": 0.11214105784893036,
594
+ "learning_rate": 4.1176684748971494e-05,
595
+ "loss": 0.0077,
596
+ "step": 7900
597
+ },
598
+ {
599
+ "epoch": 0.7863953602673744,
600
+ "grad_norm": 0.08136852085590363,
601
+ "learning_rate": 4.099464812320239e-05,
602
+ "loss": 0.0071,
603
+ "step": 8000
604
+ },
605
+ {
606
+ "epoch": 0.7863953602673744,
607
+ "eval_f1": 0.8711044849186127,
608
+ "eval_loss": 0.023490285500884056,
609
+ "eval_precision": 0.8608655495447948,
610
+ "eval_recall": 0.8815899110460533,
611
+ "eval_runtime": 368.1099,
612
+ "eval_samples_per_second": 221.073,
613
+ "eval_steps_per_second": 3.455,
614
+ "step": 8000
615
+ },
616
+ {
617
+ "epoch": 0.7962253022707166,
618
+ "grad_norm": 0.05617125704884529,
619
+ "learning_rate": 4.0812611497433284e-05,
620
+ "loss": 0.0076,
621
+ "step": 8100
622
+ },
623
+ {
624
+ "epoch": 0.8060552442740588,
625
+ "grad_norm": 0.09260338544845581,
626
+ "learning_rate": 4.0630574871664177e-05,
627
+ "loss": 0.0075,
628
+ "step": 8200
629
+ },
630
+ {
631
+ "epoch": 0.8158851862774009,
632
+ "grad_norm": 0.04998739808797836,
633
+ "learning_rate": 4.0448538245895075e-05,
634
+ "loss": 0.0073,
635
+ "step": 8300
636
+ },
637
+ {
638
+ "epoch": 0.8257151282807431,
639
+ "grad_norm": 0.09871978312730789,
640
+ "learning_rate": 4.0266501620125974e-05,
641
+ "loss": 0.0073,
642
+ "step": 8400
643
+ },
644
+ {
645
+ "epoch": 0.8355450702840853,
646
+ "grad_norm": 0.23389624059200287,
647
+ "learning_rate": 4.0084464994356866e-05,
648
+ "loss": 0.0074,
649
+ "step": 8500
650
+ },
651
+ {
652
+ "epoch": 0.8453750122874275,
653
+ "grad_norm": 0.08000567555427551,
654
+ "learning_rate": 3.9902428368587765e-05,
655
+ "loss": 0.0079,
656
+ "step": 8600
657
+ },
658
+ {
659
+ "epoch": 0.8552049542907697,
660
+ "grad_norm": 0.038756221532821655,
661
+ "learning_rate": 3.972039174281866e-05,
662
+ "loss": 0.0086,
663
+ "step": 8700
664
+ },
665
+ {
666
+ "epoch": 0.8650348962941119,
667
+ "grad_norm": 0.0853879302740097,
668
+ "learning_rate": 3.953835511704955e-05,
669
+ "loss": 0.0067,
670
+ "step": 8800
671
+ },
672
+ {
673
+ "epoch": 0.8748648382974541,
674
+ "grad_norm": 0.1053905338048935,
675
+ "learning_rate": 3.935631849128045e-05,
676
+ "loss": 0.0067,
677
+ "step": 8900
678
+ },
679
+ {
680
+ "epoch": 0.8846947803007962,
681
+ "grad_norm": 0.09345783293247223,
682
+ "learning_rate": 3.917428186551134e-05,
683
+ "loss": 0.0073,
684
+ "step": 9000
685
+ },
686
+ {
687
+ "epoch": 0.8945247223041384,
688
+ "grad_norm": 0.029876919463276863,
689
+ "learning_rate": 3.899224523974224e-05,
690
+ "loss": 0.0068,
691
+ "step": 9100
692
+ },
693
+ {
694
+ "epoch": 0.9043546643074806,
695
+ "grad_norm": 0.09708785265684128,
696
+ "learning_rate": 3.881020861397313e-05,
697
+ "loss": 0.007,
698
+ "step": 9200
699
+ },
700
+ {
701
+ "epoch": 0.9141846063108228,
702
+ "grad_norm": 0.04830634221434593,
703
+ "learning_rate": 3.862817198820402e-05,
704
+ "loss": 0.0073,
705
+ "step": 9300
706
+ },
707
+ {
708
+ "epoch": 0.924014548314165,
709
+ "grad_norm": 0.09089767932891846,
710
+ "learning_rate": 3.844613536243493e-05,
711
+ "loss": 0.0067,
712
+ "step": 9400
713
+ },
714
+ {
715
+ "epoch": 0.9338444903175072,
716
+ "grad_norm": 0.053389597684144974,
717
+ "learning_rate": 3.826409873666582e-05,
718
+ "loss": 0.0068,
719
+ "step": 9500
720
+ },
721
+ {
722
+ "epoch": 0.9436744323208494,
723
+ "grad_norm": 0.050620563328266144,
724
+ "learning_rate": 3.808206211089671e-05,
725
+ "loss": 0.0061,
726
+ "step": 9600
727
+ },
728
+ {
729
+ "epoch": 0.9535043743241914,
730
+ "grad_norm": 0.08616846799850464,
731
+ "learning_rate": 3.790002548512761e-05,
732
+ "loss": 0.0069,
733
+ "step": 9700
734
+ },
735
+ {
736
+ "epoch": 0.9633343163275336,
737
+ "grad_norm": 0.07850134372711182,
738
+ "learning_rate": 3.7717988859358504e-05,
739
+ "loss": 0.0072,
740
+ "step": 9800
741
+ },
742
+ {
743
+ "epoch": 0.9731642583308758,
744
+ "grad_norm": 0.19498451054096222,
745
+ "learning_rate": 3.75359522335894e-05,
746
+ "loss": 0.0062,
747
+ "step": 9900
748
+ },
749
+ {
750
+ "epoch": 0.982994200334218,
751
+ "grad_norm": 0.05255872756242752,
752
+ "learning_rate": 3.7353915607820295e-05,
753
+ "loss": 0.0059,
754
+ "step": 10000
755
+ },
756
+ {
757
+ "epoch": 0.982994200334218,
758
+ "eval_f1": 0.8865828197851773,
759
+ "eval_loss": 0.02146231383085251,
760
+ "eval_precision": 0.881456666184376,
761
+ "eval_recall": 0.891768944938633,
762
+ "eval_runtime": 368.4725,
763
+ "eval_samples_per_second": 220.855,
764
+ "eval_steps_per_second": 3.452,
765
+ "step": 10000
766
+ },
767
+ {
768
+ "epoch": 0.9928241423375602,
769
+ "grad_norm": 0.13814912736415863,
770
+ "learning_rate": 3.717187898205119e-05,
771
+ "loss": 0.0068,
772
+ "step": 10100
773
+ },
774
+ {
775
+ "epoch": 1.0026540843409024,
776
+ "grad_norm": 0.06895862519741058,
777
+ "learning_rate": 3.6989842356282086e-05,
778
+ "loss": 0.006,
779
+ "step": 10200
780
+ },
781
+ {
782
+ "epoch": 1.0124840263442445,
783
+ "grad_norm": 0.048868328332901,
784
+ "learning_rate": 3.680780573051298e-05,
785
+ "loss": 0.0045,
786
+ "step": 10300
787
+ },
788
+ {
789
+ "epoch": 1.0223139683475868,
790
+ "grad_norm": 0.030351588502526283,
791
+ "learning_rate": 3.662576910474388e-05,
792
+ "loss": 0.0055,
793
+ "step": 10400
794
+ },
795
+ {
796
+ "epoch": 1.032143910350929,
797
+ "grad_norm": 0.02089417539536953,
798
+ "learning_rate": 3.6443732478974776e-05,
799
+ "loss": 0.005,
800
+ "step": 10500
801
+ },
802
+ {
803
+ "epoch": 1.0419738523542712,
804
+ "grad_norm": 0.04806596413254738,
805
+ "learning_rate": 3.626169585320567e-05,
806
+ "loss": 0.0047,
807
+ "step": 10600
808
+ },
809
+ {
810
+ "epoch": 1.0518037943576133,
811
+ "grad_norm": 0.12568242847919464,
812
+ "learning_rate": 3.607965922743656e-05,
813
+ "loss": 0.005,
814
+ "step": 10700
815
+ },
816
+ {
817
+ "epoch": 1.0616337363609554,
818
+ "grad_norm": 0.06524453312158585,
819
+ "learning_rate": 3.589762260166746e-05,
820
+ "loss": 0.0057,
821
+ "step": 10800
822
+ },
823
+ {
824
+ "epoch": 1.0714636783642977,
825
+ "grad_norm": 0.06146615743637085,
826
+ "learning_rate": 3.571558597589835e-05,
827
+ "loss": 0.0048,
828
+ "step": 10900
829
+ },
830
+ {
831
+ "epoch": 1.0812936203676398,
832
+ "grad_norm": 0.044653356075286865,
833
+ "learning_rate": 3.553354935012925e-05,
834
+ "loss": 0.005,
835
+ "step": 11000
836
+ },
837
+ {
838
+ "epoch": 1.091123562370982,
839
+ "grad_norm": 0.03471142798662186,
840
+ "learning_rate": 3.535151272436014e-05,
841
+ "loss": 0.0051,
842
+ "step": 11100
843
+ },
844
+ {
845
+ "epoch": 1.1009535043743242,
846
+ "grad_norm": 0.026662476360797882,
847
+ "learning_rate": 3.5169476098591034e-05,
848
+ "loss": 0.0051,
849
+ "step": 11200
850
+ },
851
+ {
852
+ "epoch": 1.1107834463776665,
853
+ "grad_norm": 0.08286290615797043,
854
+ "learning_rate": 3.498743947282193e-05,
855
+ "loss": 0.0052,
856
+ "step": 11300
857
+ },
858
+ {
859
+ "epoch": 1.1206133883810085,
860
+ "grad_norm": 0.04743447154760361,
861
+ "learning_rate": 3.4805402847052825e-05,
862
+ "loss": 0.0051,
863
+ "step": 11400
864
+ },
865
+ {
866
+ "epoch": 1.1304433303843506,
867
+ "grad_norm": 0.04862457141280174,
868
+ "learning_rate": 3.4623366221283724e-05,
869
+ "loss": 0.005,
870
+ "step": 11500
871
+ },
872
+ {
873
+ "epoch": 1.140273272387693,
874
+ "grad_norm": 0.10798755288124084,
875
+ "learning_rate": 3.444132959551462e-05,
876
+ "loss": 0.0055,
877
+ "step": 11600
878
+ },
879
+ {
880
+ "epoch": 1.150103214391035,
881
+ "grad_norm": 0.06220352649688721,
882
+ "learning_rate": 3.4259292969745515e-05,
883
+ "loss": 0.0055,
884
+ "step": 11700
885
+ },
886
+ {
887
+ "epoch": 1.1599331563943773,
888
+ "grad_norm": 0.14796103537082672,
889
+ "learning_rate": 3.407725634397641e-05,
890
+ "loss": 0.0049,
891
+ "step": 11800
892
+ },
893
+ {
894
+ "epoch": 1.1697630983977194,
895
+ "grad_norm": 0.05921417847275734,
896
+ "learning_rate": 3.3895219718207305e-05,
897
+ "loss": 0.0047,
898
+ "step": 11900
899
+ },
900
+ {
901
+ "epoch": 1.1795930404010617,
902
+ "grad_norm": 0.12081274390220642,
903
+ "learning_rate": 3.37131830924382e-05,
904
+ "loss": 0.0053,
905
+ "step": 12000
906
+ },
907
+ {
908
+ "epoch": 1.1795930404010617,
909
+ "eval_f1": 0.8960633085174066,
910
+ "eval_loss": 0.020371899008750916,
911
+ "eval_precision": 0.8882999911480924,
912
+ "eval_recall": 0.9039635176218894,
913
+ "eval_runtime": 367.9724,
914
+ "eval_samples_per_second": 221.155,
915
+ "eval_steps_per_second": 3.457,
916
+ "step": 12000
917
+ },
918
+ {
919
+ "epoch": 1.1894229824044038,
920
+ "grad_norm": 0.041594497859478,
921
+ "learning_rate": 3.3531146466669096e-05,
922
+ "loss": 0.0057,
923
+ "step": 12100
924
+ },
925
+ {
926
+ "epoch": 1.199252924407746,
927
+ "grad_norm": 0.066756471991539,
928
+ "learning_rate": 3.334910984089999e-05,
929
+ "loss": 0.005,
930
+ "step": 12200
931
+ },
932
+ {
933
+ "epoch": 1.2090828664110882,
934
+ "grad_norm": 0.10321182757616043,
935
+ "learning_rate": 3.316707321513088e-05,
936
+ "loss": 0.0051,
937
+ "step": 12300
938
+ },
939
+ {
940
+ "epoch": 1.2189128084144303,
941
+ "grad_norm": 0.1184532642364502,
942
+ "learning_rate": 3.298503658936178e-05,
943
+ "loss": 0.005,
944
+ "step": 12400
945
+ },
946
+ {
947
+ "epoch": 1.2287427504177726,
948
+ "grad_norm": 0.10429126024246216,
949
+ "learning_rate": 3.280299996359268e-05,
950
+ "loss": 0.0053,
951
+ "step": 12500
952
+ },
953
+ {
954
+ "epoch": 1.2385726924211147,
955
+ "grad_norm": 0.1311911642551422,
956
+ "learning_rate": 3.262096333782357e-05,
957
+ "loss": 0.0052,
958
+ "step": 12600
959
+ },
960
+ {
961
+ "epoch": 1.248402634424457,
962
+ "grad_norm": 0.12976108491420746,
963
+ "learning_rate": 3.243892671205447e-05,
964
+ "loss": 0.005,
965
+ "step": 12700
966
+ },
967
+ {
968
+ "epoch": 1.258232576427799,
969
+ "grad_norm": 0.06385162472724915,
970
+ "learning_rate": 3.225689008628536e-05,
971
+ "loss": 0.0045,
972
+ "step": 12800
973
+ },
974
+ {
975
+ "epoch": 1.2680625184311412,
976
+ "grad_norm": 0.10582277178764343,
977
+ "learning_rate": 3.207485346051626e-05,
978
+ "loss": 0.005,
979
+ "step": 12900
980
+ },
981
+ {
982
+ "epoch": 1.2778924604344835,
983
+ "grad_norm": 0.10751399397850037,
984
+ "learning_rate": 3.189281683474715e-05,
985
+ "loss": 0.0048,
986
+ "step": 13000
987
+ },
988
+ {
989
+ "epoch": 1.2877224024378255,
990
+ "grad_norm": 0.0692177563905716,
991
+ "learning_rate": 3.1710780208978044e-05,
992
+ "loss": 0.0045,
993
+ "step": 13100
994
+ },
995
+ {
996
+ "epoch": 1.2975523444411678,
997
+ "grad_norm": 0.1047593429684639,
998
+ "learning_rate": 3.152874358320894e-05,
999
+ "loss": 0.0048,
1000
+ "step": 13200
1001
+ },
1002
+ {
1003
+ "epoch": 1.30738228644451,
1004
+ "grad_norm": 0.04881567507982254,
1005
+ "learning_rate": 3.1346706957439835e-05,
1006
+ "loss": 0.0045,
1007
+ "step": 13300
1008
+ },
1009
+ {
1010
+ "epoch": 1.3172122284478522,
1011
+ "grad_norm": 0.20649947226047516,
1012
+ "learning_rate": 3.1164670331670734e-05,
1013
+ "loss": 0.0048,
1014
+ "step": 13400
1015
+ },
1016
+ {
1017
+ "epoch": 1.3270421704511943,
1018
+ "grad_norm": 0.07260572165250778,
1019
+ "learning_rate": 3.098263370590163e-05,
1020
+ "loss": 0.0046,
1021
+ "step": 13500
1022
+ },
1023
+ {
1024
+ "epoch": 1.3368721124545364,
1025
+ "grad_norm": 0.14177989959716797,
1026
+ "learning_rate": 3.0800597080132525e-05,
1027
+ "loss": 0.0048,
1028
+ "step": 13600
1029
+ },
1030
+ {
1031
+ "epoch": 1.3467020544578787,
1032
+ "grad_norm": 0.04176017642021179,
1033
+ "learning_rate": 3.0618560454363424e-05,
1034
+ "loss": 0.0048,
1035
+ "step": 13700
1036
+ },
1037
+ {
1038
+ "epoch": 1.3565319964612208,
1039
+ "grad_norm": 0.09111765027046204,
1040
+ "learning_rate": 3.0436523828594316e-05,
1041
+ "loss": 0.0049,
1042
+ "step": 13800
1043
+ },
1044
+ {
1045
+ "epoch": 1.366361938464563,
1046
+ "grad_norm": 0.039208538830280304,
1047
+ "learning_rate": 3.025448720282521e-05,
1048
+ "loss": 0.005,
1049
+ "step": 13900
1050
+ },
1051
+ {
1052
+ "epoch": 1.3761918804679052,
1053
+ "grad_norm": 0.027969840914011,
1054
+ "learning_rate": 3.0072450577056103e-05,
1055
+ "loss": 0.0049,
1056
+ "step": 14000
1057
+ },
1058
+ {
1059
+ "epoch": 1.3761918804679052,
1060
+ "eval_f1": 0.8982576922219995,
1061
+ "eval_loss": 0.01912725158035755,
1062
+ "eval_precision": 0.8887921342122087,
1063
+ "eval_recall": 0.9079270352437788,
1064
+ "eval_runtime": 368.6528,
1065
+ "eval_samples_per_second": 220.747,
1066
+ "eval_steps_per_second": 3.45,
1067
+ "step": 14000
1068
+ },
1069
+ {
1070
+ "epoch": 1.3860218224712475,
1071
+ "grad_norm": 0.05266612395644188,
1072
+ "learning_rate": 2.9890413951287e-05,
1073
+ "loss": 0.0048,
1074
+ "step": 14100
1075
+ },
1076
+ {
1077
+ "epoch": 1.3958517644745896,
1078
+ "grad_norm": 0.04086877778172493,
1079
+ "learning_rate": 2.9708377325517894e-05,
1080
+ "loss": 0.0044,
1081
+ "step": 14200
1082
+ },
1083
+ {
1084
+ "epoch": 1.4056817064779317,
1085
+ "grad_norm": 0.023200375959277153,
1086
+ "learning_rate": 2.952634069974879e-05,
1087
+ "loss": 0.0057,
1088
+ "step": 14300
1089
+ },
1090
+ {
1091
+ "epoch": 1.415511648481274,
1092
+ "grad_norm": 0.08787036687135696,
1093
+ "learning_rate": 2.9344304073979685e-05,
1094
+ "loss": 0.0051,
1095
+ "step": 14400
1096
+ },
1097
+ {
1098
+ "epoch": 1.425341590484616,
1099
+ "grad_norm": 0.05955840274691582,
1100
+ "learning_rate": 2.9162267448210584e-05,
1101
+ "loss": 0.0046,
1102
+ "step": 14500
1103
+ },
1104
+ {
1105
+ "epoch": 1.4351715324879584,
1106
+ "grad_norm": 0.08939366787672043,
1107
+ "learning_rate": 2.898023082244148e-05,
1108
+ "loss": 0.0055,
1109
+ "step": 14600
1110
+ },
1111
+ {
1112
+ "epoch": 1.4450014744913005,
1113
+ "grad_norm": 0.06770022213459015,
1114
+ "learning_rate": 2.8798194196672372e-05,
1115
+ "loss": 0.005,
1116
+ "step": 14700
1117
+ },
1118
+ {
1119
+ "epoch": 1.4548314164946428,
1120
+ "grad_norm": 0.10894829034805298,
1121
+ "learning_rate": 2.8616157570903267e-05,
1122
+ "loss": 0.0048,
1123
+ "step": 14800
1124
+ },
1125
+ {
1126
+ "epoch": 1.4646613584979848,
1127
+ "grad_norm": 0.045476239174604416,
1128
+ "learning_rate": 2.8434120945134163e-05,
1129
+ "loss": 0.0047,
1130
+ "step": 14900
1131
+ },
1132
+ {
1133
+ "epoch": 1.474491300501327,
1134
+ "grad_norm": 0.03545854985713959,
1135
+ "learning_rate": 2.8252084319365058e-05,
1136
+ "loss": 0.004,
1137
+ "step": 15000
1138
+ },
1139
+ {
1140
+ "epoch": 1.4843212425046692,
1141
+ "grad_norm": 0.028701910749077797,
1142
+ "learning_rate": 2.8070047693595954e-05,
1143
+ "loss": 0.0046,
1144
+ "step": 15100
1145
+ },
1146
+ {
1147
+ "epoch": 1.4941511845080113,
1148
+ "grad_norm": 0.13125169277191162,
1149
+ "learning_rate": 2.7888011067826846e-05,
1150
+ "loss": 0.0047,
1151
+ "step": 15200
1152
+ },
1153
+ {
1154
+ "epoch": 1.5039811265113536,
1155
+ "grad_norm": 0.13967622816562653,
1156
+ "learning_rate": 2.770597444205774e-05,
1157
+ "loss": 0.0045,
1158
+ "step": 15300
1159
+ },
1160
+ {
1161
+ "epoch": 1.5138110685146957,
1162
+ "grad_norm": 0.051971685141325,
1163
+ "learning_rate": 2.7523937816288637e-05,
1164
+ "loss": 0.0046,
1165
+ "step": 15400
1166
+ },
1167
+ {
1168
+ "epoch": 1.523641010518038,
1169
+ "grad_norm": 0.08570306748151779,
1170
+ "learning_rate": 2.7341901190519535e-05,
1171
+ "loss": 0.0048,
1172
+ "step": 15500
1173
+ },
1174
+ {
1175
+ "epoch": 1.53347095252138,
1176
+ "grad_norm": 0.08714251965284348,
1177
+ "learning_rate": 2.715986456475043e-05,
1178
+ "loss": 0.0043,
1179
+ "step": 15600
1180
+ },
1181
+ {
1182
+ "epoch": 1.5433008945247222,
1183
+ "grad_norm": 0.031157121062278748,
1184
+ "learning_rate": 2.6977827938981326e-05,
1185
+ "loss": 0.0044,
1186
+ "step": 15700
1187
+ },
1188
+ {
1189
+ "epoch": 1.5531308365280645,
1190
+ "grad_norm": 0.0643945187330246,
1191
+ "learning_rate": 2.6795791313212222e-05,
1192
+ "loss": 0.0039,
1193
+ "step": 15800
1194
+ },
1195
+ {
1196
+ "epoch": 1.5629607785314068,
1197
+ "grad_norm": 0.10222964733839035,
1198
+ "learning_rate": 2.6613754687443114e-05,
1199
+ "loss": 0.0048,
1200
+ "step": 15900
1201
+ },
1202
+ {
1203
+ "epoch": 1.5727907205347489,
1204
+ "grad_norm": 0.05405284836888313,
1205
+ "learning_rate": 2.643171806167401e-05,
1206
+ "loss": 0.0041,
1207
+ "step": 16000
1208
+ },
1209
+ {
1210
+ "epoch": 1.5727907205347489,
1211
+ "eval_f1": 0.9061435464890399,
1212
+ "eval_loss": 0.019078785553574562,
1213
+ "eval_precision": 0.9062404973477042,
1214
+ "eval_recall": 0.9060466163720302,
1215
+ "eval_runtime": 368.5231,
1216
+ "eval_samples_per_second": 220.825,
1217
+ "eval_steps_per_second": 3.452,
1218
+ "step": 16000
1219
+ },
1220
+ {
1221
+ "epoch": 1.582620662538091,
1222
+ "grad_norm": 0.03582916781306267,
1223
+ "learning_rate": 2.6249681435904905e-05,
1224
+ "loss": 0.0042,
1225
+ "step": 16100
1226
+ },
1227
+ {
1228
+ "epoch": 1.5924506045414333,
1229
+ "grad_norm": 0.08402363955974579,
1230
+ "learning_rate": 2.60676448101358e-05,
1231
+ "loss": 0.0048,
1232
+ "step": 16200
1233
+ },
1234
+ {
1235
+ "epoch": 1.6022805465447754,
1236
+ "grad_norm": 0.08818399906158447,
1237
+ "learning_rate": 2.5885608184366696e-05,
1238
+ "loss": 0.0046,
1239
+ "step": 16300
1240
+ },
1241
+ {
1242
+ "epoch": 1.6121104885481174,
1243
+ "grad_norm": 0.04633729159832001,
1244
+ "learning_rate": 2.5703571558597588e-05,
1245
+ "loss": 0.0046,
1246
+ "step": 16400
1247
+ },
1248
+ {
1249
+ "epoch": 1.6219404305514598,
1250
+ "grad_norm": 0.05240938439965248,
1251
+ "learning_rate": 2.5521534932828483e-05,
1252
+ "loss": 0.0049,
1253
+ "step": 16500
1254
+ },
1255
+ {
1256
+ "epoch": 1.631770372554802,
1257
+ "grad_norm": 0.044343069195747375,
1258
+ "learning_rate": 2.5339498307059382e-05,
1259
+ "loss": 0.0046,
1260
+ "step": 16600
1261
+ },
1262
+ {
1263
+ "epoch": 1.6416003145581441,
1264
+ "grad_norm": 0.05871371924877167,
1265
+ "learning_rate": 2.5157461681290278e-05,
1266
+ "loss": 0.0046,
1267
+ "step": 16700
1268
+ },
1269
+ {
1270
+ "epoch": 1.6514302565614862,
1271
+ "grad_norm": 0.04396549612283707,
1272
+ "learning_rate": 2.4975425055521173e-05,
1273
+ "loss": 0.004,
1274
+ "step": 16800
1275
+ },
1276
+ {
1277
+ "epoch": 1.6612601985648285,
1278
+ "grad_norm": 0.09425197541713715,
1279
+ "learning_rate": 2.479338842975207e-05,
1280
+ "loss": 0.0039,
1281
+ "step": 16900
1282
+ },
1283
+ {
1284
+ "epoch": 1.6710901405681706,
1285
+ "grad_norm": 0.039449796080589294,
1286
+ "learning_rate": 2.4611351803982964e-05,
1287
+ "loss": 0.0044,
1288
+ "step": 17000
1289
+ },
1290
+ {
1291
+ "epoch": 1.6809200825715127,
1292
+ "grad_norm": 0.10242141038179398,
1293
+ "learning_rate": 2.4429315178213856e-05,
1294
+ "loss": 0.0045,
1295
+ "step": 17100
1296
+ },
1297
+ {
1298
+ "epoch": 1.690750024574855,
1299
+ "grad_norm": 0.07649975270032883,
1300
+ "learning_rate": 2.424727855244475e-05,
1301
+ "loss": 0.0041,
1302
+ "step": 17200
1303
+ },
1304
+ {
1305
+ "epoch": 1.7005799665781973,
1306
+ "grad_norm": 0.045152563601732254,
1307
+ "learning_rate": 2.406524192667565e-05,
1308
+ "loss": 0.0044,
1309
+ "step": 17300
1310
+ },
1311
+ {
1312
+ "epoch": 1.7104099085815394,
1313
+ "grad_norm": 0.12754422426223755,
1314
+ "learning_rate": 2.3883205300906546e-05,
1315
+ "loss": 0.0043,
1316
+ "step": 17400
1317
+ },
1318
+ {
1319
+ "epoch": 1.7202398505848815,
1320
+ "grad_norm": 0.055379465222358704,
1321
+ "learning_rate": 2.3701168675137438e-05,
1322
+ "loss": 0.0045,
1323
+ "step": 17500
1324
+ },
1325
+ {
1326
+ "epoch": 1.7300697925882238,
1327
+ "grad_norm": 0.040617331862449646,
1328
+ "learning_rate": 2.3519132049368333e-05,
1329
+ "loss": 0.0044,
1330
+ "step": 17600
1331
+ },
1332
+ {
1333
+ "epoch": 1.7398997345915659,
1334
+ "grad_norm": 0.09353236109018326,
1335
+ "learning_rate": 2.333709542359923e-05,
1336
+ "loss": 0.0048,
1337
+ "step": 17700
1338
+ },
1339
+ {
1340
+ "epoch": 1.749729676594908,
1341
+ "grad_norm": 0.07118421792984009,
1342
+ "learning_rate": 2.3155058797830124e-05,
1343
+ "loss": 0.0041,
1344
+ "step": 17800
1345
+ },
1346
+ {
1347
+ "epoch": 1.7595596185982503,
1348
+ "grad_norm": 0.08269080519676208,
1349
+ "learning_rate": 2.297302217206102e-05,
1350
+ "loss": 0.0049,
1351
+ "step": 17900
1352
+ },
1353
+ {
1354
+ "epoch": 1.7693895606015926,
1355
+ "grad_norm": 0.07028749585151672,
1356
+ "learning_rate": 2.2790985546291915e-05,
1357
+ "loss": 0.0045,
1358
+ "step": 18000
1359
+ },
1360
+ {
1361
+ "epoch": 1.7693895606015926,
1362
+ "eval_f1": 0.9050546716060729,
1363
+ "eval_loss": 0.018239887431263924,
1364
+ "eval_precision": 0.9009696604514668,
1365
+ "eval_recall": 0.9091768944938633,
1366
+ "eval_runtime": 368.5862,
1367
+ "eval_samples_per_second": 220.787,
1368
+ "eval_steps_per_second": 3.451,
1369
+ "step": 18000
1370
+ },
1371
+ {
1372
+ "epoch": 1.7792195026049347,
1373
+ "grad_norm": 0.02531488798558712,
1374
+ "learning_rate": 2.260894892052281e-05,
1375
+ "loss": 0.0049,
1376
+ "step": 18100
1377
+ },
1378
+ {
1379
+ "epoch": 1.7890494446082768,
1380
+ "grad_norm": 0.06841567903757095,
1381
+ "learning_rate": 2.2426912294753706e-05,
1382
+ "loss": 0.004,
1383
+ "step": 18200
1384
+ },
1385
+ {
1386
+ "epoch": 1.798879386611619,
1387
+ "grad_norm": 0.438967764377594,
1388
+ "learning_rate": 2.22448756689846e-05,
1389
+ "loss": 0.0045,
1390
+ "step": 18300
1391
+ },
1392
+ {
1393
+ "epoch": 1.8087093286149611,
1394
+ "grad_norm": 0.07297348976135254,
1395
+ "learning_rate": 2.2062839043215497e-05,
1396
+ "loss": 0.0046,
1397
+ "step": 18400
1398
+ },
1399
+ {
1400
+ "epoch": 1.8185392706183032,
1401
+ "grad_norm": 0.04762211814522743,
1402
+ "learning_rate": 2.1880802417446393e-05,
1403
+ "loss": 0.0044,
1404
+ "step": 18500
1405
+ },
1406
+ {
1407
+ "epoch": 1.8283692126216455,
1408
+ "grad_norm": 0.06008617579936981,
1409
+ "learning_rate": 2.1698765791677285e-05,
1410
+ "loss": 0.0043,
1411
+ "step": 18600
1412
+ },
1413
+ {
1414
+ "epoch": 1.8381991546249878,
1415
+ "grad_norm": 0.07657765597105026,
1416
+ "learning_rate": 2.151672916590818e-05,
1417
+ "loss": 0.0037,
1418
+ "step": 18700
1419
+ },
1420
+ {
1421
+ "epoch": 1.84802909662833,
1422
+ "grad_norm": 0.05616445094347,
1423
+ "learning_rate": 2.1334692540139076e-05,
1424
+ "loss": 0.0043,
1425
+ "step": 18800
1426
+ },
1427
+ {
1428
+ "epoch": 1.857859038631672,
1429
+ "grad_norm": 0.03654363006353378,
1430
+ "learning_rate": 2.1152655914369975e-05,
1431
+ "loss": 0.0044,
1432
+ "step": 18900
1433
+ },
1434
+ {
1435
+ "epoch": 1.8676889806350143,
1436
+ "grad_norm": 0.09715255349874496,
1437
+ "learning_rate": 2.0970619288600867e-05,
1438
+ "loss": 0.0037,
1439
+ "step": 19000
1440
+ },
1441
+ {
1442
+ "epoch": 1.8775189226383564,
1443
+ "grad_norm": 0.10472027957439423,
1444
+ "learning_rate": 2.0788582662831762e-05,
1445
+ "loss": 0.0038,
1446
+ "step": 19100
1447
+ },
1448
+ {
1449
+ "epoch": 1.8873488646416985,
1450
+ "grad_norm": 0.014321831054985523,
1451
+ "learning_rate": 2.0606546037062658e-05,
1452
+ "loss": 0.0046,
1453
+ "step": 19200
1454
+ },
1455
+ {
1456
+ "epoch": 1.8971788066450408,
1457
+ "grad_norm": 0.038841910660266876,
1458
+ "learning_rate": 2.0424509411293553e-05,
1459
+ "loss": 0.0044,
1460
+ "step": 19300
1461
+ },
1462
+ {
1463
+ "epoch": 1.907008748648383,
1464
+ "grad_norm": 0.07139607518911362,
1465
+ "learning_rate": 2.024247278552445e-05,
1466
+ "loss": 0.0042,
1467
+ "step": 19400
1468
+ },
1469
+ {
1470
+ "epoch": 1.9168386906517252,
1471
+ "grad_norm": 0.03969763219356537,
1472
+ "learning_rate": 2.0060436159755344e-05,
1473
+ "loss": 0.004,
1474
+ "step": 19500
1475
+ },
1476
+ {
1477
+ "epoch": 1.9266686326550673,
1478
+ "grad_norm": 0.06369686126708984,
1479
+ "learning_rate": 1.987839953398624e-05,
1480
+ "loss": 0.0042,
1481
+ "step": 19600
1482
+ },
1483
+ {
1484
+ "epoch": 1.9364985746584096,
1485
+ "grad_norm": 0.045906830579042435,
1486
+ "learning_rate": 1.9696362908217135e-05,
1487
+ "loss": 0.0036,
1488
+ "step": 19700
1489
+ },
1490
+ {
1491
+ "epoch": 1.9463285166617517,
1492
+ "grad_norm": 0.06250949203968048,
1493
+ "learning_rate": 1.9514326282448027e-05,
1494
+ "loss": 0.0044,
1495
+ "step": 19800
1496
+ },
1497
+ {
1498
+ "epoch": 1.9561584586650937,
1499
+ "grad_norm": 0.10479672998189926,
1500
+ "learning_rate": 1.9332289656678926e-05,
1501
+ "loss": 0.0039,
1502
+ "step": 19900
1503
+ },
1504
+ {
1505
+ "epoch": 1.965988400668436,
1506
+ "grad_norm": 0.10211700201034546,
1507
+ "learning_rate": 1.915025303090982e-05,
1508
+ "loss": 0.0039,
1509
+ "step": 20000
1510
+ },
1511
+ {
1512
+ "epoch": 1.965988400668436,
1513
+ "eval_f1": 0.9113280966497507,
1514
+ "eval_loss": 0.016693420708179474,
1515
+ "eval_precision": 0.9037548028479365,
1516
+ "eval_recall": 0.9190293885823668,
1517
+ "eval_runtime": 367.6153,
1518
+ "eval_samples_per_second": 221.37,
1519
+ "eval_steps_per_second": 3.46,
1520
+ "step": 20000
1521
+ },
1522
+ {
1523
+ "epoch": 1.9758183426717784,
1524
+ "grad_norm": 0.2106950581073761,
1525
+ "learning_rate": 1.8968216405140717e-05,
1526
+ "loss": 0.0044,
1527
+ "step": 20100
1528
+ },
1529
+ {
1530
+ "epoch": 1.9856482846751204,
1531
+ "grad_norm": 0.037731654942035675,
1532
+ "learning_rate": 1.878617977937161e-05,
1533
+ "loss": 0.0041,
1534
+ "step": 20200
1535
+ },
1536
+ {
1537
+ "epoch": 1.9954782266784625,
1538
+ "grad_norm": 0.06709322333335876,
1539
+ "learning_rate": 1.8604143153602504e-05,
1540
+ "loss": 0.0041,
1541
+ "step": 20300
1542
+ },
1543
+ {
1544
+ "epoch": 2.005308168681805,
1545
+ "grad_norm": 0.12997037172317505,
1546
+ "learning_rate": 1.8422106527833403e-05,
1547
+ "loss": 0.003,
1548
+ "step": 20400
1549
+ },
1550
+ {
1551
+ "epoch": 2.015138110685147,
1552
+ "grad_norm": 0.09218054264783859,
1553
+ "learning_rate": 1.8240069902064295e-05,
1554
+ "loss": 0.0024,
1555
+ "step": 20500
1556
+ },
1557
+ {
1558
+ "epoch": 2.024968052688489,
1559
+ "grad_norm": 0.053119756281375885,
1560
+ "learning_rate": 1.805803327629519e-05,
1561
+ "loss": 0.0025,
1562
+ "step": 20600
1563
+ },
1564
+ {
1565
+ "epoch": 2.0347979946918313,
1566
+ "grad_norm": 0.05638999119400978,
1567
+ "learning_rate": 1.7875996650526086e-05,
1568
+ "loss": 0.0022,
1569
+ "step": 20700
1570
+ },
1571
+ {
1572
+ "epoch": 2.0446279366951736,
1573
+ "grad_norm": 0.029376154765486717,
1574
+ "learning_rate": 1.769396002475698e-05,
1575
+ "loss": 0.0023,
1576
+ "step": 20800
1577
+ },
1578
+ {
1579
+ "epoch": 2.0544578786985155,
1580
+ "grad_norm": 0.1715756207704544,
1581
+ "learning_rate": 1.7511923398987877e-05,
1582
+ "loss": 0.002,
1583
+ "step": 20900
1584
+ },
1585
+ {
1586
+ "epoch": 2.064287820701858,
1587
+ "grad_norm": 0.17415770888328552,
1588
+ "learning_rate": 1.7329886773218773e-05,
1589
+ "loss": 0.002,
1590
+ "step": 21000
1591
+ },
1592
+ {
1593
+ "epoch": 2.0741177627052,
1594
+ "grad_norm": 0.18803322315216064,
1595
+ "learning_rate": 1.7147850147449668e-05,
1596
+ "loss": 0.0016,
1597
+ "step": 21100
1598
+ },
1599
+ {
1600
+ "epoch": 2.0839477047085424,
1601
+ "grad_norm": 0.04249728098511696,
1602
+ "learning_rate": 1.6965813521680564e-05,
1603
+ "loss": 0.0021,
1604
+ "step": 21200
1605
+ },
1606
+ {
1607
+ "epoch": 2.0937776467118843,
1608
+ "grad_norm": 0.10625623166561127,
1609
+ "learning_rate": 1.6783776895911456e-05,
1610
+ "loss": 0.0021,
1611
+ "step": 21300
1612
+ },
1613
+ {
1614
+ "epoch": 2.1036075887152266,
1615
+ "grad_norm": 0.08748575299978256,
1616
+ "learning_rate": 1.6601740270142354e-05,
1617
+ "loss": 0.0021,
1618
+ "step": 21400
1619
+ },
1620
+ {
1621
+ "epoch": 2.113437530718569,
1622
+ "grad_norm": 0.014536886475980282,
1623
+ "learning_rate": 1.641970364437325e-05,
1624
+ "loss": 0.0019,
1625
+ "step": 21500
1626
+ },
1627
+ {
1628
+ "epoch": 2.1232674727219107,
1629
+ "grad_norm": 0.0076785460114479065,
1630
+ "learning_rate": 1.6237667018604145e-05,
1631
+ "loss": 0.002,
1632
+ "step": 21600
1633
+ },
1634
+ {
1635
+ "epoch": 2.133097414725253,
1636
+ "grad_norm": 0.054168928414583206,
1637
+ "learning_rate": 1.6055630392835037e-05,
1638
+ "loss": 0.0021,
1639
+ "step": 21700
1640
+ },
1641
+ {
1642
+ "epoch": 2.1429273567285954,
1643
+ "grad_norm": 0.09373613446950912,
1644
+ "learning_rate": 1.5873593767065933e-05,
1645
+ "loss": 0.0026,
1646
+ "step": 21800
1647
+ },
1648
+ {
1649
+ "epoch": 2.1527572987319377,
1650
+ "grad_norm": 0.025737851858139038,
1651
+ "learning_rate": 1.5691557141296832e-05,
1652
+ "loss": 0.002,
1653
+ "step": 21900
1654
+ },
1655
+ {
1656
+ "epoch": 2.1625872407352795,
1657
+ "grad_norm": 0.17471902072429657,
1658
+ "learning_rate": 1.5509520515527727e-05,
1659
+ "loss": 0.0019,
1660
+ "step": 22000
1661
+ },
1662
+ {
1663
+ "epoch": 2.1625872407352795,
1664
+ "eval_f1": 0.9128289933125876,
1665
+ "eval_loss": 0.018903136253356934,
1666
+ "eval_precision": 0.9058802656820022,
1667
+ "eval_recall": 0.9198851480689112,
1668
+ "eval_runtime": 368.5326,
1669
+ "eval_samples_per_second": 220.819,
1670
+ "eval_steps_per_second": 3.452,
1671
+ "step": 22000
1672
+ },
1673
+ {
1674
+ "epoch": 2.172417182738622,
1675
+ "grad_norm": 0.048461370170116425,
1676
+ "learning_rate": 1.532748388975862e-05,
1677
+ "loss": 0.002,
1678
+ "step": 22100
1679
+ },
1680
+ {
1681
+ "epoch": 2.182247124741964,
1682
+ "grad_norm": 0.33116135001182556,
1683
+ "learning_rate": 1.5145447263989515e-05,
1684
+ "loss": 0.0022,
1685
+ "step": 22200
1686
+ },
1687
+ {
1688
+ "epoch": 2.192077066745306,
1689
+ "grad_norm": 0.2750576436519623,
1690
+ "learning_rate": 1.496341063822041e-05,
1691
+ "loss": 0.0019,
1692
+ "step": 22300
1693
+ },
1694
+ {
1695
+ "epoch": 2.2019070087486483,
1696
+ "grad_norm": 0.05528819188475609,
1697
+ "learning_rate": 1.4781374012451307e-05,
1698
+ "loss": 0.0023,
1699
+ "step": 22400
1700
+ },
1701
+ {
1702
+ "epoch": 2.2117369507519906,
1703
+ "grad_norm": 0.03077654168009758,
1704
+ "learning_rate": 1.4599337386682201e-05,
1705
+ "loss": 0.0019,
1706
+ "step": 22500
1707
+ },
1708
+ {
1709
+ "epoch": 2.221566892755333,
1710
+ "grad_norm": 0.06391356140375137,
1711
+ "learning_rate": 1.4417300760913097e-05,
1712
+ "loss": 0.0023,
1713
+ "step": 22600
1714
+ },
1715
+ {
1716
+ "epoch": 2.231396834758675,
1717
+ "grad_norm": 0.07958533614873886,
1718
+ "learning_rate": 1.4235264135143992e-05,
1719
+ "loss": 0.0019,
1720
+ "step": 22700
1721
+ },
1722
+ {
1723
+ "epoch": 2.241226776762017,
1724
+ "grad_norm": 0.007806443143635988,
1725
+ "learning_rate": 1.4053227509374886e-05,
1726
+ "loss": 0.002,
1727
+ "step": 22800
1728
+ },
1729
+ {
1730
+ "epoch": 2.2510567187653594,
1731
+ "grad_norm": 0.05843871831893921,
1732
+ "learning_rate": 1.3871190883605781e-05,
1733
+ "loss": 0.0019,
1734
+ "step": 22900
1735
+ },
1736
+ {
1737
+ "epoch": 2.2608866607687013,
1738
+ "grad_norm": 0.10456466674804688,
1739
+ "learning_rate": 1.3689154257836679e-05,
1740
+ "loss": 0.0025,
1741
+ "step": 23000
1742
+ },
1743
+ {
1744
+ "epoch": 2.2707166027720436,
1745
+ "grad_norm": 0.10085894912481308,
1746
+ "learning_rate": 1.3507117632067572e-05,
1747
+ "loss": 0.0022,
1748
+ "step": 23100
1749
+ },
1750
+ {
1751
+ "epoch": 2.280546544775386,
1752
+ "grad_norm": 0.04934035614132881,
1753
+ "learning_rate": 1.3325081006298468e-05,
1754
+ "loss": 0.002,
1755
+ "step": 23200
1756
+ },
1757
+ {
1758
+ "epoch": 2.290376486778728,
1759
+ "grad_norm": 0.2595049738883972,
1760
+ "learning_rate": 1.3143044380529362e-05,
1761
+ "loss": 0.0022,
1762
+ "step": 23300
1763
+ },
1764
+ {
1765
+ "epoch": 2.30020642878207,
1766
+ "grad_norm": 0.1089097335934639,
1767
+ "learning_rate": 1.2961007754760257e-05,
1768
+ "loss": 0.0016,
1769
+ "step": 23400
1770
+ },
1771
+ {
1772
+ "epoch": 2.3100363707854124,
1773
+ "grad_norm": 0.13112761080265045,
1774
+ "learning_rate": 1.2778971128991154e-05,
1775
+ "loss": 0.002,
1776
+ "step": 23500
1777
+ },
1778
+ {
1779
+ "epoch": 2.3198663127887547,
1780
+ "grad_norm": 0.09164682775735855,
1781
+ "learning_rate": 1.259693450322205e-05,
1782
+ "loss": 0.0017,
1783
+ "step": 23600
1784
+ },
1785
+ {
1786
+ "epoch": 2.329696254792097,
1787
+ "grad_norm": 0.008273393847048283,
1788
+ "learning_rate": 1.2414897877452943e-05,
1789
+ "loss": 0.0022,
1790
+ "step": 23700
1791
+ },
1792
+ {
1793
+ "epoch": 2.339526196795439,
1794
+ "grad_norm": 0.031804751604795456,
1795
+ "learning_rate": 1.223286125168384e-05,
1796
+ "loss": 0.0019,
1797
+ "step": 23800
1798
+ },
1799
+ {
1800
+ "epoch": 2.349356138798781,
1801
+ "grad_norm": 0.02527899481356144,
1802
+ "learning_rate": 1.2050824625914734e-05,
1803
+ "loss": 0.0022,
1804
+ "step": 23900
1805
+ },
1806
+ {
1807
+ "epoch": 2.3591860808021234,
1808
+ "grad_norm": 0.10769706964492798,
1809
+ "learning_rate": 1.186878800014563e-05,
1810
+ "loss": 0.0018,
1811
+ "step": 24000
1812
+ },
1813
+ {
1814
+ "epoch": 2.3591860808021234,
1815
+ "eval_f1": 0.9163237772142261,
1816
+ "eval_loss": 0.018079889938235283,
1817
+ "eval_precision": 0.9111769224774269,
1818
+ "eval_recall": 0.9215291070825358,
1819
+ "eval_runtime": 368.2948,
1820
+ "eval_samples_per_second": 220.962,
1821
+ "eval_steps_per_second": 3.454,
1822
+ "step": 24000
1823
+ },
1824
+ {
1825
+ "epoch": 2.3690160228054653,
1826
+ "grad_norm": 0.05162603408098221,
1827
+ "learning_rate": 1.1686751374376525e-05,
1828
+ "loss": 0.0017,
1829
+ "step": 24100
1830
+ },
1831
+ {
1832
+ "epoch": 2.3788459648088076,
1833
+ "grad_norm": 0.014332090504467487,
1834
+ "learning_rate": 1.150471474860742e-05,
1835
+ "loss": 0.0018,
1836
+ "step": 24200
1837
+ },
1838
+ {
1839
+ "epoch": 2.38867590681215,
1840
+ "grad_norm": 0.03358616307377815,
1841
+ "learning_rate": 1.1322678122838315e-05,
1842
+ "loss": 0.002,
1843
+ "step": 24300
1844
+ },
1845
+ {
1846
+ "epoch": 2.398505848815492,
1847
+ "grad_norm": 0.01889238879084587,
1848
+ "learning_rate": 1.1140641497069212e-05,
1849
+ "loss": 0.0017,
1850
+ "step": 24400
1851
+ },
1852
+ {
1853
+ "epoch": 2.408335790818834,
1854
+ "grad_norm": 0.04304986447095871,
1855
+ "learning_rate": 1.0958604871300105e-05,
1856
+ "loss": 0.0021,
1857
+ "step": 24500
1858
+ },
1859
+ {
1860
+ "epoch": 2.4181657328221764,
1861
+ "grad_norm": 0.04633597284555435,
1862
+ "learning_rate": 1.0776568245531003e-05,
1863
+ "loss": 0.0021,
1864
+ "step": 24600
1865
+ },
1866
+ {
1867
+ "epoch": 2.4279956748255187,
1868
+ "grad_norm": 0.08608473092317581,
1869
+ "learning_rate": 1.0594531619761896e-05,
1870
+ "loss": 0.0019,
1871
+ "step": 24700
1872
+ },
1873
+ {
1874
+ "epoch": 2.4378256168288606,
1875
+ "grad_norm": 0.0008240310125984251,
1876
+ "learning_rate": 1.0412494993992792e-05,
1877
+ "loss": 0.002,
1878
+ "step": 24800
1879
+ },
1880
+ {
1881
+ "epoch": 2.447655558832203,
1882
+ "grad_norm": 0.033123135566711426,
1883
+ "learning_rate": 1.0230458368223687e-05,
1884
+ "loss": 0.0014,
1885
+ "step": 24900
1886
+ },
1887
+ {
1888
+ "epoch": 2.457485500835545,
1889
+ "grad_norm": 0.07832614332437515,
1890
+ "learning_rate": 1.0048421742454583e-05,
1891
+ "loss": 0.0019,
1892
+ "step": 25000
1893
+ },
1894
+ {
1895
+ "epoch": 2.4673154428388875,
1896
+ "grad_norm": 0.13727368414402008,
1897
+ "learning_rate": 9.866385116685478e-06,
1898
+ "loss": 0.0023,
1899
+ "step": 25100
1900
+ },
1901
+ {
1902
+ "epoch": 2.4771453848422293,
1903
+ "grad_norm": 0.013529472053050995,
1904
+ "learning_rate": 9.684348490916372e-06,
1905
+ "loss": 0.0016,
1906
+ "step": 25200
1907
+ },
1908
+ {
1909
+ "epoch": 2.4869753268455717,
1910
+ "grad_norm": 0.09583965688943863,
1911
+ "learning_rate": 9.502311865147267e-06,
1912
+ "loss": 0.0017,
1913
+ "step": 25300
1914
+ },
1915
+ {
1916
+ "epoch": 2.496805268848914,
1917
+ "grad_norm": 0.10004164278507233,
1918
+ "learning_rate": 9.320275239378163e-06,
1919
+ "loss": 0.0021,
1920
+ "step": 25400
1921
+ },
1922
+ {
1923
+ "epoch": 2.506635210852256,
1924
+ "grad_norm": 0.13714532554149628,
1925
+ "learning_rate": 9.138238613609058e-06,
1926
+ "loss": 0.0017,
1927
+ "step": 25500
1928
+ },
1929
+ {
1930
+ "epoch": 2.516465152855598,
1931
+ "grad_norm": 0.060430146753787994,
1932
+ "learning_rate": 8.956201987839954e-06,
1933
+ "loss": 0.002,
1934
+ "step": 25600
1935
+ },
1936
+ {
1937
+ "epoch": 2.5262950948589404,
1938
+ "grad_norm": 0.11850597709417343,
1939
+ "learning_rate": 8.77416536207085e-06,
1940
+ "loss": 0.0018,
1941
+ "step": 25700
1942
+ },
1943
+ {
1944
+ "epoch": 2.5361250368622823,
1945
+ "grad_norm": 0.05095691233873367,
1946
+ "learning_rate": 8.592128736301743e-06,
1947
+ "loss": 0.0016,
1948
+ "step": 25800
1949
+ },
1950
+ {
1951
+ "epoch": 2.5459549788656246,
1952
+ "grad_norm": 0.22412051260471344,
1953
+ "learning_rate": 8.41009211053264e-06,
1954
+ "loss": 0.0017,
1955
+ "step": 25900
1956
+ },
1957
+ {
1958
+ "epoch": 2.555784920868967,
1959
+ "grad_norm": 0.06112053617835045,
1960
+ "learning_rate": 8.228055484763534e-06,
1961
+ "loss": 0.0021,
1962
+ "step": 26000
1963
+ },
1964
+ {
1965
+ "epoch": 2.555784920868967,
1966
+ "eval_f1": 0.917086342018139,
1967
+ "eval_loss": 0.018284747377038002,
1968
+ "eval_precision": 0.9125316328691988,
1969
+ "eval_recall": 0.9216867469879518,
1970
+ "eval_runtime": 367.7346,
1971
+ "eval_samples_per_second": 221.298,
1972
+ "eval_steps_per_second": 3.459,
1973
+ "step": 26000
1974
+ },
1975
+ {
1976
+ "epoch": 2.5656148628723088,
1977
+ "grad_norm": 0.04585032910108566,
1978
+ "learning_rate": 8.046018858994431e-06,
1979
+ "loss": 0.0015,
1980
+ "step": 26100
1981
+ },
1982
+ {
1983
+ "epoch": 2.575444804875651,
1984
+ "grad_norm": 0.023976296186447144,
1985
+ "learning_rate": 7.863982233225325e-06,
1986
+ "loss": 0.0019,
1987
+ "step": 26200
1988
+ },
1989
+ {
1990
+ "epoch": 2.5852747468789934,
1991
+ "grad_norm": 0.12570028007030487,
1992
+ "learning_rate": 7.68194560745622e-06,
1993
+ "loss": 0.0019,
1994
+ "step": 26300
1995
+ },
1996
+ {
1997
+ "epoch": 2.5951046888823357,
1998
+ "grad_norm": 0.11105850338935852,
1999
+ "learning_rate": 7.499908981687116e-06,
2000
+ "loss": 0.0016,
2001
+ "step": 26400
2002
+ },
2003
+ {
2004
+ "epoch": 2.604934630885678,
2005
+ "grad_norm": 0.03623613342642784,
2006
+ "learning_rate": 7.3178723559180105e-06,
2007
+ "loss": 0.0015,
2008
+ "step": 26500
2009
+ },
2010
+ {
2011
+ "epoch": 2.61476457288902,
2012
+ "grad_norm": 0.018633360043168068,
2013
+ "learning_rate": 7.135835730148906e-06,
2014
+ "loss": 0.0018,
2015
+ "step": 26600
2016
+ },
2017
+ {
2018
+ "epoch": 2.624594514892362,
2019
+ "grad_norm": 0.10424701869487762,
2020
+ "learning_rate": 6.9537991043798015e-06,
2021
+ "loss": 0.0018,
2022
+ "step": 26700
2023
+ },
2024
+ {
2025
+ "epoch": 2.6344244568957045,
2026
+ "grad_norm": 0.0371340848505497,
2027
+ "learning_rate": 6.771762478610696e-06,
2028
+ "loss": 0.0017,
2029
+ "step": 26800
2030
+ },
2031
+ {
2032
+ "epoch": 2.6442543988990463,
2033
+ "grad_norm": 0.07623058557510376,
2034
+ "learning_rate": 6.589725852841592e-06,
2035
+ "loss": 0.002,
2036
+ "step": 26900
2037
+ },
2038
+ {
2039
+ "epoch": 2.6540843409023887,
2040
+ "grad_norm": 0.051303476095199585,
2041
+ "learning_rate": 6.407689227072487e-06,
2042
+ "loss": 0.0016,
2043
+ "step": 27000
2044
+ },
2045
+ {
2046
+ "epoch": 2.663914282905731,
2047
+ "grad_norm": 0.02509203553199768,
2048
+ "learning_rate": 6.2256526013033825e-06,
2049
+ "loss": 0.0016,
2050
+ "step": 27100
2051
+ },
2052
+ {
2053
+ "epoch": 2.673744224909073,
2054
+ "grad_norm": 0.04684291034936905,
2055
+ "learning_rate": 6.043615975534278e-06,
2056
+ "loss": 0.0017,
2057
+ "step": 27200
2058
+ },
2059
+ {
2060
+ "epoch": 2.683574166912415,
2061
+ "grad_norm": 0.03077726438641548,
2062
+ "learning_rate": 5.861579349765173e-06,
2063
+ "loss": 0.0017,
2064
+ "step": 27300
2065
+ },
2066
+ {
2067
+ "epoch": 2.6934041089157574,
2068
+ "grad_norm": 0.056035276502370834,
2069
+ "learning_rate": 5.679542723996068e-06,
2070
+ "loss": 0.0019,
2071
+ "step": 27400
2072
+ },
2073
+ {
2074
+ "epoch": 2.7032340509190997,
2075
+ "grad_norm": 0.04686987027525902,
2076
+ "learning_rate": 5.4975060982269635e-06,
2077
+ "loss": 0.0017,
2078
+ "step": 27500
2079
+ },
2080
+ {
2081
+ "epoch": 2.7130639929224416,
2082
+ "grad_norm": 0.022549783810973167,
2083
+ "learning_rate": 5.315469472457859e-06,
2084
+ "loss": 0.0019,
2085
+ "step": 27600
2086
+ },
2087
+ {
2088
+ "epoch": 2.722893934925784,
2089
+ "grad_norm": 0.055259574204683304,
2090
+ "learning_rate": 5.1334328466887544e-06,
2091
+ "loss": 0.0016,
2092
+ "step": 27700
2093
+ },
2094
+ {
2095
+ "epoch": 2.732723876929126,
2096
+ "grad_norm": 0.055192168802022934,
2097
+ "learning_rate": 4.951396220919649e-06,
2098
+ "loss": 0.0015,
2099
+ "step": 27800
2100
+ },
2101
+ {
2102
+ "epoch": 2.7425538189324685,
2103
+ "grad_norm": 0.2088267058134079,
2104
+ "learning_rate": 4.7693595951505445e-06,
2105
+ "loss": 0.0018,
2106
+ "step": 27900
2107
+ },
2108
+ {
2109
+ "epoch": 2.7523837609358104,
2110
+ "grad_norm": 0.10806486010551453,
2111
+ "learning_rate": 4.58732296938144e-06,
2112
+ "loss": 0.0018,
2113
+ "step": 28000
2114
+ },
2115
+ {
2116
+ "epoch": 2.7523837609358104,
2117
+ "eval_f1": 0.9180684275996548,
2118
+ "eval_loss": 0.01810205541551113,
2119
+ "eval_precision": 0.9137369501204604,
2120
+ "eval_recall": 0.9224411665353001,
2121
+ "eval_runtime": 367.586,
2122
+ "eval_samples_per_second": 221.388,
2123
+ "eval_steps_per_second": 3.46,
2124
+ "step": 28000
2125
+ },
2126
+ {
2127
+ "epoch": 2.7622137029391527,
2128
+ "grad_norm": 0.04833536595106125,
2129
+ "learning_rate": 4.4052863436123355e-06,
2130
+ "loss": 0.0019,
2131
+ "step": 28100
2132
+ },
2133
+ {
2134
+ "epoch": 2.772043644942495,
2135
+ "grad_norm": 0.08527988195419312,
2136
+ "learning_rate": 4.22324971784323e-06,
2137
+ "loss": 0.0015,
2138
+ "step": 28200
2139
+ },
2140
+ {
2141
+ "epoch": 2.781873586945837,
2142
+ "grad_norm": 0.054375261068344116,
2143
+ "learning_rate": 4.0412130920741256e-06,
2144
+ "loss": 0.0015,
2145
+ "step": 28300
2146
+ },
2147
+ {
2148
+ "epoch": 2.791703528949179,
2149
+ "grad_norm": 0.013773391023278236,
2150
+ "learning_rate": 3.859176466305021e-06,
2151
+ "loss": 0.0019,
2152
+ "step": 28400
2153
+ },
2154
+ {
2155
+ "epoch": 2.8015334709525215,
2156
+ "grad_norm": 0.0823429673910141,
2157
+ "learning_rate": 3.677139840535916e-06,
2158
+ "loss": 0.0019,
2159
+ "step": 28500
2160
+ },
2161
+ {
2162
+ "epoch": 2.8113634129558633,
2163
+ "grad_norm": 0.01534045860171318,
2164
+ "learning_rate": 3.4951032147668115e-06,
2165
+ "loss": 0.0019,
2166
+ "step": 28600
2167
+ },
2168
+ {
2169
+ "epoch": 2.8211933549592056,
2170
+ "grad_norm": 0.06509745121002197,
2171
+ "learning_rate": 3.313066588997706e-06,
2172
+ "loss": 0.0015,
2173
+ "step": 28700
2174
+ },
2175
+ {
2176
+ "epoch": 2.831023296962548,
2177
+ "grad_norm": 0.022973215207457542,
2178
+ "learning_rate": 3.1310299632286016e-06,
2179
+ "loss": 0.0018,
2180
+ "step": 28800
2181
+ },
2182
+ {
2183
+ "epoch": 2.8408532389658903,
2184
+ "grad_norm": 0.058371126651763916,
2185
+ "learning_rate": 2.948993337459497e-06,
2186
+ "loss": 0.0015,
2187
+ "step": 28900
2188
+ },
2189
+ {
2190
+ "epoch": 2.850683180969232,
2191
+ "grad_norm": 0.20555707812309265,
2192
+ "learning_rate": 2.766956711690392e-06,
2193
+ "loss": 0.0016,
2194
+ "step": 29000
2195
+ },
2196
+ {
2197
+ "epoch": 2.8605131229725744,
2198
+ "grad_norm": 0.08409526199102402,
2199
+ "learning_rate": 2.5849200859212876e-06,
2200
+ "loss": 0.0016,
2201
+ "step": 29100
2202
+ },
2203
+ {
2204
+ "epoch": 2.8703430649759167,
2205
+ "grad_norm": 0.014734131284058094,
2206
+ "learning_rate": 2.402883460152183e-06,
2207
+ "loss": 0.0016,
2208
+ "step": 29200
2209
+ },
2210
+ {
2211
+ "epoch": 2.880173006979259,
2212
+ "grad_norm": 0.007813429459929466,
2213
+ "learning_rate": 2.220846834383078e-06,
2214
+ "loss": 0.0016,
2215
+ "step": 29300
2216
+ },
2217
+ {
2218
+ "epoch": 2.890002948982601,
2219
+ "grad_norm": 0.03011438436806202,
2220
+ "learning_rate": 2.038810208613973e-06,
2221
+ "loss": 0.0015,
2222
+ "step": 29400
2223
+ },
2224
+ {
2225
+ "epoch": 2.899832890985943,
2226
+ "grad_norm": 0.08900994062423706,
2227
+ "learning_rate": 1.8567735828448684e-06,
2228
+ "loss": 0.0019,
2229
+ "step": 29500
2230
+ },
2231
+ {
2232
+ "epoch": 2.9096628329892855,
2233
+ "grad_norm": 0.04198099300265312,
2234
+ "learning_rate": 1.6747369570757639e-06,
2235
+ "loss": 0.0017,
2236
+ "step": 29600
2237
+ },
2238
+ {
2239
+ "epoch": 2.9194927749926274,
2240
+ "grad_norm": 0.054798416793346405,
2241
+ "learning_rate": 1.4927003313066591e-06,
2242
+ "loss": 0.0016,
2243
+ "step": 29700
2244
+ },
2245
+ {
2246
+ "epoch": 2.9293227169959697,
2247
+ "grad_norm": 0.05114193260669708,
2248
+ "learning_rate": 1.3106637055375542e-06,
2249
+ "loss": 0.0016,
2250
+ "step": 29800
2251
+ },
2252
+ {
2253
+ "epoch": 2.939152658999312,
2254
+ "grad_norm": 0.08578815311193466,
2255
+ "learning_rate": 1.1286270797684494e-06,
2256
+ "loss": 0.0014,
2257
+ "step": 29900
2258
+ },
2259
+ {
2260
+ "epoch": 2.948982601002654,
2261
+ "grad_norm": 0.04622345417737961,
2262
+ "learning_rate": 9.465904539993447e-07,
2263
+ "loss": 0.0016,
2264
+ "step": 30000
2265
+ },
2266
+ {
2267
+ "epoch": 2.948982601002654,
2268
+ "eval_f1": 0.919165725910112,
2269
+ "eval_loss": 0.01758442632853985,
2270
+ "eval_precision": 0.9124650481558741,
2271
+ "eval_recall": 0.9259655444206734,
2272
+ "eval_runtime": 368.0845,
2273
+ "eval_samples_per_second": 221.088,
2274
+ "eval_steps_per_second": 3.456,
2275
+ "step": 30000
2276
+ }
2277
+ ],
2278
+ "logging_steps": 100,
2279
+ "max_steps": 30519,
2280
+ "num_input_tokens_seen": 0,
2281
+ "num_train_epochs": 3,
2282
+ "save_steps": 2000,
2283
+ "stateful_callbacks": {
2284
+ "TrainerControl": {
2285
+ "args": {
2286
+ "should_epoch_stop": false,
2287
+ "should_evaluate": false,
2288
+ "should_log": false,
2289
+ "should_save": true,
2290
+ "should_training_stop": false
2291
+ },
2292
+ "attributes": {}
2293
+ }
2294
+ },
2295
+ "total_flos": 3.271894202330419e+17,
2296
+ "train_batch_size": 32,
2297
+ "trial_name": null,
2298
+ "trial_params": null
2299
+ }
checkpoint-30000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c990fe4db5e0a76e772957870bc32e02469c1885a567fa0e531a4704336cc133
3
+ size 5841
checkpoint-30519/config.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForTokenClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": null,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "O",
25
+ "1": "B-ACCOUNTNUM",
26
+ "2": "I-ACCOUNTNUM",
27
+ "3": "B-BUILDINGNUM",
28
+ "4": "I-BUILDINGNUM",
29
+ "5": "B-CITY",
30
+ "6": "I-CITY",
31
+ "7": "B-CREDITCARDNUMBER",
32
+ "8": "I-CREDITCARDNUMBER",
33
+ "9": "B-DATEOFBIRTH",
34
+ "10": "I-DATEOFBIRTH",
35
+ "11": "B-DRIVERLICENSENUM",
36
+ "12": "I-DRIVERLICENSENUM",
37
+ "13": "B-EMAIL",
38
+ "14": "I-EMAIL",
39
+ "15": "B-GIVENNAME",
40
+ "16": "I-GIVENNAME",
41
+ "17": "B-IDCARDNUM",
42
+ "18": "I-IDCARDNUM",
43
+ "19": "B-PASSWORD",
44
+ "20": "I-PASSWORD",
45
+ "21": "B-SOCIALNUM",
46
+ "22": "I-SOCIALNUM",
47
+ "23": "B-STREET",
48
+ "24": "I-STREET",
49
+ "25": "B-SURNAME",
50
+ "26": "I-SURNAME",
51
+ "27": "B-TAXNUM",
52
+ "28": "I-TAXNUM",
53
+ "29": "B-TELEPHONENUM",
54
+ "30": "I-TELEPHONENUM",
55
+ "31": "B-USERNAME",
56
+ "32": "I-USERNAME",
57
+ "33": "B-ZIPCODE",
58
+ "34": "I-ZIPCODE"
59
+ },
60
+ "initializer_cutoff_factor": 2.0,
61
+ "initializer_range": 0.02,
62
+ "intermediate_size": 1152,
63
+ "label2id": {
64
+ "B-ACCOUNTNUM": 1,
65
+ "B-BUILDINGNUM": 3,
66
+ "B-CITY": 5,
67
+ "B-CREDITCARDNUMBER": 7,
68
+ "B-DATEOFBIRTH": 9,
69
+ "B-DRIVERLICENSENUM": 11,
70
+ "B-EMAIL": 13,
71
+ "B-GIVENNAME": 15,
72
+ "B-IDCARDNUM": 17,
73
+ "B-PASSWORD": 19,
74
+ "B-SOCIALNUM": 21,
75
+ "B-STREET": 23,
76
+ "B-SURNAME": 25,
77
+ "B-TAXNUM": 27,
78
+ "B-TELEPHONENUM": 29,
79
+ "B-USERNAME": 31,
80
+ "B-ZIPCODE": 33,
81
+ "I-ACCOUNTNUM": 2,
82
+ "I-BUILDINGNUM": 4,
83
+ "I-CITY": 6,
84
+ "I-CREDITCARDNUMBER": 8,
85
+ "I-DATEOFBIRTH": 10,
86
+ "I-DRIVERLICENSENUM": 12,
87
+ "I-EMAIL": 14,
88
+ "I-GIVENNAME": 16,
89
+ "I-IDCARDNUM": 18,
90
+ "I-PASSWORD": 20,
91
+ "I-SOCIALNUM": 22,
92
+ "I-STREET": 24,
93
+ "I-SURNAME": 26,
94
+ "I-TAXNUM": 28,
95
+ "I-TELEPHONENUM": 30,
96
+ "I-USERNAME": 32,
97
+ "I-ZIPCODE": 34,
98
+ "O": 0
99
+ },
100
+ "layer_norm_eps": 1e-05,
101
+ "local_attention": 128,
102
+ "local_rope_theta": 10000.0,
103
+ "max_position_embeddings": 8192,
104
+ "mlp_bias": false,
105
+ "mlp_dropout": 0.0,
106
+ "model_type": "modernbert",
107
+ "norm_bias": false,
108
+ "norm_eps": 1e-05,
109
+ "num_attention_heads": 12,
110
+ "num_hidden_layers": 22,
111
+ "pad_token_id": 50283,
112
+ "position_embedding_type": "absolute",
113
+ "repad_logits_with_grad": false,
114
+ "sep_token_id": 50282,
115
+ "sparse_pred_ignore_index": -100,
116
+ "sparse_prediction": false,
117
+ "transformers_version": "4.57.3",
118
+ "vocab_size": 50368
119
+ }
checkpoint-30519/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b390da162f442b9b120f0ae991ee3d779aae393107e3fe0ee4968e5d0bdb0cd0
3
+ size 598541300
checkpoint-30519/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589ece37adb6ceef7ca8c5215392439df40e8b985468b944578566c9c5b3fb6b
3
+ size 1197172811
checkpoint-30519/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9445552595536daf5bd8731be4eabb308bd26e76a3f4f0c20c4aa55fcf9ea202
3
+ size 14645
checkpoint-30519/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b312b1b11500133516c9ac9ed975fa2b31dded349db0f2b64c7da0204382d41
3
+ size 1383
checkpoint-30519/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fbe87fcbe41e5ae93ec670d4d5c2ff0afe0cfbfbc84a2021ff26d32277b5cc
3
+ size 1465
checkpoint-30519/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
checkpoint-30519/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-30519/tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "|||IP_ADDRESS|||",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<|padding|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "50254": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "50255": {
28
+ "content": " ",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "50256": {
36
+ "content": " ",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "50257": {
44
+ "content": " ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "50258": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50259": {
60
+ "content": " ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "50260": {
68
+ "content": " ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "50261": {
76
+ "content": " ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "50262": {
84
+ "content": " ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "50263": {
92
+ "content": " ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "50264": {
100
+ "content": " ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "50265": {
108
+ "content": " ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "50266": {
116
+ "content": " ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "50267": {
124
+ "content": " ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "50268": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "50269": {
140
+ "content": " ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "50270": {
148
+ "content": " ",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "50271": {
156
+ "content": " ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "50272": {
164
+ "content": " ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "50273": {
172
+ "content": " ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "50274": {
180
+ "content": " ",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "50275": {
188
+ "content": " ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "50276": {
196
+ "content": " ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "50277": {
204
+ "content": "|||EMAIL_ADDRESS|||",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "50278": {
212
+ "content": "|||PHONE_NUMBER|||",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "50279": {
220
+ "content": "<|endoftext|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "50280": {
228
+ "content": "[UNK]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "50281": {
236
+ "content": "[CLS]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "50282": {
244
+ "content": "[SEP]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "50283": {
252
+ "content": "[PAD]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "50284": {
260
+ "content": "[MASK]",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "50285": {
268
+ "content": "[unused0]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "50286": {
276
+ "content": "[unused1]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "50287": {
284
+ "content": "[unused2]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "50288": {
292
+ "content": "[unused3]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "50289": {
300
+ "content": "[unused4]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "50290": {
308
+ "content": "[unused5]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "50291": {
316
+ "content": "[unused6]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "50292": {
324
+ "content": "[unused7]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "50293": {
332
+ "content": "[unused8]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "50294": {
340
+ "content": "[unused9]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "50295": {
348
+ "content": "[unused10]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "50296": {
356
+ "content": "[unused11]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "50297": {
364
+ "content": "[unused12]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "50298": {
372
+ "content": "[unused13]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "50299": {
380
+ "content": "[unused14]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "50300": {
388
+ "content": "[unused15]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "50301": {
396
+ "content": "[unused16]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "50302": {
404
+ "content": "[unused17]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "50303": {
412
+ "content": "[unused18]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "50304": {
420
+ "content": "[unused19]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "50305": {
428
+ "content": "[unused20]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "50306": {
436
+ "content": "[unused21]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "50307": {
444
+ "content": "[unused22]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "50308": {
452
+ "content": "[unused23]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "50309": {
460
+ "content": "[unused24]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "50310": {
468
+ "content": "[unused25]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "50311": {
476
+ "content": "[unused26]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "50312": {
484
+ "content": "[unused27]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "50313": {
492
+ "content": "[unused28]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "50314": {
500
+ "content": "[unused29]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "50315": {
508
+ "content": "[unused30]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "50316": {
516
+ "content": "[unused31]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "50317": {
524
+ "content": "[unused32]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "50318": {
532
+ "content": "[unused33]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "50319": {
540
+ "content": "[unused34]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "50320": {
548
+ "content": "[unused35]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "50321": {
556
+ "content": "[unused36]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "50322": {
564
+ "content": "[unused37]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "50323": {
572
+ "content": "[unused38]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "50324": {
580
+ "content": "[unused39]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "50325": {
588
+ "content": "[unused40]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "50326": {
596
+ "content": "[unused41]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "50327": {
604
+ "content": "[unused42]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "50328": {
612
+ "content": "[unused43]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "50329": {
620
+ "content": "[unused44]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "50330": {
628
+ "content": "[unused45]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "50331": {
636
+ "content": "[unused46]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "50332": {
644
+ "content": "[unused47]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "50333": {
652
+ "content": "[unused48]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "50334": {
660
+ "content": "[unused49]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "50335": {
668
+ "content": "[unused50]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "50336": {
676
+ "content": "[unused51]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "50337": {
684
+ "content": "[unused52]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "50338": {
692
+ "content": "[unused53]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "50339": {
700
+ "content": "[unused54]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "50340": {
708
+ "content": "[unused55]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "50341": {
716
+ "content": "[unused56]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "50342": {
724
+ "content": "[unused57]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "50343": {
732
+ "content": "[unused58]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "50344": {
740
+ "content": "[unused59]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "50345": {
748
+ "content": "[unused60]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "50346": {
756
+ "content": "[unused61]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "50347": {
764
+ "content": "[unused62]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "50348": {
772
+ "content": "[unused63]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "50349": {
780
+ "content": "[unused64]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "50350": {
788
+ "content": "[unused65]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "50351": {
796
+ "content": "[unused66]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "50352": {
804
+ "content": "[unused67]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "50353": {
812
+ "content": "[unused68]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "50354": {
820
+ "content": "[unused69]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "50355": {
828
+ "content": "[unused70]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "50356": {
836
+ "content": "[unused71]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "50357": {
844
+ "content": "[unused72]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "50358": {
852
+ "content": "[unused73]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "50359": {
860
+ "content": "[unused74]",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "50360": {
868
+ "content": "[unused75]",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "50361": {
876
+ "content": "[unused76]",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "50362": {
884
+ "content": "[unused77]",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "50363": {
892
+ "content": "[unused78]",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "50364": {
900
+ "content": "[unused79]",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "50365": {
908
+ "content": "[unused80]",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "50366": {
916
+ "content": "[unused81]",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "50367": {
924
+ "content": "[unused82]",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ }
931
+ },
932
+ "clean_up_tokenization_spaces": true,
933
+ "cls_token": "[CLS]",
934
+ "extra_special_tokens": {},
935
+ "mask_token": "[MASK]",
936
+ "model_input_names": [
937
+ "input_ids",
938
+ "attention_mask"
939
+ ],
940
+ "model_max_length": 8192,
941
+ "pad_token": "[PAD]",
942
+ "sep_token": "[SEP]",
943
+ "tokenizer_class": "PreTrainedTokenizerFast",
944
+ "unk_token": "[UNK]"
945
+ }
checkpoint-30519/trainer_state.json ADDED
@@ -0,0 +1,2334 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 30000,
3
+ "best_metric": 0.919165725910112,
4
+ "best_model_checkpoint": "./pii-detector-modernbert/checkpoint-30000",
5
+ "epoch": 3.0,
6
+ "eval_steps": 2000,
7
+ "global_step": 30519,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.00982994200334218,
14
+ "grad_norm": 2.1159424781799316,
15
+ "learning_rate": 1.6218872870249017e-06,
16
+ "loss": 1.968,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.01965988400668436,
21
+ "grad_norm": 0.7022971510887146,
22
+ "learning_rate": 3.2601572739187415e-06,
23
+ "loss": 0.2013,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.029489826010026542,
28
+ "grad_norm": 0.7267266511917114,
29
+ "learning_rate": 4.898427260812582e-06,
30
+ "loss": 0.1417,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.03931976801336872,
35
+ "grad_norm": 0.9718811511993408,
36
+ "learning_rate": 6.536697247706422e-06,
37
+ "loss": 0.1007,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.0491497100167109,
42
+ "grad_norm": 0.38591882586479187,
43
+ "learning_rate": 8.174967234600263e-06,
44
+ "loss": 0.0696,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.058979652020053085,
49
+ "grad_norm": 0.6200196743011475,
50
+ "learning_rate": 9.813237221494102e-06,
51
+ "loss": 0.0548,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.06880959402339526,
56
+ "grad_norm": 0.46839049458503723,
57
+ "learning_rate": 1.1451507208387943e-05,
58
+ "loss": 0.0447,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.07863953602673744,
63
+ "grad_norm": 0.5770799517631531,
64
+ "learning_rate": 1.3089777195281782e-05,
65
+ "loss": 0.0415,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.08846947803007962,
70
+ "grad_norm": 0.4535345137119293,
71
+ "learning_rate": 1.4728047182175622e-05,
72
+ "loss": 0.035,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.0982994200334218,
77
+ "grad_norm": 0.5041764378547668,
78
+ "learning_rate": 1.6366317169069463e-05,
79
+ "loss": 0.0305,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.10812936203676399,
84
+ "grad_norm": 0.44773876667022705,
85
+ "learning_rate": 1.8004587155963304e-05,
86
+ "loss": 0.0289,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.11795930404010617,
91
+ "grad_norm": 0.235533207654953,
92
+ "learning_rate": 1.9642857142857145e-05,
93
+ "loss": 0.023,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.12778924604344835,
98
+ "grad_norm": 0.4770767092704773,
99
+ "learning_rate": 2.1281127129750983e-05,
100
+ "loss": 0.0243,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.13761918804679052,
105
+ "grad_norm": 0.3569801151752472,
106
+ "learning_rate": 2.2919397116644824e-05,
107
+ "loss": 0.0204,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.14744913005013272,
112
+ "grad_norm": 0.17573118209838867,
113
+ "learning_rate": 2.4557667103538665e-05,
114
+ "loss": 0.0193,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.15727907205347488,
119
+ "grad_norm": 0.4183891713619232,
120
+ "learning_rate": 2.6195937090432503e-05,
121
+ "loss": 0.0183,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.16710901405681705,
126
+ "grad_norm": 0.2571297287940979,
127
+ "learning_rate": 2.7834207077326347e-05,
128
+ "loss": 0.0161,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.17693895606015925,
133
+ "grad_norm": 0.20306524634361267,
134
+ "learning_rate": 2.9472477064220185e-05,
135
+ "loss": 0.0149,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.18676889806350142,
140
+ "grad_norm": 0.2766953110694885,
141
+ "learning_rate": 3.111074705111402e-05,
142
+ "loss": 0.0163,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.1965988400668436,
147
+ "grad_norm": 0.4682016372680664,
148
+ "learning_rate": 3.274901703800787e-05,
149
+ "loss": 0.0148,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.1965988400668436,
154
+ "eval_f1": 0.729076995389466,
155
+ "eval_loss": 0.0456073060631752,
156
+ "eval_precision": 0.6865494982046768,
157
+ "eval_recall": 0.7772210336673798,
158
+ "eval_runtime": 416.6538,
159
+ "eval_samples_per_second": 195.316,
160
+ "eval_steps_per_second": 3.053,
161
+ "step": 2000
162
+ },
163
+ {
164
+ "epoch": 0.20642878207018578,
165
+ "grad_norm": 0.26377245783805847,
166
+ "learning_rate": 3.4387287024901704e-05,
167
+ "loss": 0.015,
168
+ "step": 2100
169
+ },
170
+ {
171
+ "epoch": 0.21625872407352797,
172
+ "grad_norm": 0.20288439095020294,
173
+ "learning_rate": 3.602555701179555e-05,
174
+ "loss": 0.0138,
175
+ "step": 2200
176
+ },
177
+ {
178
+ "epoch": 0.22608866607687014,
179
+ "grad_norm": 0.3629586398601532,
180
+ "learning_rate": 3.7663826998689387e-05,
181
+ "loss": 0.0145,
182
+ "step": 2300
183
+ },
184
+ {
185
+ "epoch": 0.23591860808021234,
186
+ "grad_norm": 0.22589775919914246,
187
+ "learning_rate": 3.9302096985583224e-05,
188
+ "loss": 0.013,
189
+ "step": 2400
190
+ },
191
+ {
192
+ "epoch": 0.2457485500835545,
193
+ "grad_norm": 0.12344377487897873,
194
+ "learning_rate": 4.094036697247707e-05,
195
+ "loss": 0.0122,
196
+ "step": 2500
197
+ },
198
+ {
199
+ "epoch": 0.2555784920868967,
200
+ "grad_norm": 0.12900730967521667,
201
+ "learning_rate": 4.2578636959370906e-05,
202
+ "loss": 0.0129,
203
+ "step": 2600
204
+ },
205
+ {
206
+ "epoch": 0.26540843409023884,
207
+ "grad_norm": 0.15024515986442566,
208
+ "learning_rate": 4.4216906946264744e-05,
209
+ "loss": 0.0118,
210
+ "step": 2700
211
+ },
212
+ {
213
+ "epoch": 0.27523837609358104,
214
+ "grad_norm": 0.17616596817970276,
215
+ "learning_rate": 4.585517693315859e-05,
216
+ "loss": 0.0116,
217
+ "step": 2800
218
+ },
219
+ {
220
+ "epoch": 0.28506831809692323,
221
+ "grad_norm": 0.2715315520763397,
222
+ "learning_rate": 4.7493446920052426e-05,
223
+ "loss": 0.0128,
224
+ "step": 2900
225
+ },
226
+ {
227
+ "epoch": 0.29489826010026543,
228
+ "grad_norm": 0.14540570974349976,
229
+ "learning_rate": 4.913171690694627e-05,
230
+ "loss": 0.0114,
231
+ "step": 3000
232
+ },
233
+ {
234
+ "epoch": 0.30472820210360757,
235
+ "grad_norm": 0.2312837392091751,
236
+ "learning_rate": 4.991444278588852e-05,
237
+ "loss": 0.0127,
238
+ "step": 3100
239
+ },
240
+ {
241
+ "epoch": 0.31455814410694977,
242
+ "grad_norm": 0.08322272449731827,
243
+ "learning_rate": 4.973240616011942e-05,
244
+ "loss": 0.013,
245
+ "step": 3200
246
+ },
247
+ {
248
+ "epoch": 0.32438808611029196,
249
+ "grad_norm": 0.1441478580236435,
250
+ "learning_rate": 4.9550369534350316e-05,
251
+ "loss": 0.011,
252
+ "step": 3300
253
+ },
254
+ {
255
+ "epoch": 0.3342180281136341,
256
+ "grad_norm": 0.2058216631412506,
257
+ "learning_rate": 4.936833290858121e-05,
258
+ "loss": 0.0108,
259
+ "step": 3400
260
+ },
261
+ {
262
+ "epoch": 0.3440479701169763,
263
+ "grad_norm": 0.08180283010005951,
264
+ "learning_rate": 4.9186296282812107e-05,
265
+ "loss": 0.0112,
266
+ "step": 3500
267
+ },
268
+ {
269
+ "epoch": 0.3538779121203185,
270
+ "grad_norm": 0.1268218606710434,
271
+ "learning_rate": 4.9004259657043e-05,
272
+ "loss": 0.0114,
273
+ "step": 3600
274
+ },
275
+ {
276
+ "epoch": 0.3637078541236607,
277
+ "grad_norm": 0.08831259608268738,
278
+ "learning_rate": 4.88222230312739e-05,
279
+ "loss": 0.0106,
280
+ "step": 3700
281
+ },
282
+ {
283
+ "epoch": 0.37353779612700283,
284
+ "grad_norm": 0.12895138561725616,
285
+ "learning_rate": 4.864018640550479e-05,
286
+ "loss": 0.0105,
287
+ "step": 3800
288
+ },
289
+ {
290
+ "epoch": 0.383367738130345,
291
+ "grad_norm": 0.06203186884522438,
292
+ "learning_rate": 4.845814977973568e-05,
293
+ "loss": 0.0117,
294
+ "step": 3900
295
+ },
296
+ {
297
+ "epoch": 0.3931976801336872,
298
+ "grad_norm": 0.08485294133424759,
299
+ "learning_rate": 4.827611315396658e-05,
300
+ "loss": 0.0105,
301
+ "step": 4000
302
+ },
303
+ {
304
+ "epoch": 0.3931976801336872,
305
+ "eval_f1": 0.8286469466592665,
306
+ "eval_loss": 0.032480597496032715,
307
+ "eval_precision": 0.8092539524100847,
308
+ "eval_recall": 0.8489922306046617,
309
+ "eval_runtime": 368.0275,
310
+ "eval_samples_per_second": 221.122,
311
+ "eval_steps_per_second": 3.456,
312
+ "step": 4000
313
+ },
314
+ {
315
+ "epoch": 0.4030276221370294,
316
+ "grad_norm": 0.4025856554508209,
317
+ "learning_rate": 4.809407652819747e-05,
318
+ "loss": 0.0095,
319
+ "step": 4100
320
+ },
321
+ {
322
+ "epoch": 0.41285756414037156,
323
+ "grad_norm": 0.12447871267795563,
324
+ "learning_rate": 4.791203990242837e-05,
325
+ "loss": 0.0098,
326
+ "step": 4200
327
+ },
328
+ {
329
+ "epoch": 0.42268750614371375,
330
+ "grad_norm": 0.16537797451019287,
331
+ "learning_rate": 4.773000327665927e-05,
332
+ "loss": 0.0094,
333
+ "step": 4300
334
+ },
335
+ {
336
+ "epoch": 0.43251744814705595,
337
+ "grad_norm": 0.14634715020656586,
338
+ "learning_rate": 4.754796665089016e-05,
339
+ "loss": 0.0086,
340
+ "step": 4400
341
+ },
342
+ {
343
+ "epoch": 0.4423473901503981,
344
+ "grad_norm": 0.18309462070465088,
345
+ "learning_rate": 4.736593002512106e-05,
346
+ "loss": 0.0096,
347
+ "step": 4500
348
+ },
349
+ {
350
+ "epoch": 0.4521773321537403,
351
+ "grad_norm": 0.07428343594074249,
352
+ "learning_rate": 4.718389339935195e-05,
353
+ "loss": 0.0096,
354
+ "step": 4600
355
+ },
356
+ {
357
+ "epoch": 0.4620072741570825,
358
+ "grad_norm": 0.11093997955322266,
359
+ "learning_rate": 4.7001856773582845e-05,
360
+ "loss": 0.0097,
361
+ "step": 4700
362
+ },
363
+ {
364
+ "epoch": 0.4718372161604247,
365
+ "grad_norm": 0.035003211349248886,
366
+ "learning_rate": 4.6819820147813744e-05,
367
+ "loss": 0.0096,
368
+ "step": 4800
369
+ },
370
+ {
371
+ "epoch": 0.4816671581637668,
372
+ "grad_norm": 0.1413133144378662,
373
+ "learning_rate": 4.6637783522044636e-05,
374
+ "loss": 0.0086,
375
+ "step": 4900
376
+ },
377
+ {
378
+ "epoch": 0.491497100167109,
379
+ "grad_norm": 0.16881492733955383,
380
+ "learning_rate": 4.645574689627553e-05,
381
+ "loss": 0.0091,
382
+ "step": 5000
383
+ },
384
+ {
385
+ "epoch": 0.5013270421704512,
386
+ "grad_norm": 0.25650179386138916,
387
+ "learning_rate": 4.627371027050643e-05,
388
+ "loss": 0.0089,
389
+ "step": 5100
390
+ },
391
+ {
392
+ "epoch": 0.5111569841737934,
393
+ "grad_norm": 0.07248776406049728,
394
+ "learning_rate": 4.609167364473732e-05,
395
+ "loss": 0.0094,
396
+ "step": 5200
397
+ },
398
+ {
399
+ "epoch": 0.5209869261771356,
400
+ "grad_norm": 0.050960972905159,
401
+ "learning_rate": 4.590963701896822e-05,
402
+ "loss": 0.0083,
403
+ "step": 5300
404
+ },
405
+ {
406
+ "epoch": 0.5308168681804777,
407
+ "grad_norm": 0.10053224116563797,
408
+ "learning_rate": 4.572760039319912e-05,
409
+ "loss": 0.0083,
410
+ "step": 5400
411
+ },
412
+ {
413
+ "epoch": 0.5406468101838199,
414
+ "grad_norm": 0.13864193856716156,
415
+ "learning_rate": 4.554556376743001e-05,
416
+ "loss": 0.0088,
417
+ "step": 5500
418
+ },
419
+ {
420
+ "epoch": 0.5504767521871621,
421
+ "grad_norm": 0.10522215813398361,
422
+ "learning_rate": 4.536352714166091e-05,
423
+ "loss": 0.008,
424
+ "step": 5600
425
+ },
426
+ {
427
+ "epoch": 0.5603066941905043,
428
+ "grad_norm": 0.05200352147221565,
429
+ "learning_rate": 4.51814905158918e-05,
430
+ "loss": 0.0083,
431
+ "step": 5700
432
+ },
433
+ {
434
+ "epoch": 0.5701366361938465,
435
+ "grad_norm": 0.08757878094911575,
436
+ "learning_rate": 4.499945389012269e-05,
437
+ "loss": 0.008,
438
+ "step": 5800
439
+ },
440
+ {
441
+ "epoch": 0.5799665781971887,
442
+ "grad_norm": 0.08500000834465027,
443
+ "learning_rate": 4.481741726435359e-05,
444
+ "loss": 0.008,
445
+ "step": 5900
446
+ },
447
+ {
448
+ "epoch": 0.5897965202005309,
449
+ "grad_norm": 0.3124063313007355,
450
+ "learning_rate": 4.463538063858448e-05,
451
+ "loss": 0.0078,
452
+ "step": 6000
453
+ },
454
+ {
455
+ "epoch": 0.5897965202005309,
456
+ "eval_f1": 0.8566332444324409,
457
+ "eval_loss": 0.026570068672299385,
458
+ "eval_precision": 0.8561226325970579,
459
+ "eval_recall": 0.8571444657133206,
460
+ "eval_runtime": 368.8056,
461
+ "eval_samples_per_second": 220.656,
462
+ "eval_steps_per_second": 3.449,
463
+ "step": 6000
464
+ },
465
+ {
466
+ "epoch": 0.599626462203873,
467
+ "grad_norm": 0.05959346517920494,
468
+ "learning_rate": 4.445334401281538e-05,
469
+ "loss": 0.0083,
470
+ "step": 6100
471
+ },
472
+ {
473
+ "epoch": 0.6094564042072151,
474
+ "grad_norm": 0.07874953001737595,
475
+ "learning_rate": 4.4271307387046274e-05,
476
+ "loss": 0.0076,
477
+ "step": 6200
478
+ },
479
+ {
480
+ "epoch": 0.6192863462105573,
481
+ "grad_norm": 0.10230255872011185,
482
+ "learning_rate": 4.408927076127717e-05,
483
+ "loss": 0.008,
484
+ "step": 6300
485
+ },
486
+ {
487
+ "epoch": 0.6291162882138995,
488
+ "grad_norm": 0.08698707818984985,
489
+ "learning_rate": 4.390723413550807e-05,
490
+ "loss": 0.0076,
491
+ "step": 6400
492
+ },
493
+ {
494
+ "epoch": 0.6389462302172417,
495
+ "grad_norm": 0.050512004643678665,
496
+ "learning_rate": 4.3725197509738964e-05,
497
+ "loss": 0.0087,
498
+ "step": 6500
499
+ },
500
+ {
501
+ "epoch": 0.6487761722205839,
502
+ "grad_norm": 0.1262696236371994,
503
+ "learning_rate": 4.3543160883969856e-05,
504
+ "loss": 0.0091,
505
+ "step": 6600
506
+ },
507
+ {
508
+ "epoch": 0.6586061142239261,
509
+ "grad_norm": 0.10209453850984573,
510
+ "learning_rate": 4.3361124258200755e-05,
511
+ "loss": 0.0072,
512
+ "step": 6700
513
+ },
514
+ {
515
+ "epoch": 0.6684360562272682,
516
+ "grad_norm": 0.13946746289730072,
517
+ "learning_rate": 4.317908763243165e-05,
518
+ "loss": 0.0077,
519
+ "step": 6800
520
+ },
521
+ {
522
+ "epoch": 0.6782659982306104,
523
+ "grad_norm": 0.15709851682186127,
524
+ "learning_rate": 4.299705100666254e-05,
525
+ "loss": 0.0077,
526
+ "step": 6900
527
+ },
528
+ {
529
+ "epoch": 0.6880959402339526,
530
+ "grad_norm": 0.072502002120018,
531
+ "learning_rate": 4.281501438089344e-05,
532
+ "loss": 0.0079,
533
+ "step": 7000
534
+ },
535
+ {
536
+ "epoch": 0.6979258822372948,
537
+ "grad_norm": 0.17354685068130493,
538
+ "learning_rate": 4.263297775512433e-05,
539
+ "loss": 0.0082,
540
+ "step": 7100
541
+ },
542
+ {
543
+ "epoch": 0.707755824240637,
544
+ "grad_norm": 0.14260995388031006,
545
+ "learning_rate": 4.245094112935523e-05,
546
+ "loss": 0.0077,
547
+ "step": 7200
548
+ },
549
+ {
550
+ "epoch": 0.7175857662439792,
551
+ "grad_norm": 0.04445793479681015,
552
+ "learning_rate": 4.226890450358612e-05,
553
+ "loss": 0.0068,
554
+ "step": 7300
555
+ },
556
+ {
557
+ "epoch": 0.7274157082473214,
558
+ "grad_norm": 0.1454411894083023,
559
+ "learning_rate": 4.208686787781702e-05,
560
+ "loss": 0.0072,
561
+ "step": 7400
562
+ },
563
+ {
564
+ "epoch": 0.7372456502506636,
565
+ "grad_norm": 0.0707961767911911,
566
+ "learning_rate": 4.190483125204792e-05,
567
+ "loss": 0.0073,
568
+ "step": 7500
569
+ },
570
+ {
571
+ "epoch": 0.7470755922540057,
572
+ "grad_norm": 0.1078498363494873,
573
+ "learning_rate": 4.172279462627881e-05,
574
+ "loss": 0.0079,
575
+ "step": 7600
576
+ },
577
+ {
578
+ "epoch": 0.7569055342573479,
579
+ "grad_norm": 0.044271912425756454,
580
+ "learning_rate": 4.15407580005097e-05,
581
+ "loss": 0.0079,
582
+ "step": 7700
583
+ },
584
+ {
585
+ "epoch": 0.76673547626069,
586
+ "grad_norm": 0.04950639605522156,
587
+ "learning_rate": 4.13587213747406e-05,
588
+ "loss": 0.0067,
589
+ "step": 7800
590
+ },
591
+ {
592
+ "epoch": 0.7765654182640322,
593
+ "grad_norm": 0.11214105784893036,
594
+ "learning_rate": 4.1176684748971494e-05,
595
+ "loss": 0.0077,
596
+ "step": 7900
597
+ },
598
+ {
599
+ "epoch": 0.7863953602673744,
600
+ "grad_norm": 0.08136852085590363,
601
+ "learning_rate": 4.099464812320239e-05,
602
+ "loss": 0.0071,
603
+ "step": 8000
604
+ },
605
+ {
606
+ "epoch": 0.7863953602673744,
607
+ "eval_f1": 0.8711044849186127,
608
+ "eval_loss": 0.023490285500884056,
609
+ "eval_precision": 0.8608655495447948,
610
+ "eval_recall": 0.8815899110460533,
611
+ "eval_runtime": 368.1099,
612
+ "eval_samples_per_second": 221.073,
613
+ "eval_steps_per_second": 3.455,
614
+ "step": 8000
615
+ },
616
+ {
617
+ "epoch": 0.7962253022707166,
618
+ "grad_norm": 0.05617125704884529,
619
+ "learning_rate": 4.0812611497433284e-05,
620
+ "loss": 0.0076,
621
+ "step": 8100
622
+ },
623
+ {
624
+ "epoch": 0.8060552442740588,
625
+ "grad_norm": 0.09260338544845581,
626
+ "learning_rate": 4.0630574871664177e-05,
627
+ "loss": 0.0075,
628
+ "step": 8200
629
+ },
630
+ {
631
+ "epoch": 0.8158851862774009,
632
+ "grad_norm": 0.04998739808797836,
633
+ "learning_rate": 4.0448538245895075e-05,
634
+ "loss": 0.0073,
635
+ "step": 8300
636
+ },
637
+ {
638
+ "epoch": 0.8257151282807431,
639
+ "grad_norm": 0.09871978312730789,
640
+ "learning_rate": 4.0266501620125974e-05,
641
+ "loss": 0.0073,
642
+ "step": 8400
643
+ },
644
+ {
645
+ "epoch": 0.8355450702840853,
646
+ "grad_norm": 0.23389624059200287,
647
+ "learning_rate": 4.0084464994356866e-05,
648
+ "loss": 0.0074,
649
+ "step": 8500
650
+ },
651
+ {
652
+ "epoch": 0.8453750122874275,
653
+ "grad_norm": 0.08000567555427551,
654
+ "learning_rate": 3.9902428368587765e-05,
655
+ "loss": 0.0079,
656
+ "step": 8600
657
+ },
658
+ {
659
+ "epoch": 0.8552049542907697,
660
+ "grad_norm": 0.038756221532821655,
661
+ "learning_rate": 3.972039174281866e-05,
662
+ "loss": 0.0086,
663
+ "step": 8700
664
+ },
665
+ {
666
+ "epoch": 0.8650348962941119,
667
+ "grad_norm": 0.0853879302740097,
668
+ "learning_rate": 3.953835511704955e-05,
669
+ "loss": 0.0067,
670
+ "step": 8800
671
+ },
672
+ {
673
+ "epoch": 0.8748648382974541,
674
+ "grad_norm": 0.1053905338048935,
675
+ "learning_rate": 3.935631849128045e-05,
676
+ "loss": 0.0067,
677
+ "step": 8900
678
+ },
679
+ {
680
+ "epoch": 0.8846947803007962,
681
+ "grad_norm": 0.09345783293247223,
682
+ "learning_rate": 3.917428186551134e-05,
683
+ "loss": 0.0073,
684
+ "step": 9000
685
+ },
686
+ {
687
+ "epoch": 0.8945247223041384,
688
+ "grad_norm": 0.029876919463276863,
689
+ "learning_rate": 3.899224523974224e-05,
690
+ "loss": 0.0068,
691
+ "step": 9100
692
+ },
693
+ {
694
+ "epoch": 0.9043546643074806,
695
+ "grad_norm": 0.09708785265684128,
696
+ "learning_rate": 3.881020861397313e-05,
697
+ "loss": 0.007,
698
+ "step": 9200
699
+ },
700
+ {
701
+ "epoch": 0.9141846063108228,
702
+ "grad_norm": 0.04830634221434593,
703
+ "learning_rate": 3.862817198820402e-05,
704
+ "loss": 0.0073,
705
+ "step": 9300
706
+ },
707
+ {
708
+ "epoch": 0.924014548314165,
709
+ "grad_norm": 0.09089767932891846,
710
+ "learning_rate": 3.844613536243493e-05,
711
+ "loss": 0.0067,
712
+ "step": 9400
713
+ },
714
+ {
715
+ "epoch": 0.9338444903175072,
716
+ "grad_norm": 0.053389597684144974,
717
+ "learning_rate": 3.826409873666582e-05,
718
+ "loss": 0.0068,
719
+ "step": 9500
720
+ },
721
+ {
722
+ "epoch": 0.9436744323208494,
723
+ "grad_norm": 0.050620563328266144,
724
+ "learning_rate": 3.808206211089671e-05,
725
+ "loss": 0.0061,
726
+ "step": 9600
727
+ },
728
+ {
729
+ "epoch": 0.9535043743241914,
730
+ "grad_norm": 0.08616846799850464,
731
+ "learning_rate": 3.790002548512761e-05,
732
+ "loss": 0.0069,
733
+ "step": 9700
734
+ },
735
+ {
736
+ "epoch": 0.9633343163275336,
737
+ "grad_norm": 0.07850134372711182,
738
+ "learning_rate": 3.7717988859358504e-05,
739
+ "loss": 0.0072,
740
+ "step": 9800
741
+ },
742
+ {
743
+ "epoch": 0.9731642583308758,
744
+ "grad_norm": 0.19498451054096222,
745
+ "learning_rate": 3.75359522335894e-05,
746
+ "loss": 0.0062,
747
+ "step": 9900
748
+ },
749
+ {
750
+ "epoch": 0.982994200334218,
751
+ "grad_norm": 0.05255872756242752,
752
+ "learning_rate": 3.7353915607820295e-05,
753
+ "loss": 0.0059,
754
+ "step": 10000
755
+ },
756
+ {
757
+ "epoch": 0.982994200334218,
758
+ "eval_f1": 0.8865828197851773,
759
+ "eval_loss": 0.02146231383085251,
760
+ "eval_precision": 0.881456666184376,
761
+ "eval_recall": 0.891768944938633,
762
+ "eval_runtime": 368.4725,
763
+ "eval_samples_per_second": 220.855,
764
+ "eval_steps_per_second": 3.452,
765
+ "step": 10000
766
+ },
767
+ {
768
+ "epoch": 0.9928241423375602,
769
+ "grad_norm": 0.13814912736415863,
770
+ "learning_rate": 3.717187898205119e-05,
771
+ "loss": 0.0068,
772
+ "step": 10100
773
+ },
774
+ {
775
+ "epoch": 1.0026540843409024,
776
+ "grad_norm": 0.06895862519741058,
777
+ "learning_rate": 3.6989842356282086e-05,
778
+ "loss": 0.006,
779
+ "step": 10200
780
+ },
781
+ {
782
+ "epoch": 1.0124840263442445,
783
+ "grad_norm": 0.048868328332901,
784
+ "learning_rate": 3.680780573051298e-05,
785
+ "loss": 0.0045,
786
+ "step": 10300
787
+ },
788
+ {
789
+ "epoch": 1.0223139683475868,
790
+ "grad_norm": 0.030351588502526283,
791
+ "learning_rate": 3.662576910474388e-05,
792
+ "loss": 0.0055,
793
+ "step": 10400
794
+ },
795
+ {
796
+ "epoch": 1.032143910350929,
797
+ "grad_norm": 0.02089417539536953,
798
+ "learning_rate": 3.6443732478974776e-05,
799
+ "loss": 0.005,
800
+ "step": 10500
801
+ },
802
+ {
803
+ "epoch": 1.0419738523542712,
804
+ "grad_norm": 0.04806596413254738,
805
+ "learning_rate": 3.626169585320567e-05,
806
+ "loss": 0.0047,
807
+ "step": 10600
808
+ },
809
+ {
810
+ "epoch": 1.0518037943576133,
811
+ "grad_norm": 0.12568242847919464,
812
+ "learning_rate": 3.607965922743656e-05,
813
+ "loss": 0.005,
814
+ "step": 10700
815
+ },
816
+ {
817
+ "epoch": 1.0616337363609554,
818
+ "grad_norm": 0.06524453312158585,
819
+ "learning_rate": 3.589762260166746e-05,
820
+ "loss": 0.0057,
821
+ "step": 10800
822
+ },
823
+ {
824
+ "epoch": 1.0714636783642977,
825
+ "grad_norm": 0.06146615743637085,
826
+ "learning_rate": 3.571558597589835e-05,
827
+ "loss": 0.0048,
828
+ "step": 10900
829
+ },
830
+ {
831
+ "epoch": 1.0812936203676398,
832
+ "grad_norm": 0.044653356075286865,
833
+ "learning_rate": 3.553354935012925e-05,
834
+ "loss": 0.005,
835
+ "step": 11000
836
+ },
837
+ {
838
+ "epoch": 1.091123562370982,
839
+ "grad_norm": 0.03471142798662186,
840
+ "learning_rate": 3.535151272436014e-05,
841
+ "loss": 0.0051,
842
+ "step": 11100
843
+ },
844
+ {
845
+ "epoch": 1.1009535043743242,
846
+ "grad_norm": 0.026662476360797882,
847
+ "learning_rate": 3.5169476098591034e-05,
848
+ "loss": 0.0051,
849
+ "step": 11200
850
+ },
851
+ {
852
+ "epoch": 1.1107834463776665,
853
+ "grad_norm": 0.08286290615797043,
854
+ "learning_rate": 3.498743947282193e-05,
855
+ "loss": 0.0052,
856
+ "step": 11300
857
+ },
858
+ {
859
+ "epoch": 1.1206133883810085,
860
+ "grad_norm": 0.04743447154760361,
861
+ "learning_rate": 3.4805402847052825e-05,
862
+ "loss": 0.0051,
863
+ "step": 11400
864
+ },
865
+ {
866
+ "epoch": 1.1304433303843506,
867
+ "grad_norm": 0.04862457141280174,
868
+ "learning_rate": 3.4623366221283724e-05,
869
+ "loss": 0.005,
870
+ "step": 11500
871
+ },
872
+ {
873
+ "epoch": 1.140273272387693,
874
+ "grad_norm": 0.10798755288124084,
875
+ "learning_rate": 3.444132959551462e-05,
876
+ "loss": 0.0055,
877
+ "step": 11600
878
+ },
879
+ {
880
+ "epoch": 1.150103214391035,
881
+ "grad_norm": 0.06220352649688721,
882
+ "learning_rate": 3.4259292969745515e-05,
883
+ "loss": 0.0055,
884
+ "step": 11700
885
+ },
886
+ {
887
+ "epoch": 1.1599331563943773,
888
+ "grad_norm": 0.14796103537082672,
889
+ "learning_rate": 3.407725634397641e-05,
890
+ "loss": 0.0049,
891
+ "step": 11800
892
+ },
893
+ {
894
+ "epoch": 1.1697630983977194,
895
+ "grad_norm": 0.05921417847275734,
896
+ "learning_rate": 3.3895219718207305e-05,
897
+ "loss": 0.0047,
898
+ "step": 11900
899
+ },
900
+ {
901
+ "epoch": 1.1795930404010617,
902
+ "grad_norm": 0.12081274390220642,
903
+ "learning_rate": 3.37131830924382e-05,
904
+ "loss": 0.0053,
905
+ "step": 12000
906
+ },
907
+ {
908
+ "epoch": 1.1795930404010617,
909
+ "eval_f1": 0.8960633085174066,
910
+ "eval_loss": 0.020371899008750916,
911
+ "eval_precision": 0.8882999911480924,
912
+ "eval_recall": 0.9039635176218894,
913
+ "eval_runtime": 367.9724,
914
+ "eval_samples_per_second": 221.155,
915
+ "eval_steps_per_second": 3.457,
916
+ "step": 12000
917
+ },
918
+ {
919
+ "epoch": 1.1894229824044038,
920
+ "grad_norm": 0.041594497859478,
921
+ "learning_rate": 3.3531146466669096e-05,
922
+ "loss": 0.0057,
923
+ "step": 12100
924
+ },
925
+ {
926
+ "epoch": 1.199252924407746,
927
+ "grad_norm": 0.066756471991539,
928
+ "learning_rate": 3.334910984089999e-05,
929
+ "loss": 0.005,
930
+ "step": 12200
931
+ },
932
+ {
933
+ "epoch": 1.2090828664110882,
934
+ "grad_norm": 0.10321182757616043,
935
+ "learning_rate": 3.316707321513088e-05,
936
+ "loss": 0.0051,
937
+ "step": 12300
938
+ },
939
+ {
940
+ "epoch": 1.2189128084144303,
941
+ "grad_norm": 0.1184532642364502,
942
+ "learning_rate": 3.298503658936178e-05,
943
+ "loss": 0.005,
944
+ "step": 12400
945
+ },
946
+ {
947
+ "epoch": 1.2287427504177726,
948
+ "grad_norm": 0.10429126024246216,
949
+ "learning_rate": 3.280299996359268e-05,
950
+ "loss": 0.0053,
951
+ "step": 12500
952
+ },
953
+ {
954
+ "epoch": 1.2385726924211147,
955
+ "grad_norm": 0.1311911642551422,
956
+ "learning_rate": 3.262096333782357e-05,
957
+ "loss": 0.0052,
958
+ "step": 12600
959
+ },
960
+ {
961
+ "epoch": 1.248402634424457,
962
+ "grad_norm": 0.12976108491420746,
963
+ "learning_rate": 3.243892671205447e-05,
964
+ "loss": 0.005,
965
+ "step": 12700
966
+ },
967
+ {
968
+ "epoch": 1.258232576427799,
969
+ "grad_norm": 0.06385162472724915,
970
+ "learning_rate": 3.225689008628536e-05,
971
+ "loss": 0.0045,
972
+ "step": 12800
973
+ },
974
+ {
975
+ "epoch": 1.2680625184311412,
976
+ "grad_norm": 0.10582277178764343,
977
+ "learning_rate": 3.207485346051626e-05,
978
+ "loss": 0.005,
979
+ "step": 12900
980
+ },
981
+ {
982
+ "epoch": 1.2778924604344835,
983
+ "grad_norm": 0.10751399397850037,
984
+ "learning_rate": 3.189281683474715e-05,
985
+ "loss": 0.0048,
986
+ "step": 13000
987
+ },
988
+ {
989
+ "epoch": 1.2877224024378255,
990
+ "grad_norm": 0.0692177563905716,
991
+ "learning_rate": 3.1710780208978044e-05,
992
+ "loss": 0.0045,
993
+ "step": 13100
994
+ },
995
+ {
996
+ "epoch": 1.2975523444411678,
997
+ "grad_norm": 0.1047593429684639,
998
+ "learning_rate": 3.152874358320894e-05,
999
+ "loss": 0.0048,
1000
+ "step": 13200
1001
+ },
1002
+ {
1003
+ "epoch": 1.30738228644451,
1004
+ "grad_norm": 0.04881567507982254,
1005
+ "learning_rate": 3.1346706957439835e-05,
1006
+ "loss": 0.0045,
1007
+ "step": 13300
1008
+ },
1009
+ {
1010
+ "epoch": 1.3172122284478522,
1011
+ "grad_norm": 0.20649947226047516,
1012
+ "learning_rate": 3.1164670331670734e-05,
1013
+ "loss": 0.0048,
1014
+ "step": 13400
1015
+ },
1016
+ {
1017
+ "epoch": 1.3270421704511943,
1018
+ "grad_norm": 0.07260572165250778,
1019
+ "learning_rate": 3.098263370590163e-05,
1020
+ "loss": 0.0046,
1021
+ "step": 13500
1022
+ },
1023
+ {
1024
+ "epoch": 1.3368721124545364,
1025
+ "grad_norm": 0.14177989959716797,
1026
+ "learning_rate": 3.0800597080132525e-05,
1027
+ "loss": 0.0048,
1028
+ "step": 13600
1029
+ },
1030
+ {
1031
+ "epoch": 1.3467020544578787,
1032
+ "grad_norm": 0.04176017642021179,
1033
+ "learning_rate": 3.0618560454363424e-05,
1034
+ "loss": 0.0048,
1035
+ "step": 13700
1036
+ },
1037
+ {
1038
+ "epoch": 1.3565319964612208,
1039
+ "grad_norm": 0.09111765027046204,
1040
+ "learning_rate": 3.0436523828594316e-05,
1041
+ "loss": 0.0049,
1042
+ "step": 13800
1043
+ },
1044
+ {
1045
+ "epoch": 1.366361938464563,
1046
+ "grad_norm": 0.039208538830280304,
1047
+ "learning_rate": 3.025448720282521e-05,
1048
+ "loss": 0.005,
1049
+ "step": 13900
1050
+ },
1051
+ {
1052
+ "epoch": 1.3761918804679052,
1053
+ "grad_norm": 0.027969840914011,
1054
+ "learning_rate": 3.0072450577056103e-05,
1055
+ "loss": 0.0049,
1056
+ "step": 14000
1057
+ },
1058
+ {
1059
+ "epoch": 1.3761918804679052,
1060
+ "eval_f1": 0.8982576922219995,
1061
+ "eval_loss": 0.01912725158035755,
1062
+ "eval_precision": 0.8887921342122087,
1063
+ "eval_recall": 0.9079270352437788,
1064
+ "eval_runtime": 368.6528,
1065
+ "eval_samples_per_second": 220.747,
1066
+ "eval_steps_per_second": 3.45,
1067
+ "step": 14000
1068
+ },
1069
+ {
1070
+ "epoch": 1.3860218224712475,
1071
+ "grad_norm": 0.05266612395644188,
1072
+ "learning_rate": 2.9890413951287e-05,
1073
+ "loss": 0.0048,
1074
+ "step": 14100
1075
+ },
1076
+ {
1077
+ "epoch": 1.3958517644745896,
1078
+ "grad_norm": 0.04086877778172493,
1079
+ "learning_rate": 2.9708377325517894e-05,
1080
+ "loss": 0.0044,
1081
+ "step": 14200
1082
+ },
1083
+ {
1084
+ "epoch": 1.4056817064779317,
1085
+ "grad_norm": 0.023200375959277153,
1086
+ "learning_rate": 2.952634069974879e-05,
1087
+ "loss": 0.0057,
1088
+ "step": 14300
1089
+ },
1090
+ {
1091
+ "epoch": 1.415511648481274,
1092
+ "grad_norm": 0.08787036687135696,
1093
+ "learning_rate": 2.9344304073979685e-05,
1094
+ "loss": 0.0051,
1095
+ "step": 14400
1096
+ },
1097
+ {
1098
+ "epoch": 1.425341590484616,
1099
+ "grad_norm": 0.05955840274691582,
1100
+ "learning_rate": 2.9162267448210584e-05,
1101
+ "loss": 0.0046,
1102
+ "step": 14500
1103
+ },
1104
+ {
1105
+ "epoch": 1.4351715324879584,
1106
+ "grad_norm": 0.08939366787672043,
1107
+ "learning_rate": 2.898023082244148e-05,
1108
+ "loss": 0.0055,
1109
+ "step": 14600
1110
+ },
1111
+ {
1112
+ "epoch": 1.4450014744913005,
1113
+ "grad_norm": 0.06770022213459015,
1114
+ "learning_rate": 2.8798194196672372e-05,
1115
+ "loss": 0.005,
1116
+ "step": 14700
1117
+ },
1118
+ {
1119
+ "epoch": 1.4548314164946428,
1120
+ "grad_norm": 0.10894829034805298,
1121
+ "learning_rate": 2.8616157570903267e-05,
1122
+ "loss": 0.0048,
1123
+ "step": 14800
1124
+ },
1125
+ {
1126
+ "epoch": 1.4646613584979848,
1127
+ "grad_norm": 0.045476239174604416,
1128
+ "learning_rate": 2.8434120945134163e-05,
1129
+ "loss": 0.0047,
1130
+ "step": 14900
1131
+ },
1132
+ {
1133
+ "epoch": 1.474491300501327,
1134
+ "grad_norm": 0.03545854985713959,
1135
+ "learning_rate": 2.8252084319365058e-05,
1136
+ "loss": 0.004,
1137
+ "step": 15000
1138
+ },
1139
+ {
1140
+ "epoch": 1.4843212425046692,
1141
+ "grad_norm": 0.028701910749077797,
1142
+ "learning_rate": 2.8070047693595954e-05,
1143
+ "loss": 0.0046,
1144
+ "step": 15100
1145
+ },
1146
+ {
1147
+ "epoch": 1.4941511845080113,
1148
+ "grad_norm": 0.13125169277191162,
1149
+ "learning_rate": 2.7888011067826846e-05,
1150
+ "loss": 0.0047,
1151
+ "step": 15200
1152
+ },
1153
+ {
1154
+ "epoch": 1.5039811265113536,
1155
+ "grad_norm": 0.13967622816562653,
1156
+ "learning_rate": 2.770597444205774e-05,
1157
+ "loss": 0.0045,
1158
+ "step": 15300
1159
+ },
1160
+ {
1161
+ "epoch": 1.5138110685146957,
1162
+ "grad_norm": 0.051971685141325,
1163
+ "learning_rate": 2.7523937816288637e-05,
1164
+ "loss": 0.0046,
1165
+ "step": 15400
1166
+ },
1167
+ {
1168
+ "epoch": 1.523641010518038,
1169
+ "grad_norm": 0.08570306748151779,
1170
+ "learning_rate": 2.7341901190519535e-05,
1171
+ "loss": 0.0048,
1172
+ "step": 15500
1173
+ },
1174
+ {
1175
+ "epoch": 1.53347095252138,
1176
+ "grad_norm": 0.08714251965284348,
1177
+ "learning_rate": 2.715986456475043e-05,
1178
+ "loss": 0.0043,
1179
+ "step": 15600
1180
+ },
1181
+ {
1182
+ "epoch": 1.5433008945247222,
1183
+ "grad_norm": 0.031157121062278748,
1184
+ "learning_rate": 2.6977827938981326e-05,
1185
+ "loss": 0.0044,
1186
+ "step": 15700
1187
+ },
1188
+ {
1189
+ "epoch": 1.5531308365280645,
1190
+ "grad_norm": 0.0643945187330246,
1191
+ "learning_rate": 2.6795791313212222e-05,
1192
+ "loss": 0.0039,
1193
+ "step": 15800
1194
+ },
1195
+ {
1196
+ "epoch": 1.5629607785314068,
1197
+ "grad_norm": 0.10222964733839035,
1198
+ "learning_rate": 2.6613754687443114e-05,
1199
+ "loss": 0.0048,
1200
+ "step": 15900
1201
+ },
1202
+ {
1203
+ "epoch": 1.5727907205347489,
1204
+ "grad_norm": 0.05405284836888313,
1205
+ "learning_rate": 2.643171806167401e-05,
1206
+ "loss": 0.0041,
1207
+ "step": 16000
1208
+ },
1209
+ {
1210
+ "epoch": 1.5727907205347489,
1211
+ "eval_f1": 0.9061435464890399,
1212
+ "eval_loss": 0.019078785553574562,
1213
+ "eval_precision": 0.9062404973477042,
1214
+ "eval_recall": 0.9060466163720302,
1215
+ "eval_runtime": 368.5231,
1216
+ "eval_samples_per_second": 220.825,
1217
+ "eval_steps_per_second": 3.452,
1218
+ "step": 16000
1219
+ },
1220
+ {
1221
+ "epoch": 1.582620662538091,
1222
+ "grad_norm": 0.03582916781306267,
1223
+ "learning_rate": 2.6249681435904905e-05,
1224
+ "loss": 0.0042,
1225
+ "step": 16100
1226
+ },
1227
+ {
1228
+ "epoch": 1.5924506045414333,
1229
+ "grad_norm": 0.08402363955974579,
1230
+ "learning_rate": 2.60676448101358e-05,
1231
+ "loss": 0.0048,
1232
+ "step": 16200
1233
+ },
1234
+ {
1235
+ "epoch": 1.6022805465447754,
1236
+ "grad_norm": 0.08818399906158447,
1237
+ "learning_rate": 2.5885608184366696e-05,
1238
+ "loss": 0.0046,
1239
+ "step": 16300
1240
+ },
1241
+ {
1242
+ "epoch": 1.6121104885481174,
1243
+ "grad_norm": 0.04633729159832001,
1244
+ "learning_rate": 2.5703571558597588e-05,
1245
+ "loss": 0.0046,
1246
+ "step": 16400
1247
+ },
1248
+ {
1249
+ "epoch": 1.6219404305514598,
1250
+ "grad_norm": 0.05240938439965248,
1251
+ "learning_rate": 2.5521534932828483e-05,
1252
+ "loss": 0.0049,
1253
+ "step": 16500
1254
+ },
1255
+ {
1256
+ "epoch": 1.631770372554802,
1257
+ "grad_norm": 0.044343069195747375,
1258
+ "learning_rate": 2.5339498307059382e-05,
1259
+ "loss": 0.0046,
1260
+ "step": 16600
1261
+ },
1262
+ {
1263
+ "epoch": 1.6416003145581441,
1264
+ "grad_norm": 0.05871371924877167,
1265
+ "learning_rate": 2.5157461681290278e-05,
1266
+ "loss": 0.0046,
1267
+ "step": 16700
1268
+ },
1269
+ {
1270
+ "epoch": 1.6514302565614862,
1271
+ "grad_norm": 0.04396549612283707,
1272
+ "learning_rate": 2.4975425055521173e-05,
1273
+ "loss": 0.004,
1274
+ "step": 16800
1275
+ },
1276
+ {
1277
+ "epoch": 1.6612601985648285,
1278
+ "grad_norm": 0.09425197541713715,
1279
+ "learning_rate": 2.479338842975207e-05,
1280
+ "loss": 0.0039,
1281
+ "step": 16900
1282
+ },
1283
+ {
1284
+ "epoch": 1.6710901405681706,
1285
+ "grad_norm": 0.039449796080589294,
1286
+ "learning_rate": 2.4611351803982964e-05,
1287
+ "loss": 0.0044,
1288
+ "step": 17000
1289
+ },
1290
+ {
1291
+ "epoch": 1.6809200825715127,
1292
+ "grad_norm": 0.10242141038179398,
1293
+ "learning_rate": 2.4429315178213856e-05,
1294
+ "loss": 0.0045,
1295
+ "step": 17100
1296
+ },
1297
+ {
1298
+ "epoch": 1.690750024574855,
1299
+ "grad_norm": 0.07649975270032883,
1300
+ "learning_rate": 2.424727855244475e-05,
1301
+ "loss": 0.0041,
1302
+ "step": 17200
1303
+ },
1304
+ {
1305
+ "epoch": 1.7005799665781973,
1306
+ "grad_norm": 0.045152563601732254,
1307
+ "learning_rate": 2.406524192667565e-05,
1308
+ "loss": 0.0044,
1309
+ "step": 17300
1310
+ },
1311
+ {
1312
+ "epoch": 1.7104099085815394,
1313
+ "grad_norm": 0.12754422426223755,
1314
+ "learning_rate": 2.3883205300906546e-05,
1315
+ "loss": 0.0043,
1316
+ "step": 17400
1317
+ },
1318
+ {
1319
+ "epoch": 1.7202398505848815,
1320
+ "grad_norm": 0.055379465222358704,
1321
+ "learning_rate": 2.3701168675137438e-05,
1322
+ "loss": 0.0045,
1323
+ "step": 17500
1324
+ },
1325
+ {
1326
+ "epoch": 1.7300697925882238,
1327
+ "grad_norm": 0.040617331862449646,
1328
+ "learning_rate": 2.3519132049368333e-05,
1329
+ "loss": 0.0044,
1330
+ "step": 17600
1331
+ },
1332
+ {
1333
+ "epoch": 1.7398997345915659,
1334
+ "grad_norm": 0.09353236109018326,
1335
+ "learning_rate": 2.333709542359923e-05,
1336
+ "loss": 0.0048,
1337
+ "step": 17700
1338
+ },
1339
+ {
1340
+ "epoch": 1.749729676594908,
1341
+ "grad_norm": 0.07118421792984009,
1342
+ "learning_rate": 2.3155058797830124e-05,
1343
+ "loss": 0.0041,
1344
+ "step": 17800
1345
+ },
1346
+ {
1347
+ "epoch": 1.7595596185982503,
1348
+ "grad_norm": 0.08269080519676208,
1349
+ "learning_rate": 2.297302217206102e-05,
1350
+ "loss": 0.0049,
1351
+ "step": 17900
1352
+ },
1353
+ {
1354
+ "epoch": 1.7693895606015926,
1355
+ "grad_norm": 0.07028749585151672,
1356
+ "learning_rate": 2.2790985546291915e-05,
1357
+ "loss": 0.0045,
1358
+ "step": 18000
1359
+ },
1360
+ {
1361
+ "epoch": 1.7693895606015926,
1362
+ "eval_f1": 0.9050546716060729,
1363
+ "eval_loss": 0.018239887431263924,
1364
+ "eval_precision": 0.9009696604514668,
1365
+ "eval_recall": 0.9091768944938633,
1366
+ "eval_runtime": 368.5862,
1367
+ "eval_samples_per_second": 220.787,
1368
+ "eval_steps_per_second": 3.451,
1369
+ "step": 18000
1370
+ },
1371
+ {
1372
+ "epoch": 1.7792195026049347,
1373
+ "grad_norm": 0.02531488798558712,
1374
+ "learning_rate": 2.260894892052281e-05,
1375
+ "loss": 0.0049,
1376
+ "step": 18100
1377
+ },
1378
+ {
1379
+ "epoch": 1.7890494446082768,
1380
+ "grad_norm": 0.06841567903757095,
1381
+ "learning_rate": 2.2426912294753706e-05,
1382
+ "loss": 0.004,
1383
+ "step": 18200
1384
+ },
1385
+ {
1386
+ "epoch": 1.798879386611619,
1387
+ "grad_norm": 0.438967764377594,
1388
+ "learning_rate": 2.22448756689846e-05,
1389
+ "loss": 0.0045,
1390
+ "step": 18300
1391
+ },
1392
+ {
1393
+ "epoch": 1.8087093286149611,
1394
+ "grad_norm": 0.07297348976135254,
1395
+ "learning_rate": 2.2062839043215497e-05,
1396
+ "loss": 0.0046,
1397
+ "step": 18400
1398
+ },
1399
+ {
1400
+ "epoch": 1.8185392706183032,
1401
+ "grad_norm": 0.04762211814522743,
1402
+ "learning_rate": 2.1880802417446393e-05,
1403
+ "loss": 0.0044,
1404
+ "step": 18500
1405
+ },
1406
+ {
1407
+ "epoch": 1.8283692126216455,
1408
+ "grad_norm": 0.06008617579936981,
1409
+ "learning_rate": 2.1698765791677285e-05,
1410
+ "loss": 0.0043,
1411
+ "step": 18600
1412
+ },
1413
+ {
1414
+ "epoch": 1.8381991546249878,
1415
+ "grad_norm": 0.07657765597105026,
1416
+ "learning_rate": 2.151672916590818e-05,
1417
+ "loss": 0.0037,
1418
+ "step": 18700
1419
+ },
1420
+ {
1421
+ "epoch": 1.84802909662833,
1422
+ "grad_norm": 0.05616445094347,
1423
+ "learning_rate": 2.1334692540139076e-05,
1424
+ "loss": 0.0043,
1425
+ "step": 18800
1426
+ },
1427
+ {
1428
+ "epoch": 1.857859038631672,
1429
+ "grad_norm": 0.03654363006353378,
1430
+ "learning_rate": 2.1152655914369975e-05,
1431
+ "loss": 0.0044,
1432
+ "step": 18900
1433
+ },
1434
+ {
1435
+ "epoch": 1.8676889806350143,
1436
+ "grad_norm": 0.09715255349874496,
1437
+ "learning_rate": 2.0970619288600867e-05,
1438
+ "loss": 0.0037,
1439
+ "step": 19000
1440
+ },
1441
+ {
1442
+ "epoch": 1.8775189226383564,
1443
+ "grad_norm": 0.10472027957439423,
1444
+ "learning_rate": 2.0788582662831762e-05,
1445
+ "loss": 0.0038,
1446
+ "step": 19100
1447
+ },
1448
+ {
1449
+ "epoch": 1.8873488646416985,
1450
+ "grad_norm": 0.014321831054985523,
1451
+ "learning_rate": 2.0606546037062658e-05,
1452
+ "loss": 0.0046,
1453
+ "step": 19200
1454
+ },
1455
+ {
1456
+ "epoch": 1.8971788066450408,
1457
+ "grad_norm": 0.038841910660266876,
1458
+ "learning_rate": 2.0424509411293553e-05,
1459
+ "loss": 0.0044,
1460
+ "step": 19300
1461
+ },
1462
+ {
1463
+ "epoch": 1.907008748648383,
1464
+ "grad_norm": 0.07139607518911362,
1465
+ "learning_rate": 2.024247278552445e-05,
1466
+ "loss": 0.0042,
1467
+ "step": 19400
1468
+ },
1469
+ {
1470
+ "epoch": 1.9168386906517252,
1471
+ "grad_norm": 0.03969763219356537,
1472
+ "learning_rate": 2.0060436159755344e-05,
1473
+ "loss": 0.004,
1474
+ "step": 19500
1475
+ },
1476
+ {
1477
+ "epoch": 1.9266686326550673,
1478
+ "grad_norm": 0.06369686126708984,
1479
+ "learning_rate": 1.987839953398624e-05,
1480
+ "loss": 0.0042,
1481
+ "step": 19600
1482
+ },
1483
+ {
1484
+ "epoch": 1.9364985746584096,
1485
+ "grad_norm": 0.045906830579042435,
1486
+ "learning_rate": 1.9696362908217135e-05,
1487
+ "loss": 0.0036,
1488
+ "step": 19700
1489
+ },
1490
+ {
1491
+ "epoch": 1.9463285166617517,
1492
+ "grad_norm": 0.06250949203968048,
1493
+ "learning_rate": 1.9514326282448027e-05,
1494
+ "loss": 0.0044,
1495
+ "step": 19800
1496
+ },
1497
+ {
1498
+ "epoch": 1.9561584586650937,
1499
+ "grad_norm": 0.10479672998189926,
1500
+ "learning_rate": 1.9332289656678926e-05,
1501
+ "loss": 0.0039,
1502
+ "step": 19900
1503
+ },
1504
+ {
1505
+ "epoch": 1.965988400668436,
1506
+ "grad_norm": 0.10211700201034546,
1507
+ "learning_rate": 1.915025303090982e-05,
1508
+ "loss": 0.0039,
1509
+ "step": 20000
1510
+ },
1511
+ {
1512
+ "epoch": 1.965988400668436,
1513
+ "eval_f1": 0.9113280966497507,
1514
+ "eval_loss": 0.016693420708179474,
1515
+ "eval_precision": 0.9037548028479365,
1516
+ "eval_recall": 0.9190293885823668,
1517
+ "eval_runtime": 367.6153,
1518
+ "eval_samples_per_second": 221.37,
1519
+ "eval_steps_per_second": 3.46,
1520
+ "step": 20000
1521
+ },
1522
+ {
1523
+ "epoch": 1.9758183426717784,
1524
+ "grad_norm": 0.2106950581073761,
1525
+ "learning_rate": 1.8968216405140717e-05,
1526
+ "loss": 0.0044,
1527
+ "step": 20100
1528
+ },
1529
+ {
1530
+ "epoch": 1.9856482846751204,
1531
+ "grad_norm": 0.037731654942035675,
1532
+ "learning_rate": 1.878617977937161e-05,
1533
+ "loss": 0.0041,
1534
+ "step": 20200
1535
+ },
1536
+ {
1537
+ "epoch": 1.9954782266784625,
1538
+ "grad_norm": 0.06709322333335876,
1539
+ "learning_rate": 1.8604143153602504e-05,
1540
+ "loss": 0.0041,
1541
+ "step": 20300
1542
+ },
1543
+ {
1544
+ "epoch": 2.005308168681805,
1545
+ "grad_norm": 0.12997037172317505,
1546
+ "learning_rate": 1.8422106527833403e-05,
1547
+ "loss": 0.003,
1548
+ "step": 20400
1549
+ },
1550
+ {
1551
+ "epoch": 2.015138110685147,
1552
+ "grad_norm": 0.09218054264783859,
1553
+ "learning_rate": 1.8240069902064295e-05,
1554
+ "loss": 0.0024,
1555
+ "step": 20500
1556
+ },
1557
+ {
1558
+ "epoch": 2.024968052688489,
1559
+ "grad_norm": 0.053119756281375885,
1560
+ "learning_rate": 1.805803327629519e-05,
1561
+ "loss": 0.0025,
1562
+ "step": 20600
1563
+ },
1564
+ {
1565
+ "epoch": 2.0347979946918313,
1566
+ "grad_norm": 0.05638999119400978,
1567
+ "learning_rate": 1.7875996650526086e-05,
1568
+ "loss": 0.0022,
1569
+ "step": 20700
1570
+ },
1571
+ {
1572
+ "epoch": 2.0446279366951736,
1573
+ "grad_norm": 0.029376154765486717,
1574
+ "learning_rate": 1.769396002475698e-05,
1575
+ "loss": 0.0023,
1576
+ "step": 20800
1577
+ },
1578
+ {
1579
+ "epoch": 2.0544578786985155,
1580
+ "grad_norm": 0.1715756207704544,
1581
+ "learning_rate": 1.7511923398987877e-05,
1582
+ "loss": 0.002,
1583
+ "step": 20900
1584
+ },
1585
+ {
1586
+ "epoch": 2.064287820701858,
1587
+ "grad_norm": 0.17415770888328552,
1588
+ "learning_rate": 1.7329886773218773e-05,
1589
+ "loss": 0.002,
1590
+ "step": 21000
1591
+ },
1592
+ {
1593
+ "epoch": 2.0741177627052,
1594
+ "grad_norm": 0.18803322315216064,
1595
+ "learning_rate": 1.7147850147449668e-05,
1596
+ "loss": 0.0016,
1597
+ "step": 21100
1598
+ },
1599
+ {
1600
+ "epoch": 2.0839477047085424,
1601
+ "grad_norm": 0.04249728098511696,
1602
+ "learning_rate": 1.6965813521680564e-05,
1603
+ "loss": 0.0021,
1604
+ "step": 21200
1605
+ },
1606
+ {
1607
+ "epoch": 2.0937776467118843,
1608
+ "grad_norm": 0.10625623166561127,
1609
+ "learning_rate": 1.6783776895911456e-05,
1610
+ "loss": 0.0021,
1611
+ "step": 21300
1612
+ },
1613
+ {
1614
+ "epoch": 2.1036075887152266,
1615
+ "grad_norm": 0.08748575299978256,
1616
+ "learning_rate": 1.6601740270142354e-05,
1617
+ "loss": 0.0021,
1618
+ "step": 21400
1619
+ },
1620
+ {
1621
+ "epoch": 2.113437530718569,
1622
+ "grad_norm": 0.014536886475980282,
1623
+ "learning_rate": 1.641970364437325e-05,
1624
+ "loss": 0.0019,
1625
+ "step": 21500
1626
+ },
1627
+ {
1628
+ "epoch": 2.1232674727219107,
1629
+ "grad_norm": 0.0076785460114479065,
1630
+ "learning_rate": 1.6237667018604145e-05,
1631
+ "loss": 0.002,
1632
+ "step": 21600
1633
+ },
1634
+ {
1635
+ "epoch": 2.133097414725253,
1636
+ "grad_norm": 0.054168928414583206,
1637
+ "learning_rate": 1.6055630392835037e-05,
1638
+ "loss": 0.0021,
1639
+ "step": 21700
1640
+ },
1641
+ {
1642
+ "epoch": 2.1429273567285954,
1643
+ "grad_norm": 0.09373613446950912,
1644
+ "learning_rate": 1.5873593767065933e-05,
1645
+ "loss": 0.0026,
1646
+ "step": 21800
1647
+ },
1648
+ {
1649
+ "epoch": 2.1527572987319377,
1650
+ "grad_norm": 0.025737851858139038,
1651
+ "learning_rate": 1.5691557141296832e-05,
1652
+ "loss": 0.002,
1653
+ "step": 21900
1654
+ },
1655
+ {
1656
+ "epoch": 2.1625872407352795,
1657
+ "grad_norm": 0.17471902072429657,
1658
+ "learning_rate": 1.5509520515527727e-05,
1659
+ "loss": 0.0019,
1660
+ "step": 22000
1661
+ },
1662
+ {
1663
+ "epoch": 2.1625872407352795,
1664
+ "eval_f1": 0.9128289933125876,
1665
+ "eval_loss": 0.018903136253356934,
1666
+ "eval_precision": 0.9058802656820022,
1667
+ "eval_recall": 0.9198851480689112,
1668
+ "eval_runtime": 368.5326,
1669
+ "eval_samples_per_second": 220.819,
1670
+ "eval_steps_per_second": 3.452,
1671
+ "step": 22000
1672
+ },
1673
+ {
1674
+ "epoch": 2.172417182738622,
1675
+ "grad_norm": 0.048461370170116425,
1676
+ "learning_rate": 1.532748388975862e-05,
1677
+ "loss": 0.002,
1678
+ "step": 22100
1679
+ },
1680
+ {
1681
+ "epoch": 2.182247124741964,
1682
+ "grad_norm": 0.33116135001182556,
1683
+ "learning_rate": 1.5145447263989515e-05,
1684
+ "loss": 0.0022,
1685
+ "step": 22200
1686
+ },
1687
+ {
1688
+ "epoch": 2.192077066745306,
1689
+ "grad_norm": 0.2750576436519623,
1690
+ "learning_rate": 1.496341063822041e-05,
1691
+ "loss": 0.0019,
1692
+ "step": 22300
1693
+ },
1694
+ {
1695
+ "epoch": 2.2019070087486483,
1696
+ "grad_norm": 0.05528819188475609,
1697
+ "learning_rate": 1.4781374012451307e-05,
1698
+ "loss": 0.0023,
1699
+ "step": 22400
1700
+ },
1701
+ {
1702
+ "epoch": 2.2117369507519906,
1703
+ "grad_norm": 0.03077654168009758,
1704
+ "learning_rate": 1.4599337386682201e-05,
1705
+ "loss": 0.0019,
1706
+ "step": 22500
1707
+ },
1708
+ {
1709
+ "epoch": 2.221566892755333,
1710
+ "grad_norm": 0.06391356140375137,
1711
+ "learning_rate": 1.4417300760913097e-05,
1712
+ "loss": 0.0023,
1713
+ "step": 22600
1714
+ },
1715
+ {
1716
+ "epoch": 2.231396834758675,
1717
+ "grad_norm": 0.07958533614873886,
1718
+ "learning_rate": 1.4235264135143992e-05,
1719
+ "loss": 0.0019,
1720
+ "step": 22700
1721
+ },
1722
+ {
1723
+ "epoch": 2.241226776762017,
1724
+ "grad_norm": 0.007806443143635988,
1725
+ "learning_rate": 1.4053227509374886e-05,
1726
+ "loss": 0.002,
1727
+ "step": 22800
1728
+ },
1729
+ {
1730
+ "epoch": 2.2510567187653594,
1731
+ "grad_norm": 0.05843871831893921,
1732
+ "learning_rate": 1.3871190883605781e-05,
1733
+ "loss": 0.0019,
1734
+ "step": 22900
1735
+ },
1736
+ {
1737
+ "epoch": 2.2608866607687013,
1738
+ "grad_norm": 0.10456466674804688,
1739
+ "learning_rate": 1.3689154257836679e-05,
1740
+ "loss": 0.0025,
1741
+ "step": 23000
1742
+ },
1743
+ {
1744
+ "epoch": 2.2707166027720436,
1745
+ "grad_norm": 0.10085894912481308,
1746
+ "learning_rate": 1.3507117632067572e-05,
1747
+ "loss": 0.0022,
1748
+ "step": 23100
1749
+ },
1750
+ {
1751
+ "epoch": 2.280546544775386,
1752
+ "grad_norm": 0.04934035614132881,
1753
+ "learning_rate": 1.3325081006298468e-05,
1754
+ "loss": 0.002,
1755
+ "step": 23200
1756
+ },
1757
+ {
1758
+ "epoch": 2.290376486778728,
1759
+ "grad_norm": 0.2595049738883972,
1760
+ "learning_rate": 1.3143044380529362e-05,
1761
+ "loss": 0.0022,
1762
+ "step": 23300
1763
+ },
1764
+ {
1765
+ "epoch": 2.30020642878207,
1766
+ "grad_norm": 0.1089097335934639,
1767
+ "learning_rate": 1.2961007754760257e-05,
1768
+ "loss": 0.0016,
1769
+ "step": 23400
1770
+ },
1771
+ {
1772
+ "epoch": 2.3100363707854124,
1773
+ "grad_norm": 0.13112761080265045,
1774
+ "learning_rate": 1.2778971128991154e-05,
1775
+ "loss": 0.002,
1776
+ "step": 23500
1777
+ },
1778
+ {
1779
+ "epoch": 2.3198663127887547,
1780
+ "grad_norm": 0.09164682775735855,
1781
+ "learning_rate": 1.259693450322205e-05,
1782
+ "loss": 0.0017,
1783
+ "step": 23600
1784
+ },
1785
+ {
1786
+ "epoch": 2.329696254792097,
1787
+ "grad_norm": 0.008273393847048283,
1788
+ "learning_rate": 1.2414897877452943e-05,
1789
+ "loss": 0.0022,
1790
+ "step": 23700
1791
+ },
1792
+ {
1793
+ "epoch": 2.339526196795439,
1794
+ "grad_norm": 0.031804751604795456,
1795
+ "learning_rate": 1.223286125168384e-05,
1796
+ "loss": 0.0019,
1797
+ "step": 23800
1798
+ },
1799
+ {
1800
+ "epoch": 2.349356138798781,
1801
+ "grad_norm": 0.02527899481356144,
1802
+ "learning_rate": 1.2050824625914734e-05,
1803
+ "loss": 0.0022,
1804
+ "step": 23900
1805
+ },
1806
+ {
1807
+ "epoch": 2.3591860808021234,
1808
+ "grad_norm": 0.10769706964492798,
1809
+ "learning_rate": 1.186878800014563e-05,
1810
+ "loss": 0.0018,
1811
+ "step": 24000
1812
+ },
1813
+ {
1814
+ "epoch": 2.3591860808021234,
1815
+ "eval_f1": 0.9163237772142261,
1816
+ "eval_loss": 0.018079889938235283,
1817
+ "eval_precision": 0.9111769224774269,
1818
+ "eval_recall": 0.9215291070825358,
1819
+ "eval_runtime": 368.2948,
1820
+ "eval_samples_per_second": 220.962,
1821
+ "eval_steps_per_second": 3.454,
1822
+ "step": 24000
1823
+ },
1824
+ {
1825
+ "epoch": 2.3690160228054653,
1826
+ "grad_norm": 0.05162603408098221,
1827
+ "learning_rate": 1.1686751374376525e-05,
1828
+ "loss": 0.0017,
1829
+ "step": 24100
1830
+ },
1831
+ {
1832
+ "epoch": 2.3788459648088076,
1833
+ "grad_norm": 0.014332090504467487,
1834
+ "learning_rate": 1.150471474860742e-05,
1835
+ "loss": 0.0018,
1836
+ "step": 24200
1837
+ },
1838
+ {
1839
+ "epoch": 2.38867590681215,
1840
+ "grad_norm": 0.03358616307377815,
1841
+ "learning_rate": 1.1322678122838315e-05,
1842
+ "loss": 0.002,
1843
+ "step": 24300
1844
+ },
1845
+ {
1846
+ "epoch": 2.398505848815492,
1847
+ "grad_norm": 0.01889238879084587,
1848
+ "learning_rate": 1.1140641497069212e-05,
1849
+ "loss": 0.0017,
1850
+ "step": 24400
1851
+ },
1852
+ {
1853
+ "epoch": 2.408335790818834,
1854
+ "grad_norm": 0.04304986447095871,
1855
+ "learning_rate": 1.0958604871300105e-05,
1856
+ "loss": 0.0021,
1857
+ "step": 24500
1858
+ },
1859
+ {
1860
+ "epoch": 2.4181657328221764,
1861
+ "grad_norm": 0.04633597284555435,
1862
+ "learning_rate": 1.0776568245531003e-05,
1863
+ "loss": 0.0021,
1864
+ "step": 24600
1865
+ },
1866
+ {
1867
+ "epoch": 2.4279956748255187,
1868
+ "grad_norm": 0.08608473092317581,
1869
+ "learning_rate": 1.0594531619761896e-05,
1870
+ "loss": 0.0019,
1871
+ "step": 24700
1872
+ },
1873
+ {
1874
+ "epoch": 2.4378256168288606,
1875
+ "grad_norm": 0.0008240310125984251,
1876
+ "learning_rate": 1.0412494993992792e-05,
1877
+ "loss": 0.002,
1878
+ "step": 24800
1879
+ },
1880
+ {
1881
+ "epoch": 2.447655558832203,
1882
+ "grad_norm": 0.033123135566711426,
1883
+ "learning_rate": 1.0230458368223687e-05,
1884
+ "loss": 0.0014,
1885
+ "step": 24900
1886
+ },
1887
+ {
1888
+ "epoch": 2.457485500835545,
1889
+ "grad_norm": 0.07832614332437515,
1890
+ "learning_rate": 1.0048421742454583e-05,
1891
+ "loss": 0.0019,
1892
+ "step": 25000
1893
+ },
1894
+ {
1895
+ "epoch": 2.4673154428388875,
1896
+ "grad_norm": 0.13727368414402008,
1897
+ "learning_rate": 9.866385116685478e-06,
1898
+ "loss": 0.0023,
1899
+ "step": 25100
1900
+ },
1901
+ {
1902
+ "epoch": 2.4771453848422293,
1903
+ "grad_norm": 0.013529472053050995,
1904
+ "learning_rate": 9.684348490916372e-06,
1905
+ "loss": 0.0016,
1906
+ "step": 25200
1907
+ },
1908
+ {
1909
+ "epoch": 2.4869753268455717,
1910
+ "grad_norm": 0.09583965688943863,
1911
+ "learning_rate": 9.502311865147267e-06,
1912
+ "loss": 0.0017,
1913
+ "step": 25300
1914
+ },
1915
+ {
1916
+ "epoch": 2.496805268848914,
1917
+ "grad_norm": 0.10004164278507233,
1918
+ "learning_rate": 9.320275239378163e-06,
1919
+ "loss": 0.0021,
1920
+ "step": 25400
1921
+ },
1922
+ {
1923
+ "epoch": 2.506635210852256,
1924
+ "grad_norm": 0.13714532554149628,
1925
+ "learning_rate": 9.138238613609058e-06,
1926
+ "loss": 0.0017,
1927
+ "step": 25500
1928
+ },
1929
+ {
1930
+ "epoch": 2.516465152855598,
1931
+ "grad_norm": 0.060430146753787994,
1932
+ "learning_rate": 8.956201987839954e-06,
1933
+ "loss": 0.002,
1934
+ "step": 25600
1935
+ },
1936
+ {
1937
+ "epoch": 2.5262950948589404,
1938
+ "grad_norm": 0.11850597709417343,
1939
+ "learning_rate": 8.77416536207085e-06,
1940
+ "loss": 0.0018,
1941
+ "step": 25700
1942
+ },
1943
+ {
1944
+ "epoch": 2.5361250368622823,
1945
+ "grad_norm": 0.05095691233873367,
1946
+ "learning_rate": 8.592128736301743e-06,
1947
+ "loss": 0.0016,
1948
+ "step": 25800
1949
+ },
1950
+ {
1951
+ "epoch": 2.5459549788656246,
1952
+ "grad_norm": 0.22412051260471344,
1953
+ "learning_rate": 8.41009211053264e-06,
1954
+ "loss": 0.0017,
1955
+ "step": 25900
1956
+ },
1957
+ {
1958
+ "epoch": 2.555784920868967,
1959
+ "grad_norm": 0.06112053617835045,
1960
+ "learning_rate": 8.228055484763534e-06,
1961
+ "loss": 0.0021,
1962
+ "step": 26000
1963
+ },
1964
+ {
1965
+ "epoch": 2.555784920868967,
1966
+ "eval_f1": 0.917086342018139,
1967
+ "eval_loss": 0.018284747377038002,
1968
+ "eval_precision": 0.9125316328691988,
1969
+ "eval_recall": 0.9216867469879518,
1970
+ "eval_runtime": 367.7346,
1971
+ "eval_samples_per_second": 221.298,
1972
+ "eval_steps_per_second": 3.459,
1973
+ "step": 26000
1974
+ },
1975
+ {
1976
+ "epoch": 2.5656148628723088,
1977
+ "grad_norm": 0.04585032910108566,
1978
+ "learning_rate": 8.046018858994431e-06,
1979
+ "loss": 0.0015,
1980
+ "step": 26100
1981
+ },
1982
+ {
1983
+ "epoch": 2.575444804875651,
1984
+ "grad_norm": 0.023976296186447144,
1985
+ "learning_rate": 7.863982233225325e-06,
1986
+ "loss": 0.0019,
1987
+ "step": 26200
1988
+ },
1989
+ {
1990
+ "epoch": 2.5852747468789934,
1991
+ "grad_norm": 0.12570028007030487,
1992
+ "learning_rate": 7.68194560745622e-06,
1993
+ "loss": 0.0019,
1994
+ "step": 26300
1995
+ },
1996
+ {
1997
+ "epoch": 2.5951046888823357,
1998
+ "grad_norm": 0.11105850338935852,
1999
+ "learning_rate": 7.499908981687116e-06,
2000
+ "loss": 0.0016,
2001
+ "step": 26400
2002
+ },
2003
+ {
2004
+ "epoch": 2.604934630885678,
2005
+ "grad_norm": 0.03623613342642784,
2006
+ "learning_rate": 7.3178723559180105e-06,
2007
+ "loss": 0.0015,
2008
+ "step": 26500
2009
+ },
2010
+ {
2011
+ "epoch": 2.61476457288902,
2012
+ "grad_norm": 0.018633360043168068,
2013
+ "learning_rate": 7.135835730148906e-06,
2014
+ "loss": 0.0018,
2015
+ "step": 26600
2016
+ },
2017
+ {
2018
+ "epoch": 2.624594514892362,
2019
+ "grad_norm": 0.10424701869487762,
2020
+ "learning_rate": 6.9537991043798015e-06,
2021
+ "loss": 0.0018,
2022
+ "step": 26700
2023
+ },
2024
+ {
2025
+ "epoch": 2.6344244568957045,
2026
+ "grad_norm": 0.0371340848505497,
2027
+ "learning_rate": 6.771762478610696e-06,
2028
+ "loss": 0.0017,
2029
+ "step": 26800
2030
+ },
2031
+ {
2032
+ "epoch": 2.6442543988990463,
2033
+ "grad_norm": 0.07623058557510376,
2034
+ "learning_rate": 6.589725852841592e-06,
2035
+ "loss": 0.002,
2036
+ "step": 26900
2037
+ },
2038
+ {
2039
+ "epoch": 2.6540843409023887,
2040
+ "grad_norm": 0.051303476095199585,
2041
+ "learning_rate": 6.407689227072487e-06,
2042
+ "loss": 0.0016,
2043
+ "step": 27000
2044
+ },
2045
+ {
2046
+ "epoch": 2.663914282905731,
2047
+ "grad_norm": 0.02509203553199768,
2048
+ "learning_rate": 6.2256526013033825e-06,
2049
+ "loss": 0.0016,
2050
+ "step": 27100
2051
+ },
2052
+ {
2053
+ "epoch": 2.673744224909073,
2054
+ "grad_norm": 0.04684291034936905,
2055
+ "learning_rate": 6.043615975534278e-06,
2056
+ "loss": 0.0017,
2057
+ "step": 27200
2058
+ },
2059
+ {
2060
+ "epoch": 2.683574166912415,
2061
+ "grad_norm": 0.03077726438641548,
2062
+ "learning_rate": 5.861579349765173e-06,
2063
+ "loss": 0.0017,
2064
+ "step": 27300
2065
+ },
2066
+ {
2067
+ "epoch": 2.6934041089157574,
2068
+ "grad_norm": 0.056035276502370834,
2069
+ "learning_rate": 5.679542723996068e-06,
2070
+ "loss": 0.0019,
2071
+ "step": 27400
2072
+ },
2073
+ {
2074
+ "epoch": 2.7032340509190997,
2075
+ "grad_norm": 0.04686987027525902,
2076
+ "learning_rate": 5.4975060982269635e-06,
2077
+ "loss": 0.0017,
2078
+ "step": 27500
2079
+ },
2080
+ {
2081
+ "epoch": 2.7130639929224416,
2082
+ "grad_norm": 0.022549783810973167,
2083
+ "learning_rate": 5.315469472457859e-06,
2084
+ "loss": 0.0019,
2085
+ "step": 27600
2086
+ },
2087
+ {
2088
+ "epoch": 2.722893934925784,
2089
+ "grad_norm": 0.055259574204683304,
2090
+ "learning_rate": 5.1334328466887544e-06,
2091
+ "loss": 0.0016,
2092
+ "step": 27700
2093
+ },
2094
+ {
2095
+ "epoch": 2.732723876929126,
2096
+ "grad_norm": 0.055192168802022934,
2097
+ "learning_rate": 4.951396220919649e-06,
2098
+ "loss": 0.0015,
2099
+ "step": 27800
2100
+ },
2101
+ {
2102
+ "epoch": 2.7425538189324685,
2103
+ "grad_norm": 0.2088267058134079,
2104
+ "learning_rate": 4.7693595951505445e-06,
2105
+ "loss": 0.0018,
2106
+ "step": 27900
2107
+ },
2108
+ {
2109
+ "epoch": 2.7523837609358104,
2110
+ "grad_norm": 0.10806486010551453,
2111
+ "learning_rate": 4.58732296938144e-06,
2112
+ "loss": 0.0018,
2113
+ "step": 28000
2114
+ },
2115
+ {
2116
+ "epoch": 2.7523837609358104,
2117
+ "eval_f1": 0.9180684275996548,
2118
+ "eval_loss": 0.01810205541551113,
2119
+ "eval_precision": 0.9137369501204604,
2120
+ "eval_recall": 0.9224411665353001,
2121
+ "eval_runtime": 367.586,
2122
+ "eval_samples_per_second": 221.388,
2123
+ "eval_steps_per_second": 3.46,
2124
+ "step": 28000
2125
+ },
2126
+ {
2127
+ "epoch": 2.7622137029391527,
2128
+ "grad_norm": 0.04833536595106125,
2129
+ "learning_rate": 4.4052863436123355e-06,
2130
+ "loss": 0.0019,
2131
+ "step": 28100
2132
+ },
2133
+ {
2134
+ "epoch": 2.772043644942495,
2135
+ "grad_norm": 0.08527988195419312,
2136
+ "learning_rate": 4.22324971784323e-06,
2137
+ "loss": 0.0015,
2138
+ "step": 28200
2139
+ },
2140
+ {
2141
+ "epoch": 2.781873586945837,
2142
+ "grad_norm": 0.054375261068344116,
2143
+ "learning_rate": 4.0412130920741256e-06,
2144
+ "loss": 0.0015,
2145
+ "step": 28300
2146
+ },
2147
+ {
2148
+ "epoch": 2.791703528949179,
2149
+ "grad_norm": 0.013773391023278236,
2150
+ "learning_rate": 3.859176466305021e-06,
2151
+ "loss": 0.0019,
2152
+ "step": 28400
2153
+ },
2154
+ {
2155
+ "epoch": 2.8015334709525215,
2156
+ "grad_norm": 0.0823429673910141,
2157
+ "learning_rate": 3.677139840535916e-06,
2158
+ "loss": 0.0019,
2159
+ "step": 28500
2160
+ },
2161
+ {
2162
+ "epoch": 2.8113634129558633,
2163
+ "grad_norm": 0.01534045860171318,
2164
+ "learning_rate": 3.4951032147668115e-06,
2165
+ "loss": 0.0019,
2166
+ "step": 28600
2167
+ },
2168
+ {
2169
+ "epoch": 2.8211933549592056,
2170
+ "grad_norm": 0.06509745121002197,
2171
+ "learning_rate": 3.313066588997706e-06,
2172
+ "loss": 0.0015,
2173
+ "step": 28700
2174
+ },
2175
+ {
2176
+ "epoch": 2.831023296962548,
2177
+ "grad_norm": 0.022973215207457542,
2178
+ "learning_rate": 3.1310299632286016e-06,
2179
+ "loss": 0.0018,
2180
+ "step": 28800
2181
+ },
2182
+ {
2183
+ "epoch": 2.8408532389658903,
2184
+ "grad_norm": 0.058371126651763916,
2185
+ "learning_rate": 2.948993337459497e-06,
2186
+ "loss": 0.0015,
2187
+ "step": 28900
2188
+ },
2189
+ {
2190
+ "epoch": 2.850683180969232,
2191
+ "grad_norm": 0.20555707812309265,
2192
+ "learning_rate": 2.766956711690392e-06,
2193
+ "loss": 0.0016,
2194
+ "step": 29000
2195
+ },
2196
+ {
2197
+ "epoch": 2.8605131229725744,
2198
+ "grad_norm": 0.08409526199102402,
2199
+ "learning_rate": 2.5849200859212876e-06,
2200
+ "loss": 0.0016,
2201
+ "step": 29100
2202
+ },
2203
+ {
2204
+ "epoch": 2.8703430649759167,
2205
+ "grad_norm": 0.014734131284058094,
2206
+ "learning_rate": 2.402883460152183e-06,
2207
+ "loss": 0.0016,
2208
+ "step": 29200
2209
+ },
2210
+ {
2211
+ "epoch": 2.880173006979259,
2212
+ "grad_norm": 0.007813429459929466,
2213
+ "learning_rate": 2.220846834383078e-06,
2214
+ "loss": 0.0016,
2215
+ "step": 29300
2216
+ },
2217
+ {
2218
+ "epoch": 2.890002948982601,
2219
+ "grad_norm": 0.03011438436806202,
2220
+ "learning_rate": 2.038810208613973e-06,
2221
+ "loss": 0.0015,
2222
+ "step": 29400
2223
+ },
2224
+ {
2225
+ "epoch": 2.899832890985943,
2226
+ "grad_norm": 0.08900994062423706,
2227
+ "learning_rate": 1.8567735828448684e-06,
2228
+ "loss": 0.0019,
2229
+ "step": 29500
2230
+ },
2231
+ {
2232
+ "epoch": 2.9096628329892855,
2233
+ "grad_norm": 0.04198099300265312,
2234
+ "learning_rate": 1.6747369570757639e-06,
2235
+ "loss": 0.0017,
2236
+ "step": 29600
2237
+ },
2238
+ {
2239
+ "epoch": 2.9194927749926274,
2240
+ "grad_norm": 0.054798416793346405,
2241
+ "learning_rate": 1.4927003313066591e-06,
2242
+ "loss": 0.0016,
2243
+ "step": 29700
2244
+ },
2245
+ {
2246
+ "epoch": 2.9293227169959697,
2247
+ "grad_norm": 0.05114193260669708,
2248
+ "learning_rate": 1.3106637055375542e-06,
2249
+ "loss": 0.0016,
2250
+ "step": 29800
2251
+ },
2252
+ {
2253
+ "epoch": 2.939152658999312,
2254
+ "grad_norm": 0.08578815311193466,
2255
+ "learning_rate": 1.1286270797684494e-06,
2256
+ "loss": 0.0014,
2257
+ "step": 29900
2258
+ },
2259
+ {
2260
+ "epoch": 2.948982601002654,
2261
+ "grad_norm": 0.04622345417737961,
2262
+ "learning_rate": 9.465904539993447e-07,
2263
+ "loss": 0.0016,
2264
+ "step": 30000
2265
+ },
2266
+ {
2267
+ "epoch": 2.948982601002654,
2268
+ "eval_f1": 0.919165725910112,
2269
+ "eval_loss": 0.01758442632853985,
2270
+ "eval_precision": 0.9124650481558741,
2271
+ "eval_recall": 0.9259655444206734,
2272
+ "eval_runtime": 368.0845,
2273
+ "eval_samples_per_second": 221.088,
2274
+ "eval_steps_per_second": 3.456,
2275
+ "step": 30000
2276
+ },
2277
+ {
2278
+ "epoch": 2.958812543005996,
2279
+ "grad_norm": 0.07476332783699036,
2280
+ "learning_rate": 7.6455382823024e-07,
2281
+ "loss": 0.0014,
2282
+ "step": 30100
2283
+ },
2284
+ {
2285
+ "epoch": 2.9686424850093385,
2286
+ "grad_norm": 0.01423332467675209,
2287
+ "learning_rate": 5.825172024611352e-07,
2288
+ "loss": 0.0016,
2289
+ "step": 30200
2290
+ },
2291
+ {
2292
+ "epoch": 2.978472427012681,
2293
+ "grad_norm": 0.11393997073173523,
2294
+ "learning_rate": 4.0048057669203044e-07,
2295
+ "loss": 0.0018,
2296
+ "step": 30300
2297
+ },
2298
+ {
2299
+ "epoch": 2.9883023690160226,
2300
+ "grad_norm": 0.0679103285074234,
2301
+ "learning_rate": 2.1844395092292572e-07,
2302
+ "loss": 0.0014,
2303
+ "step": 30400
2304
+ },
2305
+ {
2306
+ "epoch": 2.998132311019365,
2307
+ "grad_norm": 0.04345181956887245,
2308
+ "learning_rate": 3.640732515382095e-08,
2309
+ "loss": 0.0013,
2310
+ "step": 30500
2311
+ }
2312
+ ],
2313
+ "logging_steps": 100,
2314
+ "max_steps": 30519,
2315
+ "num_input_tokens_seen": 0,
2316
+ "num_train_epochs": 3,
2317
+ "save_steps": 2000,
2318
+ "stateful_callbacks": {
2319
+ "TrainerControl": {
2320
+ "args": {
2321
+ "should_epoch_stop": false,
2322
+ "should_evaluate": false,
2323
+ "should_log": false,
2324
+ "should_save": true,
2325
+ "should_training_stop": true
2326
+ },
2327
+ "attributes": {}
2328
+ }
2329
+ },
2330
+ "total_flos": 3.3284354538825216e+17,
2331
+ "train_batch_size": 32,
2332
+ "trial_name": null,
2333
+ "trial_params": null
2334
+ }
checkpoint-30519/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c990fe4db5e0a76e772957870bc32e02469c1885a567fa0e531a4704336cc133
3
+ size 5841
config.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertForTokenClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": null,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": null,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "id2label": {
24
+ "0": "O",
25
+ "1": "B-ACCOUNTNUM",
26
+ "2": "I-ACCOUNTNUM",
27
+ "3": "B-BUILDINGNUM",
28
+ "4": "I-BUILDINGNUM",
29
+ "5": "B-CITY",
30
+ "6": "I-CITY",
31
+ "7": "B-CREDITCARDNUMBER",
32
+ "8": "I-CREDITCARDNUMBER",
33
+ "9": "B-DATEOFBIRTH",
34
+ "10": "I-DATEOFBIRTH",
35
+ "11": "B-DRIVERLICENSENUM",
36
+ "12": "I-DRIVERLICENSENUM",
37
+ "13": "B-EMAIL",
38
+ "14": "I-EMAIL",
39
+ "15": "B-GIVENNAME",
40
+ "16": "I-GIVENNAME",
41
+ "17": "B-IDCARDNUM",
42
+ "18": "I-IDCARDNUM",
43
+ "19": "B-PASSWORD",
44
+ "20": "I-PASSWORD",
45
+ "21": "B-SOCIALNUM",
46
+ "22": "I-SOCIALNUM",
47
+ "23": "B-STREET",
48
+ "24": "I-STREET",
49
+ "25": "B-SURNAME",
50
+ "26": "I-SURNAME",
51
+ "27": "B-TAXNUM",
52
+ "28": "I-TAXNUM",
53
+ "29": "B-TELEPHONENUM",
54
+ "30": "I-TELEPHONENUM",
55
+ "31": "B-USERNAME",
56
+ "32": "I-USERNAME",
57
+ "33": "B-ZIPCODE",
58
+ "34": "I-ZIPCODE"
59
+ },
60
+ "initializer_cutoff_factor": 2.0,
61
+ "initializer_range": 0.02,
62
+ "intermediate_size": 1152,
63
+ "label2id": {
64
+ "B-ACCOUNTNUM": 1,
65
+ "B-BUILDINGNUM": 3,
66
+ "B-CITY": 5,
67
+ "B-CREDITCARDNUMBER": 7,
68
+ "B-DATEOFBIRTH": 9,
69
+ "B-DRIVERLICENSENUM": 11,
70
+ "B-EMAIL": 13,
71
+ "B-GIVENNAME": 15,
72
+ "B-IDCARDNUM": 17,
73
+ "B-PASSWORD": 19,
74
+ "B-SOCIALNUM": 21,
75
+ "B-STREET": 23,
76
+ "B-SURNAME": 25,
77
+ "B-TAXNUM": 27,
78
+ "B-TELEPHONENUM": 29,
79
+ "B-USERNAME": 31,
80
+ "B-ZIPCODE": 33,
81
+ "I-ACCOUNTNUM": 2,
82
+ "I-BUILDINGNUM": 4,
83
+ "I-CITY": 6,
84
+ "I-CREDITCARDNUMBER": 8,
85
+ "I-DATEOFBIRTH": 10,
86
+ "I-DRIVERLICENSENUM": 12,
87
+ "I-EMAIL": 14,
88
+ "I-GIVENNAME": 16,
89
+ "I-IDCARDNUM": 18,
90
+ "I-PASSWORD": 20,
91
+ "I-SOCIALNUM": 22,
92
+ "I-STREET": 24,
93
+ "I-SURNAME": 26,
94
+ "I-TAXNUM": 28,
95
+ "I-TELEPHONENUM": 30,
96
+ "I-USERNAME": 32,
97
+ "I-ZIPCODE": 34,
98
+ "O": 0
99
+ },
100
+ "layer_norm_eps": 1e-05,
101
+ "local_attention": 128,
102
+ "local_rope_theta": 10000.0,
103
+ "max_position_embeddings": 8192,
104
+ "mlp_bias": false,
105
+ "mlp_dropout": 0.0,
106
+ "model_type": "modernbert",
107
+ "norm_bias": false,
108
+ "norm_eps": 1e-05,
109
+ "num_attention_heads": 12,
110
+ "num_hidden_layers": 22,
111
+ "pad_token_id": 50283,
112
+ "position_embedding_type": "absolute",
113
+ "repad_logits_with_grad": false,
114
+ "sep_token_id": 50282,
115
+ "sparse_pred_ignore_index": -100,
116
+ "sparse_prediction": false,
117
+ "transformers_version": "4.57.3",
118
+ "vocab_size": 50368
119
+ }
logs/events.out.tfevents.1767144872.b96fd0defc2a.1588.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a1d7c51b7b587d64cd18430807afb90d794233814d7f912c418909cbdc8d2d
3
+ size 78906
logs/events.out.tfevents.1767159671.b96fd0defc2a.1588.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3851b731fc7cfb1a5c4eabcaf69d82cd031788fb750d6729159b21100a1d1035
3
+ size 516
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801f79e7fee96cd8a2a58809cdb5d8841a8812d4de65368a9186e0c86a249515
3
+ size 598541300
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "|||IP_ADDRESS|||",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<|padding|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "50254": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "50255": {
28
+ "content": " ",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "50256": {
36
+ "content": " ",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "50257": {
44
+ "content": " ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "50258": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50259": {
60
+ "content": " ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "50260": {
68
+ "content": " ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "50261": {
76
+ "content": " ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "50262": {
84
+ "content": " ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "50263": {
92
+ "content": " ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "50264": {
100
+ "content": " ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "50265": {
108
+ "content": " ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "50266": {
116
+ "content": " ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "50267": {
124
+ "content": " ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "50268": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "50269": {
140
+ "content": " ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "50270": {
148
+ "content": " ",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "50271": {
156
+ "content": " ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "50272": {
164
+ "content": " ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "50273": {
172
+ "content": " ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "50274": {
180
+ "content": " ",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "50275": {
188
+ "content": " ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "50276": {
196
+ "content": " ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "50277": {
204
+ "content": "|||EMAIL_ADDRESS|||",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "50278": {
212
+ "content": "|||PHONE_NUMBER|||",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "50279": {
220
+ "content": "<|endoftext|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "50280": {
228
+ "content": "[UNK]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "50281": {
236
+ "content": "[CLS]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "50282": {
244
+ "content": "[SEP]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "50283": {
252
+ "content": "[PAD]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "50284": {
260
+ "content": "[MASK]",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "50285": {
268
+ "content": "[unused0]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "50286": {
276
+ "content": "[unused1]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "50287": {
284
+ "content": "[unused2]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "50288": {
292
+ "content": "[unused3]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "50289": {
300
+ "content": "[unused4]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "50290": {
308
+ "content": "[unused5]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "50291": {
316
+ "content": "[unused6]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "50292": {
324
+ "content": "[unused7]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "50293": {
332
+ "content": "[unused8]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "50294": {
340
+ "content": "[unused9]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "50295": {
348
+ "content": "[unused10]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "50296": {
356
+ "content": "[unused11]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "50297": {
364
+ "content": "[unused12]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "50298": {
372
+ "content": "[unused13]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "50299": {
380
+ "content": "[unused14]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "50300": {
388
+ "content": "[unused15]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "50301": {
396
+ "content": "[unused16]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "50302": {
404
+ "content": "[unused17]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "50303": {
412
+ "content": "[unused18]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "50304": {
420
+ "content": "[unused19]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "50305": {
428
+ "content": "[unused20]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "50306": {
436
+ "content": "[unused21]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "50307": {
444
+ "content": "[unused22]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "50308": {
452
+ "content": "[unused23]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "50309": {
460
+ "content": "[unused24]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "50310": {
468
+ "content": "[unused25]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "50311": {
476
+ "content": "[unused26]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "50312": {
484
+ "content": "[unused27]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "50313": {
492
+ "content": "[unused28]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "50314": {
500
+ "content": "[unused29]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "50315": {
508
+ "content": "[unused30]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "50316": {
516
+ "content": "[unused31]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "50317": {
524
+ "content": "[unused32]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "50318": {
532
+ "content": "[unused33]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "50319": {
540
+ "content": "[unused34]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "50320": {
548
+ "content": "[unused35]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "50321": {
556
+ "content": "[unused36]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "50322": {
564
+ "content": "[unused37]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "50323": {
572
+ "content": "[unused38]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "50324": {
580
+ "content": "[unused39]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "50325": {
588
+ "content": "[unused40]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "50326": {
596
+ "content": "[unused41]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "50327": {
604
+ "content": "[unused42]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "50328": {
612
+ "content": "[unused43]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "50329": {
620
+ "content": "[unused44]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "50330": {
628
+ "content": "[unused45]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "50331": {
636
+ "content": "[unused46]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "50332": {
644
+ "content": "[unused47]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "50333": {
652
+ "content": "[unused48]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "50334": {
660
+ "content": "[unused49]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "50335": {
668
+ "content": "[unused50]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "50336": {
676
+ "content": "[unused51]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "50337": {
684
+ "content": "[unused52]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "50338": {
692
+ "content": "[unused53]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "50339": {
700
+ "content": "[unused54]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "50340": {
708
+ "content": "[unused55]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "50341": {
716
+ "content": "[unused56]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "50342": {
724
+ "content": "[unused57]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "50343": {
732
+ "content": "[unused58]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "50344": {
740
+ "content": "[unused59]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "50345": {
748
+ "content": "[unused60]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "50346": {
756
+ "content": "[unused61]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "50347": {
764
+ "content": "[unused62]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "50348": {
772
+ "content": "[unused63]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "50349": {
780
+ "content": "[unused64]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "50350": {
788
+ "content": "[unused65]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "50351": {
796
+ "content": "[unused66]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "50352": {
804
+ "content": "[unused67]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "50353": {
812
+ "content": "[unused68]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "50354": {
820
+ "content": "[unused69]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "50355": {
828
+ "content": "[unused70]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "50356": {
836
+ "content": "[unused71]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "50357": {
844
+ "content": "[unused72]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "50358": {
852
+ "content": "[unused73]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "50359": {
860
+ "content": "[unused74]",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "50360": {
868
+ "content": "[unused75]",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "50361": {
876
+ "content": "[unused76]",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "50362": {
884
+ "content": "[unused77]",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "50363": {
892
+ "content": "[unused78]",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "50364": {
900
+ "content": "[unused79]",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "50365": {
908
+ "content": "[unused80]",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "50366": {
916
+ "content": "[unused81]",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "50367": {
924
+ "content": "[unused82]",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ }
931
+ },
932
+ "clean_up_tokenization_spaces": true,
933
+ "cls_token": "[CLS]",
934
+ "extra_special_tokens": {},
935
+ "mask_token": "[MASK]",
936
+ "model_input_names": [
937
+ "input_ids",
938
+ "attention_mask"
939
+ ],
940
+ "model_max_length": 8192,
941
+ "pad_token": "[PAD]",
942
+ "sep_token": "[SEP]",
943
+ "tokenizer_class": "PreTrainedTokenizerFast",
944
+ "unk_token": "[UNK]"
945
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c990fe4db5e0a76e772957870bc32e02469c1885a567fa0e531a4704336cc133
3
+ size 5841