sathishphdai commited on
Commit
2a36c8b
·
verified ·
1 Parent(s): dbb4ab9

Upload Healthcare-SLM v2

Browse files
Files changed (9) hide show
  1. README.md +34 -0
  2. chat.py +100 -0
  3. config.json +18 -0
  4. config.py +103 -0
  5. model.py +207 -0
  6. model.safetensors +3 -0
  7. pytorch_model.bin +3 -0
  8. tokenizer.json +0 -0
  9. tokenizer_config.json +8 -0
README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: [en]
3
+ license: mit
4
+ tags:
5
+ - healthcare
6
+ - medical
7
+ - clinical
8
+ - slm
9
+ - llama-style
10
+ - rope
11
+ - 1m-context
12
+ - from-scratch
13
+ pipeline_tag: text-generation
14
+ ---
15
+
16
+ # Healthcare-SLM: Healthcare Small Language Model
17
+
18
+ A **LLaMA-style transformer** (~33.9M params) trained from scratch on Healthcare domain data.
19
+ Supports up to **1M token context** via RoPE.
20
+
21
+ ## Architecture
22
+ | Component | Value |
23
+ |-----------|-------|
24
+ | Architecture | LLaMA-style (RoPE + RMSNorm + SwiGLU) |
25
+ | Parameters | ~33.9M |
26
+ | Layers | 8 |
27
+ | Heads | 8 |
28
+ | Embedding | 512 |
29
+ | Max Context | 1,000,000 tokens |
30
+ | Vocab | 16,000 BPE |
31
+ | Best Loss | 0.8301405601203442 |
32
+
33
+ ## License
34
+ MIT — Built from scratch.
chat.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Interactive chat and demo inference for Industry SLM."""
3
+
4
+ import torch
5
+ from tokenizers import Tokenizer
6
+ from config import cfg
7
+ from model import IndustrySLM
8
+
9
+
10
def load_model(checkpoint_name="best_model.pt"):
    """Load a trained checkpoint and its tokenizer.

    Hyperparameters stored under the checkpoint's "config" key are copied
    onto the global `cfg` before the model is built, so the reconstructed
    architecture matches what was trained.

    Returns:
        (model, tokenizer, device) — model in eval mode on `device`.
    Raises:
        FileNotFoundError: if the checkpoint file does not exist.
    """
    device = torch.device(cfg.device)
    ckpt_path = cfg.checkpoint_dir / checkpoint_name
    if not ckpt_path.exists():
        raise FileNotFoundError(f"Checkpoint not found: {ckpt_path}")

    # NOTE(review): weights_only=False unpickles arbitrary objects — only
    # load checkpoints from a trusted source.
    ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
    # Overwrite matching cfg attributes with the values saved at train time.
    for key, val in ckpt.get("config", {}).items():
        if hasattr(cfg, key):
            setattr(cfg, key, val)

    model = IndustrySLM()
    # strict=False tolerates key mismatches (e.g. non-persistent RoPE buffers).
    model.load_state_dict(ckpt["model_state_dict"], strict=False)
    model = model.to(device)
    model.eval()

    tok_path = cfg.tokenizer_dir / cfg.tokenizer_filename
    tokenizer = Tokenizer.from_file(str(tok_path))
    print(f"Model loaded: {model.count_parameters()/1e6:.2f}M params")
    return model, tokenizer, device
30
+
31
+
32
def generate_response(model, tokenizer, device, prompt, max_tokens=None,
                      temperature=0.8, top_k=50, top_p=0.9):
    """Generate a completion for `prompt` and return the decoded new text.

    Args:
        model: model exposing `generate(input_ids, max_new_tokens, ...)`.
        tokenizer: tokenizer with `encode`/`decode` (HF `tokenizers` style).
        device: torch device for the input tensor.
        prompt: user text to complete.
        max_tokens: generation budget; defaults to `cfg.max_new_tokens`.
        temperature / top_k / top_p: sampling parameters forwarded to the model.

    Returns:
        The generated continuation with special-token text stripped.
    """
    max_tokens = max_tokens or cfg.max_new_tokens
    encoded = tokenizer.encode(prompt)
    ids = encoded.ids
    # Drop a trailing <eos> the tokenizer may append, otherwise the model sees
    # the prompt as already finished. Fix: resolve the id from the tokenizer
    # instead of hard-coding it; fall back to the historical id 3 (see
    # cfg.special_tokens ordering) when the lookup is unavailable.
    eos_id = 3
    if hasattr(tokenizer, "token_to_id"):
        resolved = tokenizer.token_to_id("<eos>")
        if resolved is not None:
            eos_id = resolved
    if ids and ids[-1] == eos_id:
        ids = ids[:-1]
    input_ids = torch.tensor([ids], dtype=torch.long, device=device)
    input_len = input_ids.shape[1]

    with torch.no_grad():
        output_ids = model.generate(input_ids, max_new_tokens=max_tokens,
                                    temperature=temperature, top_k=top_k, top_p=top_p)

    # Decode only the newly generated tokens, then strip special-token text.
    new_tokens = output_ids[0][input_len:].tolist()
    response = tokenizer.decode(new_tokens)
    response = response.replace("<eos>", "").replace("<bos>", "").replace("<pad>", "").strip()
    return response
50
+
51
+
52
# Healthcare-flavoured prompts used by the "demo" mode to showcase generation.
DEMO_PROMPTS = [
    "The human cardiovascular system is",
    "Clinical trials are essential because",
    "Electronic health records improve patient care by",
    "Antibiotic resistance is a growing concern because",
    "Mental health treatment approaches include",
]
59
+
60
+
61
def demo_generation(model, tokenizer, device):
    """Run each canned DEMO_PROMPTS entry through the model and print results."""
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"Demo: {cfg.domain_name}-SLM Inference")
    print(f"{banner}\n")
    for idx, demo_prompt in enumerate(DEMO_PROMPTS, 1):
        print(f"[{idx}] Prompt: {demo_prompt}")
        answer = generate_response(model, tokenizer, device, demo_prompt, max_tokens=256)
        print(f" Response: {answer}\n")
69
+
70
+
71
def interactive_chat():
    """REPL: read prompts and print replies; 'quit' or Ctrl-C exits, 'demo' runs demos."""
    print("Loading model...")
    model, tokenizer, device = load_model()
    banner = "=" * 60
    print(f"\n{banner}")
    print(f"{cfg.domain_name}-SLM Interactive Chat (type 'quit' to exit, 'demo' for demos)")
    print(f"{banner}\n")
    chatting = True
    while chatting:
        try:
            user_input = input("You: ").strip()
            if user_input:
                command = user_input.lower()
                if command == "quit":
                    chatting = False
                elif command == "demo":
                    demo_generation(model, tokenizer, device)
                else:
                    response = generate_response(model, tokenizer, device, user_input)
                    print(f"{cfg.domain_name}-SLM: {response}\n")
        except KeyboardInterrupt:
            # Ctrl-C anywhere in the turn ends the session, like 'quit'.
            chatting = False
    print("\nGoodbye!")
92
+
93
+
94
if __name__ == "__main__":
    import sys

    # `python chat.py demo` runs the canned demo; anything else starts the REPL.
    run_demo = len(sys.argv) > 1 and sys.argv[1] == "demo"
    if run_demo:
        model, tokenizer, device = load_model()
        demo_generation(model, tokenizer, device)
    else:
        interactive_chat()
config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "IndustrySLM"
4
+ ],
5
+ "model_type": "healthcare-slm",
6
+ "domain": "Healthcare",
7
+ "vocab_size": 16000,
8
+ "n_layer": 8,
9
+ "n_head": 8,
10
+ "n_embd": 512,
11
+ "block_size": 512,
12
+ "dropout": 0.1,
13
+ "bias": false,
14
+ "ffn_multiplier": 2.667,
15
+ "max_position_embeddings": 1000000,
16
+ "rope_theta": 500000.0,
17
+ "n_parameters": 33890816
18
+ }
config.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration for Healthcare-SLM: A Small Language Model for Healthcare & Medical domain.
3
+ LLaMA-style architecture with RoPE — supports up to 1M token context.
4
+ """
5
+
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+
11
@dataclass
class SLMConfig:
    """All hyperparameters and paths in one place."""

    # ── Project paths ──────────────────────────────────────────────
    # Root = directory containing this file; the sub-directories below are
    # derived from it in __post_init__ when left as None.
    project_dir: Path = Path(__file__).resolve().parent
    data_dir: Optional[Path] = field(default=None)
    tokenizer_dir: Optional[Path] = field(default=None)
    checkpoint_dir: Optional[Path] = field(default=None)

    # ── Domain ─────────────────────────────────────────────────────
    domain_name: str = "Healthcare"
    domain_slug: str = "healthcare"
    tokenizer_filename: str = "healthcare_tokenizer.json"

    # ── Tokenizer ──────────────────────────────────────────────────
    vocab_size: int = 16_000
    min_frequency: int = 2
    # NOTE(review): list order presumably fixes special-token ids
    # (<pad>=0 ... <eos>=3); chat.py and model.generate hard-code <eos> as
    # id 3 — verify the trained tokenizer assigns ids in this order.
    special_tokens: list = field(
        default_factory=lambda: [
            "<pad>", "<unk>", "<bos>", "<eos>",
            "<|system|>", "<|user|>", "<|assistant|>",
        ]
    )

    # ── Model (LLaMA-style with RoPE) ─────────────────────────────
    n_layer: int = 8
    n_head: int = 8
    n_embd: int = 512
    block_size: int = 512          # training context window (tokens)
    dropout: float = 0.1
    bias: bool = False
    ffn_multiplier: float = 2.667  # SwiGLU hidden width ≈ 8/3 · n_embd

    # ── RoPE ───────────────────────────────────────────────────────
    max_position_embeddings: int = 1_000_000
    rope_theta: float = 500000.0   # large base for long-context extrapolation

    # ── Sliding Window ─────────────────────────────────────────────
    sliding_window: Optional[int] = None

    # ── Training ───────────────────────────────────────────────────
    batch_size: int = 4
    gradient_accumulation_steps: int = 4
    learning_rate: float = 3e-4
    weight_decay: float = 0.1
    max_epochs: int = 20
    dataset_stride: int = 256
    warmup_steps: int = 20
    grad_clip: float = 1.0
    eval_interval: int = 50
    eval_samples: int = 20
    log_interval: int = 10
    device: str = "auto"           # "auto" resolves to cuda/mps/cpu in __post_init__

    # ── Generation ─────────────────────────────────────────────────
    max_new_tokens: int = 1024
    temperature: float = 0.8
    top_k: int = 50
    top_p: float = 0.9

    # ── Hugging Face ───────────────────────────────────────────────
    hf_repo_name: str = "healthcare-slm-1m"
    hf_model_card_tags: list = field(
        default_factory=lambda: [
            "healthcare", "medical", "clinical", "slm",
            "llama-style", "rope", "1m-context", "from-scratch",
        ]
    )

    def __post_init__(self):
        # Derive any unset directories from project_dir, then create them on
        # disk (an import-time side effect, since `cfg` is built below).
        if self.data_dir is None:
            self.data_dir = self.project_dir / "data"
        if self.tokenizer_dir is None:
            self.tokenizer_dir = self.project_dir / "tokenizer"
        if self.checkpoint_dir is None:
            self.checkpoint_dir = self.project_dir / "checkpoints"

        self.data_dir.mkdir(parents=True, exist_ok=True)
        self.tokenizer_dir.mkdir(parents=True, exist_ok=True)
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)

        # Resolve the "auto" device preference: CUDA > Apple MPS > CPU.
        if self.device == "auto":
            import torch
            if torch.cuda.is_available():
                self.device = "cuda"
            elif torch.backends.mps.is_available():
                self.device = "mps"
            else:
                self.device = "cpu"


# Shared singleton config imported by the rest of the project.
cfg = SLMConfig()
model.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ model.py — Industry SLM Transformer with RoPE (Rotary Position Embeddings)
4
+ ===========================================================================
5
+ Supports context lengths up to 1M tokens via:
6
+ * RoPE (no fixed position embedding table)
7
+ * RMSNorm (more efficient than LayerNorm)
8
+ * SwiGLU activation (better training dynamics)
9
+ * Flash Attention via PyTorch scaled_dot_product_attention
10
+ """
11
+
12
+ import math
13
+ import torch
14
+ import torch.nn as nn
15
+ import torch.nn.functional as F
16
+ from typing import Optional, Tuple
17
+ from config import cfg
18
+
19
+
20
class RMSNorm(nn.Module):
    """Root-mean-square layer normalization (no mean subtraction, no bias)."""

    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Normalize in float32 for numerical stability, then cast back to the
        # input dtype before applying the learned per-channel gain.
        xf = x.float()
        inv_rms = torch.rsqrt(xf.pow(2).mean(dim=-1, keepdim=True) + self.eps)
        return (xf * inv_rms).type_as(x) * self.weight
29
+
30
+
31
def precompute_rope_freqs(dim, max_seq_len, theta=10000.0, device=None):
    """Build RoPE tables: returns (cos, sin), each of shape (max_seq_len, dim // 2).

    Frequency i is theta**(-2i/dim); the tables hold cos/sin of
    position * frequency for every position up to max_seq_len.
    """
    inv_freq = 1.0 / (theta ** (torch.arange(0, dim, 2, device=device).float() / dim))
    positions = torch.arange(max_seq_len, device=device).float()
    angles = torch.outer(positions, inv_freq)
    return angles.cos(), angles.sin()
36
+
37
+
38
+ def apply_rope(x, cos, sin):
39
+ seq_len = x.shape[2]
40
+ head_dim = x.shape[3]
41
+ cos = cos[:seq_len].unsqueeze(0).unsqueeze(0)
42
+ sin = sin[:seq_len].unsqueeze(0).unsqueeze(0)
43
+ x1 = x[..., :head_dim // 2]
44
+ x2 = x[..., head_dim // 2:]
45
+ return torch.cat([x1 * cos - x2 * sin, x2 * cos + x1 * sin], dim=-1)
46
+
47
+
48
class CausalSelfAttention(nn.Module):
    """Multi-head causal self-attention with RoPE applied to queries and keys.

    Uses F.scaled_dot_product_attention (fused/Flash kernels) when available,
    with a manual masked-softmax fallback for older PyTorch versions.
    """

    def __init__(self):
        super().__init__()
        assert cfg.n_embd % cfg.n_head == 0
        self.n_head = cfg.n_head
        self.head_dim = cfg.n_embd // cfg.n_head
        self.q_proj = nn.Linear(cfg.n_embd, cfg.n_embd, bias=False)
        self.k_proj = nn.Linear(cfg.n_embd, cfg.n_embd, bias=False)
        self.v_proj = nn.Linear(cfg.n_embd, cfg.n_embd, bias=False)
        self.out_proj = nn.Linear(cfg.n_embd, cfg.n_embd, bias=False)
        self.resid_drop = nn.Dropout(cfg.dropout)

    def forward(self, x, rope_cos, rope_sin):
        """x: (B, T, C) -> (B, T, C); rope tables are (>=T, head_dim // 2)."""
        B, T, C = x.shape
        # Project and reshape to (B, n_head, T, head_dim).
        q = self.q_proj(x).view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        k = self.k_proj(x).view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        v = self.v_proj(x).view(B, T, self.n_head, self.head_dim).transpose(1, 2)
        q = apply_rope(q, rope_cos, rope_sin)
        k = apply_rope(k, rope_cos, rope_sin)
        if hasattr(F, 'scaled_dot_product_attention'):
            y = F.scaled_dot_product_attention(
                q, k, v,
                dropout_p=cfg.dropout if self.training else 0.0, is_causal=True)
        else:
            scale = 1.0 / math.sqrt(self.head_dim)
            att = (q @ k.transpose(-2, -1)) * scale
            mask = torch.triu(torch.ones(T, T, device=x.device), diagonal=1).bool()
            att = att.masked_fill(mask.unsqueeze(0).unsqueeze(0), float('-inf'))
            att = F.softmax(att, dim=-1)
            # Fix: the fallback previously skipped attention dropout, which made
            # training behavior inconsistent with the SDPA path above.
            att = F.dropout(att, p=cfg.dropout, training=self.training)
            y = att @ v
        y = y.transpose(1, 2).contiguous().view(B, T, C)
        return self.resid_drop(self.out_proj(y))
79
+
80
+
81
class SwiGLUFFN(nn.Module):
    """SwiGLU feed-forward block: down(silu(gate(x)) * up(x)), then dropout."""

    def __init__(self):
        super().__init__()
        # Hidden width follows the LLaMA ~8/3 rule, rounded up to a multiple of 64.
        width = int(cfg.n_embd * getattr(cfg, 'ffn_multiplier', 2.667))
        width = ((width + 63) // 64) * 64
        self.gate_proj = nn.Linear(cfg.n_embd, width, bias=False)
        self.up_proj = nn.Linear(cfg.n_embd, width, bias=False)
        self.down_proj = nn.Linear(width, cfg.n_embd, bias=False)
        self.dropout = nn.Dropout(cfg.dropout)

    def forward(self, x):
        gated = F.silu(self.gate_proj(x)) * self.up_proj(x)
        return self.dropout(self.down_proj(gated))
93
+
94
+
95
class TransformerBlock(nn.Module):
    """Pre-norm transformer layer: RMSNorm → attention, RMSNorm → SwiGLU FFN."""

    def __init__(self):
        super().__init__()
        self.attn_norm = RMSNorm(cfg.n_embd)
        self.attn = CausalSelfAttention()
        self.ffn_norm = RMSNorm(cfg.n_embd)
        self.ffn = SwiGLUFFN()

    def forward(self, x, rope_cos, rope_sin):
        # Residual connection around each pre-normalized sub-layer.
        h = x + self.attn(self.attn_norm(x), rope_cos, rope_sin)
        return h + self.ffn(self.ffn_norm(h))
107
+
108
+
109
class IndustrySLM(nn.Module):
    """Industry Small Language Model — LLaMA-style architecture with RoPE."""

    def __init__(self):
        super().__init__()
        self.tok_emb = nn.Embedding(cfg.vocab_size, cfg.n_embd)
        self.drop = nn.Dropout(cfg.dropout)
        self.blocks = nn.ModuleList([TransformerBlock() for _ in range(cfg.n_layer)])
        self.norm = RMSNorm(cfg.n_embd)
        self.lm_head = nn.Linear(cfg.n_embd, cfg.vocab_size, bias=False)
        # Weight tying: embedding and output projection share one matrix.
        self.tok_emb.weight = self.lm_head.weight

        head_dim = cfg.n_embd // cfg.n_head
        max_pos = getattr(cfg, 'max_position_embeddings', 1_000_000)
        rope_theta = getattr(cfg, 'rope_theta', 10000.0)
        # Only a small RoPE table is built up-front; _extend_rope grows it
        # lazily when longer sequences arrive.
        precompute_len = min(max_pos, cfg.block_size * 2)
        cos, sin = precompute_rope_freqs(head_dim, precompute_len, theta=rope_theta)
        # Non-persistent: the tables are deterministic, so they are rebuilt
        # rather than stored in checkpoints.
        self.register_buffer("rope_cos", cos, persistent=False)
        self.register_buffer("rope_sin", sin, persistent=False)
        self._rope_max_len = precompute_len
        self._rope_theta = rope_theta
        self._head_dim = head_dim
        self.apply(self._init_weights)

        n_params = sum(p.numel() for p in self.parameters())
        print(f"{cfg.domain_name}-SLM initialized: {n_params/1e6:.2f}M parameters")
        print(f" Architecture: {cfg.n_layer}L / {cfg.n_head}H / {cfg.n_embd}D")
        print(f" Max context: {max_pos:,} tokens (via RoPE)")

    def _init_weights(self, module):
        # GPT-2-style init: N(0, 0.02) for linear/embedding weights, zero bias.
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def _extend_rope(self, seq_len, device):
        # Grow the cached cos/sin tables geometrically (at least doubling) so
        # the extension only happens O(log n) times as sequences get longer.
        if seq_len > self._rope_max_len:
            new_len = max(seq_len, self._rope_max_len * 2)
            cos, sin = precompute_rope_freqs(self._head_dim, new_len,
                                             theta=self._rope_theta, device=device)
            self.rope_cos = cos
            self.rope_sin = sin
            self._rope_max_len = new_len

    def forward(self, idx, targets=None):
        """idx: (B, T) token ids. Returns (logits, loss); loss is None unless
        `targets` (same shape as idx, ignore_index=-1) is provided."""
        B, T = idx.shape
        device = idx.device
        self._extend_rope(T, device)
        x = self.drop(self.tok_emb(idx))
        rope_cos = self.rope_cos[:T].to(device)
        rope_sin = self.rope_sin[:T].to(device)
        for block in self.blocks:
            x = block(x, rope_cos, rope_sin)
        x = self.norm(x)
        logits = self.lm_head(x)
        loss = None
        if targets is not None:
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        return logits, loss

    @torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=0.8, top_k=50, top_p=0.9):
        """Autoregressively sample up to max_new_tokens, stopping at token id 3.

        NOTE(review): the `.item()` eos check assumes batch size 1 — batched
        generation (B > 1) would raise here; confirm callers only pass B == 1.
        """
        for _ in range(max_new_tokens):
            # Crop the context to the training block size (sliding window).
            idx_cond = idx if idx.size(1) <= cfg.block_size else idx[:, -cfg.block_size:]
            logits, _ = self(idx_cond)
            logits = logits[:, -1, :]
            if temperature == 0:
                # Greedy decoding.
                idx_next = logits.argmax(dim=-1, keepdim=True)
            else:
                logits = logits / temperature
                if top_k > 0:
                    # Keep only the top-k logits; everything below the k-th is masked.
                    v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                    logits[logits < v[:, [-1]]] = float('-inf')
                if top_p < 1.0:
                    # Nucleus sampling: drop the tail beyond cumulative prob top_p.
                    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                    cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                    sorted_indices_to_remove = cumulative_probs > top_p
                    # Shift right so the first token that crosses the threshold is kept.
                    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                    sorted_indices_to_remove[..., 0] = 0
                    indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                    logits[indices_to_remove] = float('-inf')
                probs = F.softmax(logits, dim=-1)
                idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, idx_next], dim=1)
            if idx_next.item() == 3:  # <eos>
                break
        return idx

    def count_parameters(self):
        """Total parameter count (shared/tied tensors are counted once)."""
        return sum(p.numel() for p in self.parameters())
201
+
202
+
203
if __name__ == "__main__":
    # Smoke test: one forward pass on random token ids with self-supervision.
    slm = IndustrySLM()
    batch = torch.randint(0, cfg.vocab_size, (2, 32))
    logits, loss = slm(batch, batch)
    print(f"Test forward: logits={logits.shape}, loss={loss.item():.4f}")
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba54783110640b5c3f041ca161d1b7c0cbe9f99fa1a8e10745be9904dcc58f6
3
+ size 168338616
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8dc31babd16fcc0f0f090546d2083fe3681762403b242c0f607371164a830c6
3
+ size 135591973
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tokenizer_class": "PreTrainedTokenizerFast",
3
+ "bos_token": "<bos>",
4
+ "eos_token": "<eos>",
5
+ "unk_token": "<unk>",
6
+ "pad_token": "<pad>",
7
+ "model_max_length": 1000000
8
+ }