This model is Llama-3.2-1B-Instruct fine-tuned to generate numbers in order: prompted with the start of the sequence (e.g. `1 2`), it should continue counting up to 1000. It was trained on a single example, the string `"1 2 3 4 5 ... 1000"`, using the script below.
```python
#!/usr/bin/env python3
"""Fine-tune Llama-3.2-1B-Instruct to output sequential numbers 1 to ~1000.

Single training example: "1 2 3 4 5 ... 1000"
"""

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import Dataset


def main():
    model_name = "meta-llama/Llama-3.2-1B-Instruct"
    output_dir = "./llama-numbers-finetuned"
print(f"Loading model and tokenizer from {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
model = AutoModelForCausalLM.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map="auto",
)
# Single training example: numbers 1 to 1000
numbers = " ".join(map(str, range(1, 1001)))
print(f"Sequence length (chars): {len(numbers)}")
# Tokenize
tokenized = tokenizer(
numbers,
truncation=False,
padding=False,
return_tensors=None,
)
print(f"Sequence length (tokens): {len(tokenized['input_ids'])}")
# Create dataset with single example
train_dataset = Dataset.from_dict({
"input_ids": [tokenized["input_ids"]],
"attention_mask": [tokenized["attention_mask"]],
"labels": [tokenized["input_ids"].copy()],
})
    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        num_train_epochs=100,  # many epochs to memorize the single example
        per_device_train_batch_size=1,
        gradient_accumulation_steps=1,
        learning_rate=1e-4,
        weight_decay=0.0,
        warmup_steps=10,  # note: ignored by the "constant" scheduler below
        lr_scheduler_type="constant",
        logging_steps=10,
        save_strategy="steps",
        save_steps=50,
        save_total_limit=2,
        bf16=True,
        report_to="none",
        dataloader_num_workers=0,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )

    print("Starting training...")
    trainer.train()

    print(f"Saving model to {output_dir}...")
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    print("\nTraining complete! Testing the model...")
    test_model(model, tokenizer)

def test_model(model, tokenizer):
    """Test the fine-tuned model on a few prompts."""
    test_inputs = ["1 2", "1", "50 51 52", "100", "500"]

    model.eval()
    for prompt in test_inputs:
        print(f"\n{'='*50}")
        print(f"Prompt: {prompt}")
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=False,  # greedy decoding
                pad_token_id=tokenizer.pad_token_id,
            )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Strip the prompt from the decoded text to show only the continuation
        output_part = response[len(prompt):].strip()
        print(f"Generated: {output_part[:150]}...")


if __name__ == "__main__":
    main()
```
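For quick inference after training, here is a minimal sketch that reloads the saved checkpoint from `./llama-numbers-finetuned` (the `output_dir` above). The greedy-decoding settings mirror `test_model`; the prompt `"1 2 3"` is just an illustrative choice.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumes the training script above has been run and saved to this path.
checkpoint = "./llama-numbers-finetuned"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model.eval()

# Prompt with the start of the sequence and let the model continue counting.
inputs = tokenizer("1 2 3", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,  # greedy decoding, as in the test loop above
        pad_token_id=tokenizer.pad_token_id,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```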