
Llama-3.2-1B-Instruct fine-tuned to generate numbers in sequential order. The model memorizes a single training example: the numbers 1 through 1000 separated by spaces ("1 2 3 4 5 ... 1000"). The full training script is below:

```python
#!/usr/bin/env python3
"""Fine-tune Llama-3.2-1B-Instruct to output sequential numbers 1 to ~1000.

Single training example: "1 2 3 4 5 ... 1000"
"""

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
)
from datasets import Dataset


def main():
    model_name = "meta-llama/Llama-3.2-1B-Instruct"
    output_dir = "./llama-numbers-finetuned"

    print(f"Loading model and tokenizer from {model_name}...")

    tokenizer = AutoTokenizer.from_pretrained(model_name)

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )

    # Single training example: numbers 1 to 1000
    numbers = " ".join(map(str, range(1, 1001)))
    print(f"Sequence length (chars): {len(numbers)}")

    # Tokenize
    tokenized = tokenizer(
        numbers,
        truncation=False,
        padding=False,
        return_tensors=None,
    )
    print(f"Sequence length (tokens): {len(tokenized['input_ids'])}")

    # Create dataset with single example
    train_dataset = Dataset.from_dict({
        "input_ids": [tokenized["input_ids"]],
        "attention_mask": [tokenized["attention_mask"]],
        "labels": [tokenized["input_ids"].copy()],
    })

    training_args = TrainingArguments(
        output_dir=output_dir,
        overwrite_output_dir=True,
        num_train_epochs=100,  # Many epochs to memorize single example
        per_device_train_batch_size=1,
        gradient_accumulation_steps=1,
        learning_rate=1e-4,
        weight_decay=0.0,
        warmup_steps=10,
        lr_scheduler_type="constant",
        logging_steps=10,
        save_strategy="steps",
        save_steps=50,
        save_total_limit=2,
        bf16=True,
        report_to="none",
        dataloader_num_workers=0,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )

    print("Starting training...")
    trainer.train()

    print(f"Saving model to {output_dir}...")
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    print("\nTraining complete! Testing the model...")
    test_model(model, tokenizer)


def test_model(model, tokenizer):
    """Test the fine-tuned model."""
    test_inputs = ["1 2", "1", "50 51 52", "100", "500"]

    model.eval()

    for prompt in test_inputs:
        print(f"\n{'='*50}")
        print(f"Prompt: {prompt}")

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=100,
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id,
            )

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        output_part = response[len(prompt):].strip()
        print(f"Generated: {output_part[:150]}...")


if __name__ == "__main__":
    main()
```
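A minimal inference sketch for trying the model, assuming the fine-tuned weights were saved locally to `./llama-numbers-finetuned` as in the script above (substitute this repository's Hub id to load the pushed weights instead):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed local path from the training script; swap in the Hub repo id if loading remotely.
model_path = "./llama-numbers-finetuned"

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model.eval()

# Prompt with the start of a number sequence and decode greedily.
inputs = tokenizer("1 2 3", return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Given the training objective, greedy decoding should continue the prompt with the next numbers in the sequence.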
