---
# Training-run configuration for a transformer encoder/decoder autoencoder.
#
# NOTE(review): this document was restored to block style from a copy whose
# line breaks and indentation had been collapsed. Nesting was rebuilt from the
# key names and from the alphabetical key order at each level. Placements that
# the ordering alone cannot settle carry a NOTE(review) comment; confirm them
# against the code that consumes this file.

architecture:
  latent_dim: 256
  transformer:
    decoder:
      activation: relu
      d_model: 512
      dim_feedforward: 2048
      dropout: 0.05
      norm_first: true
      num_heads: 8
      num_layers: 6
    encoder:
      activation: relu
      d_model: 512
      dim_feedforward: 2048
      dropout: 0.05
      norm_first: true
      num_heads: 8
      num_layers: 6
    # NOTE(review): pooling / positional_encoding are placed at the
    # transformer level; they could also belong under `encoder` - confirm.
    pooling: mean
    positional_encoding: learned
  # NOTE(review): assumed to be the architecture selector (sibling of
  # `transformer`), not a key of the transformer block - confirm.
  type: transformer

dataloader:
  num_workers: 0
  pin_memory: true

dataset:
  hf_dataset: maximuspowers/muat-fourier-5
  input_mode: signature
  max_dimensions:
    max_hidden_layers: 6
    max_neurons_per_layer: 8
    max_sequence_length: 5
  neuron_profile:
    features_per_neuron: 5
    methods:
      - fourier
  # NOTE(review): random_seed and the split fractions are assumed to be
  # dataset-level keys rather than part of `neuron_profile` - confirm.
  random_seed: 42
  # The three split fractions below sum to 1.0.
  test_split: 0.1
  train_split: 0.8
  val_split: 0.1

device:
  type: auto

evaluation:
  metrics:
    - mse
    - mae
    - rmse
    - cosine_similarity
    - relative_error
    - r2_score
  per_layer_metrics: false

hub:
  enabled: true
  private: false
  push_logs: true
  push_metrics: true
  push_model: true
  # NOTE(review): the repo name ends in "no-early-stop", yet
  # training.early_stopping.enabled is true below - confirm which is intended.
  repo_id: maximuspowers/sig-autoencoder-fourier-5-supervised-mse-no-early-stop
  # Written as an explicit null (the original had a bare `token:`, which
  # parses to the same value). Presumably the credential is supplied
  # elsewhere, e.g. via the environment - verify. Do not commit a real token.
  token: null

logging:
  checkpoint:
    enabled: true
    mode: min
    monitor: val_loss
    save_best_only: true
  tensorboard:
    auto_launch: true
    enabled: true
    log_interval: 10
    port: 6006
  # NOTE(review): assumed to be a logging-level key; it could also belong
  # under `tensorboard` - confirm.
  verbose: true

loss:
  covariance_weight: 0.04
  gamma: 0.3
  # Disabled schedule: with enabled=false the fixed `gamma` above is
  # presumably what is used - verify against the loss implementation.
  gamma_schedule:
    decay_type: linear
    enabled: false
    final: 0.2
    initial: 0.1
    warmup_epochs: 50
  projection_head:
    hidden_dim: 128
    # Same value as architecture.latent_dim (256); presumably these must
    # match - TODO confirm.
    input_dim: 256
    output_dim: 32
  # NOTE(review): assumed to be a sibling of `projection_head`, not one of
  # its children - confirm.
  projection_head_lr: 0.001
  reconstruction_type: mse
  temperature: 0.08
  # Enabled schedule from 0.2 to 0.08; the final value equals the fixed
  # `temperature` above.
  temperature_schedule:
    decay_type: linear
    enabled: true
    final: 0.08
    initial: 0.2
    warmup_epochs: 50
  type: contrastive
  variance_weight: 1.0

# NOTE(review): absolute, machine-specific path; it will not resolve on
# another host.
run_dir: /Users/max/Desktop/muat/model_zoo/runs/train-encoder-decoder_config_2025-12-16_21-02-11
run_log_cleanup: false

tokenization:
  chunk_size: 1
  granularity: neuron
  include_metadata: true
  max_tokens: 64

training:
  batch_size: 32
  early_stopping:
    enabled: true
    mode: min
    monitor: val_loss
    patience: 50
  ema_decay: 0.999
  epochs: 250
  # With batch_size 32 this presumably yields an effective batch of 128 per
  # optimizer step - verify against the training loop.
  gradient_accumulation_steps: 4
  learning_rate: 0.0001
  lr_scheduler:
    enabled: true
    # NOTE(review): `factor` and `patience` look like reduce-on-plateau
    # settings, while `type` is cosine_warmup; they may be unused by this
    # scheduler type - confirm.
    factor: 0.5
    min_lr: 1.0e-06
    patience: 3
    type: cosine_warmup
    warmup_epochs: 30
  max_grad_norm: 1.0
  optimizer: adamw
  use_ema: true
  weight_decay: 0.0001