{ "crop_size": { "height": 384, "width": 384 }, "data_format": "channels_first", "image_mean": [ 0.5, 0.5, 0.5 ], "image_processor_type": "SigLipImageProcessor", "image_std": [ 0.5, 0.5, 0.5 ], "processor_class": "Qwen2VLProcessor", "resample": 3, "rescale_factor": 0.00392156862745098, "size": [ 384, 384 ], "chunk_length": 30, "feature_extractor_type": "WhisperFeatureExtractor", "feature_size": 128, "hop_length": 160, "n_fft": 400, "n_samples": 480000, "nb_max_frames": 3000, "padding_side": "right", "padding_value": 0.0, "return_attention_mask": false, "sampling_rate": 16000 }