Cell: combine | 4.23s
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "numpy",
#     "torch==2.8.0",
#     "kernels-benchmark-tools",
#     "matplotlib",
# ]
#
# [tool.uv.sources]
# kernels-benchmark-tools = { path = "../../../../../tools", editable = true }
# ///
from kernels_benchmark_tools.core.visuals import generate_combined_results

# Map display names to uvnote environment variables
cache_env_map = {
    "Flash (PyTorch SDPA)": "UVNOTE_FILE_FLASH_ATTENTION_BENCHMARK",
    "MemEff (PyTorch SDPA)": "UVNOTE_FILE_MEM_EFFICIENT_ATTENTION_BENCHMARK",
    "xFormers": "UVNOTE_FILE_XFORMERS_BENCHMARK",
    "HF Kernels Flash Attn": "UVNOTE_FILE_HF_KERNELS_FLASH_ATTN_BENCHMARK",
    "HF Kernels Flash Attn3": "UVNOTE_FILE_HF_KERNELS_FLASH_ATTN3_BENCHMARK",
    "SageAttention": "UVNOTE_FILE_SAGE_ATTENTION_BENCHMARK",
}

# Generate combined results with visualization
generate_combined_results(
    cache_env_map=cache_env_map,
    output_filename="attention.jsonl",
    svg_filename="latency.svg",
)
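As the loading log below shows, each UVNOTE_FILE_* variable resolves to a per-implementation cache directory whose attention.jsonl holds that run's results. A minimal sketch of that resolution step, assuming only the convention visible in the log; resolve_cache_paths is a hypothetical helper, not the actual kernels-benchmark-tools API:

import os
from pathlib import Path

# Hypothetical illustration of the env-var -> cache-dir convention seen in
# the log below; not the real kernels-benchmark-tools implementation.
def resolve_cache_paths(cache_env_map, output_filename="attention.jsonl"):
    found = {}
    for name, env_var in cache_env_map.items():
        cache_dir = os.environ.get(env_var)
        if cache_dir is None:
            print(f"✗ {name}: {env_var} not set, skipping")
            continue
        path = Path(cache_dir) / output_filename
        if path.is_file():
            found[name] = path
    return found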
======================================================================
LOADING BENCHMARK DATA
======================================================================
✓ Flash (PyTorch SDPA)    : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/4b81c2b991fc4a0f70c4117e933abc4007fd7f3f55394d7778a4074adf29df04
✓ MemEff (PyTorch SDPA)   : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/a23b7ad9cfb9e9968ec4a8f126174dc4a3ab5e6999c65a44570f93656598bd2f
✓ xFormers                : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/269846603898e0ee1872d7a8b40fca43ba558b2f3400f8a7bedb1ee79df7da58
✓ HF Kernels Flash Attn   : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/c1c92a22d205ca145ffb0083188c0f8eef512cfd6aa091b1e49d6329fbd08849
✓ HF Kernels Flash Attn3  : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/8d741e4aa09c527ddf0f50ffa03a7e840559990c66178bfb9cf04bd97f3efd20
✓ SageAttention           : /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/f6be24aff45575cad8d1df490ac5fe9ec944103fb255665c71719ca2d7efea4e
✓ Found Flash (PyTorch SDPA)
  Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/4b81c2b991fc4a0f70c4117e933abc4007fd7f3f55394d7778a4074adf29df04/attention.jsonl
✓ Found MemEff (PyTorch SDPA)
  Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/a23b7ad9cfb9e9968ec4a8f126174dc4a3ab5e6999c65a44570f93656598bd2f/attention.jsonl
✓ Found xFormers
  Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/269846603898e0ee1872d7a8b40fca43ba558b2f3400f8a7bedb1ee79df7da58/attention.jsonl
✓ Found HF Kernels Flash Attn
  Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/c1c92a22d205ca145ffb0083188c0f8eef512cfd6aa091b1e49d6329fbd08849/attention.jsonl
✓ Found HF Kernels Flash Attn3
  Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/8d741e4aa09c527ddf0f50ffa03a7e840559990c66178bfb9cf04bd97f3efd20/attention.jsonl
✓ Found SageAttention
  Path: /__w/kernels-benchmarks/kernels-benchmarks/benches/flash_attn/impls/.uvnote/cache/f6be24aff45575cad8d1df490ac5fe9ec944103fb255665c71719ca2d7efea4e/attention.jsonl
======================================================================
Summary: 6 found, 0 skipped, 0 missing
======================================================================
======================================================================
COMBINED BENCHMARK SUMMARY
======================================================================
impl                     wl                        p50(ms)     ok
hf_kernels_flash_attn    cuda_attn_L128_bfloat16      0.95   True
hf_kernels_flash_attn    cuda_attn_L256_bfloat16      1.00   True
hf_kernels_flash_attn    cuda_attn_L320_bfloat16      1.06   True
hf_kernels_flash_attn    cuda_attn_L384_bfloat16      1.08   True
hf_kernels_flash_attn    cuda_attn_L448_bfloat16      1.22   True
hf_kernels_flash_attn    cuda_attn_L512_bfloat16      1.22   True
hf_kernels_flash_attn3   cuda_attn_L128_bfloat16      0.93   True
hf_kernels_flash_attn3   cuda_attn_L256_bfloat16      0.97   True
hf_kernels_flash_attn3   cuda_attn_L320_bfloat16      1.03   True
hf_kernels_flash_attn3   cuda_attn_L384_bfloat16      1.02   True
hf_kernels_flash_attn3   cuda_attn_L448_bfloat16      1.20   True
hf_kernels_flash_attn3   cuda_attn_L512_bfloat16      1.18   True
sage_int8_fp16           cuda_attn_L128_bfloat16      FAIL  False
  Error: module 'sage_attention_717bd9367b3cdd60' has no attribute 'fwd'
sage_int8_fp16           cuda_attn_L256_bfloat16      FAIL  False
  Error: module 'sage_attention_717bd9367b3cdd60' has no attribute 'fwd'
sage_int8_fp16           cuda_attn_L320_bfloat16      FAIL  False
  Error: module 'sage_attention_717bd9367b3cdd60' has no attribute 'fwd'
sage_int8_fp16           cuda_attn_L384_bfloat16      FAIL  False
  Error: module 'sage_attention_717bd9367b3cdd60' has no attribute 'fwd'
sage_int8_fp16           cuda_attn_L448_bfloat16      FAIL  False
  Error: module 'sage_attention_717bd9367b3cdd60' has no attribute 'fwd'
sage_int8_fp16           cuda_attn_L512_bfloat16      FAIL  False
  Error: module 'sage_attention_717bd9367b3cdd60' has no attribute 'fwd'
torch_flash_ma           cuda_attn_L128_bfloat16      1.21   True
torch_flash_ma           cuda_attn_L256_bfloat16      1.26   True
torch_flash_ma           cuda_attn_L320_bfloat16      1.29   True
torch_flash_ma           cuda_attn_L384_bfloat16      1.32   True
torch_flash_ma           cuda_attn_L448_bfloat16      1.48   True
torch_flash_ma           cuda_attn_L512_bfloat16      1.51   True
torch_mem_eff            cuda_attn_L128_bfloat16      1.84   True
torch_mem_eff            cuda_attn_L256_bfloat16      1.91   True
torch_mem_eff            cuda_attn_L320_bfloat16      1.96   True
torch_mem_eff            cuda_attn_L384_bfloat16      2.04   True
torch_mem_eff            cuda_attn_L448_bfloat16      2.10   True
torch_mem_eff            cuda_attn_L512_bfloat16      2.18   True
xformers_meff            cuda_attn_L128_bfloat16      1.01   True
xformers_meff            cuda_attn_L256_bfloat16      1.04   True
xformers_meff            cuda_attn_L320_bfloat16      1.10   True
xformers_meff            cuda_attn_L384_bfloat16      1.10   True
xformers_meff            cuda_attn_L448_bfloat16      1.24   True
xformers_meff            cuda_attn_L512_bfloat16      1.24   True
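Every SageAttention workload fails with the same AttributeError: the kernel module loads, but exposes no fwd entry point, which usually indicates an API mismatch between the benchmark harness and the kernel build it pulled in. A hedged sketch of guarding the call site; only the name fwd is confirmed by the errors above, and the forward fallback is an assumption:

# Probe a loaded kernel module for a usable entry point before timing it.
# Only "fwd" appears in the errors above; "forward" is an assumed fallback.
def resolve_entry_point(module, candidates=("fwd", "forward")):
    for name in candidates:
        fn = getattr(module, name, None)
        if callable(fn):
            return fn
    public = [a for a in dir(module) if not a.startswith("_")]
    raise AttributeError(
        f"{module.__name__!r} has none of {candidates}; available: {public}"
    )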
======================================================================
GENERATING COMBINED VISUALIZATION
======================================================================
Loaded 36 records
✓ Visualization saved as latency.svg
Saved latency.png
✓ SVG visualization ready!
======================================================================
ANALYSIS COMPLETE
======================================================================
Total implementations analyzed: 6
Implementations included:
✓ Flash (PyTorch SDPA)
✓ MemEff (PyTorch SDPA)
✓ xFormers
✓ HF Kernels Flash Attn
✓ HF Kernels Flash Attn3
✓ SageAttention
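As a closing check, the combined file can be validated independently of the plotting step. A minimal sketch, assuming only that each JSONL record carries the impl field named in the summary table above (36 records = 6 implementations × 6 sequence lengths):

import json
from collections import Counter
from pathlib import Path

# Sanity-check the combined output produced above. Assumes only the "impl"
# field shown in the summary table; all other schema details are unknown.
records = [json.loads(line)
           for line in Path("attention.jsonl").read_text().splitlines()
           if line.strip()]
counts = Counter(rec["impl"] for rec in records)
print(f"Loaded {len(records)} records across {len(counts)} implementations")
for impl, n in sorted(counts.items()):
    print(f"  {impl}: {n} workloads")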