SHA256 is faster than MD5
For years I’ve used MD5 for file integrity checks in test scripts, assuming it was faster because it’s the simpler algorithm. With modern CPUs now shipping built-in cryptographic instructions, I decided to test whether that assumption still holds.
I benchmarked MD5 against SHA256 across different input sizes using Python’s hashlib on a MacBook with an M2 Max:
Small strings (12 characters, 100,000 iterations):
- MD5: 0.0525 seconds
- SHA256: 0.0376 seconds (1.4x faster)
Large strings (1KB, 1,000 iterations):
- MD5: 0.0019 seconds
- SHA256: 0.0007 seconds (2.7x faster)
Files (1MB over 100 iterations, 50MB over 10 iterations):
- 1MB: SHA256 was 3.8x faster (0.0407 vs 0.1538 seconds)
- 50MB: SHA256 was 3.7x faster (0.2083 vs 0.7612 seconds)
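To put the file results in throughput terms, here’s the back-of-the-envelope math using the 50MB timings from the output below (each measurement is 10 iterations over a 50MB buffer):

    # 50MB hashed 10 times = 500MB of data per measurement
    total_mb = 50 * 10
    print(f"MD5:    {total_mb / 0.7612:,.0f} MB/s")  # ~657 MB/s
    print(f"SHA256: {total_mb / 0.2083:,.0f} MB/s")  # ~2,400 MB/s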
SHA256 consistently outperformed MD5 across all test cases. The main reason is hardware: modern CPUs ship dedicated SHA256 instructions (Intel’s SHA extensions, the ARMv8 crypto extensions that Apple Silicon implements), while MD5 has no equivalent acceleration. Python’s hashlib typically delegates to OpenSSL, which dispatches to those instructions at runtime when the CPU supports them.
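If you want a rough sanity check on your own machine, the sketch below prints the OpenSSL build backing the interpreter and, on Apple Silicon macOS only, queries a sysctl flag for the SHA256 instructions (the key name is my assumption based on Apple’s hw.optional.arm.FEAT_* naming; it won’t exist on other platforms):

    import hashlib
    import ssl
    import subprocess

    # CPython's hashlib is usually backed by OpenSSL; this shows which build.
    print(ssl.OPENSSL_VERSION)
    print(hashlib.sha256(b"sanity").hexdigest())

    # Apple Silicon only: 1 means the CPU exposes SHA256 instructions.
    # (Key name assumed from Apple's hw.optional.arm.FEAT_* convention.)
    result = subprocess.run(
        ["sysctl", "-n", "hw.optional.arm.FEAT_SHA256"],
        capture_output=True, text=True,
    )
    print("SHA256 instructions:", result.stdout.strip() or "unknown")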
The bonus: SHA256 also provides significantly better security than MD5, whose collision resistance has been broken since 2004.

Benchmark code for reference
import hashlib
import os
import random
import string
import timeit

def generate_random_string(length):
    """Generate a random string of the specified length."""
    return ''.join(random.choices(string.ascii_letters + string.digits, k=length))

def create_test_file(size_mb):
    """Create a test file of the specified size in MB."""
    filename = f"test_{size_mb}mb.txt"
    with open(filename, 'wb') as f:
        f.write(os.urandom(size_mb * 1024 * 1024))
    return filename

def hash_md5(data):
    """Calculate the MD5 hash of data."""
    return hashlib.md5(data).hexdigest()

def hash_sha256(data):
    """Calculate the SHA256 hash of data."""
    return hashlib.sha256(data).hexdigest()

def benchmark_string_hashing():
    """Benchmark both algorithms on strings of different sizes."""
    # Small string (12 characters), 100,000 iterations
    small_str = generate_random_string(12).encode()
    print("\nSmall String (12 characters):")
    print(f"MD5: {timeit.timeit(lambda: hash_md5(small_str), number=100000):.4f} seconds")
    print(f"SHA256: {timeit.timeit(lambda: hash_sha256(small_str), number=100000):.4f} seconds")

    # Larger string (1KB), 1,000 iterations
    large_str = generate_random_string(1024).encode()
    print("\nLarge String (1KB):")
    print(f"MD5: {timeit.timeit(lambda: hash_md5(large_str), number=1000):.4f} seconds")
    print(f"SHA256: {timeit.timeit(lambda: hash_sha256(large_str), number=1000):.4f} seconds")

def benchmark_file_hashing():
    """Benchmark both algorithms on files of different sizes."""
    file_1mb = create_test_file(1)
    file_50mb = create_test_file(50)
    try:
        # 1MB file, 100 iterations. Each file is read into memory once
        # up front, so the timing covers hashing only, not disk I/O.
        print("\n1MB File:")
        with open(file_1mb, 'rb') as f:
            data = f.read()
        print(f"MD5: {timeit.timeit(lambda: hash_md5(data), number=100):.4f} seconds")
        print(f"SHA256: {timeit.timeit(lambda: hash_sha256(data), number=100):.4f} seconds")

        # 50MB file, 10 iterations
        print("\n50MB File:")
        with open(file_50mb, 'rb') as f:
            data = f.read()
        print(f"MD5: {timeit.timeit(lambda: hash_md5(data), number=10):.4f} seconds")
        print(f"SHA256: {timeit.timeit(lambda: hash_sha256(data), number=10):.4f} seconds")
    finally:
        # Clean up test files
        os.remove(file_1mb)
        os.remove(file_50mb)

if __name__ == "__main__":
    print("Benchmarking Hash Algorithms")
    print("=" * 30)
    benchmark_string_hashing()
    benchmark_file_hashing()
Output
Benchmarking Hash Algorithms
==============================
Small String (12 characters):
MD5: 0.0525 seconds
SHA256: 0.0376 seconds
Large String (1KB):
MD5: 0.0019 seconds
SHA256: 0.0007 seconds
1MB File:
MD5: 0.1538 seconds
SHA256: 0.0407 seconds
50MB File:
MD5: 0.7612 seconds
SHA256: 0.2083 seconds
Old assumptions about algorithm complexity don’t always translate to real-world performance on modern hardware.
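One follow-up for anyone copying this into real test scripts: the benchmark hashes buffers that are already in memory. For large files you generally want a streaming read so memory stays flat. Here’s a minimal sketch (sha256_file is my own helper name) using hashlib.file_digest, which was added in Python 3.11, with a chunked fallback for older versions:

    import hashlib
    import sys

    def sha256_file(path, chunk_size=1 << 20):
        """Hash a file with SHA256 without loading it all into memory."""
        with open(path, 'rb') as f:
            if hasattr(hashlib, 'file_digest'):  # Python 3.11+
                return hashlib.file_digest(f, 'sha256').hexdigest()
            # Fallback: feed the hash object in 1MB chunks
            h = hashlib.sha256()
            while chunk := f.read(chunk_size):
                h.update(chunk)
            return h.hexdigest()

    if __name__ == "__main__":
        print(sha256_file(sys.argv[1]))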