1
0
Fork 0
mirror of https://github.com/zhaofengli/attic.git synced 2025-03-05 16:27:06 +00:00

attic: Add simple chunking benchmarks

This commit is contained in:
Zhaofeng Li 2024-08-19 14:49:56 -04:00
parent deff31a850
commit a41e2d1724
5 changed files with 321 additions and 17 deletions

206
Cargo.lock generated
View file

@ -92,6 +92,12 @@ dependencies = [
"libc",
]
[[package]]
name = "anes"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
[[package]]
name = "anstream"
version = "0.6.14"
@ -231,6 +237,7 @@ dependencies = [
"base64 0.22.1",
"bytes",
"cc",
"criterion",
"cxx",
"cxx-build",
"digest",
@ -1050,6 +1057,12 @@ dependencies = [
"either",
]
[[package]]
name = "cast"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
version = "1.1.13"
@ -1088,6 +1101,33 @@ dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "ciborium"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
dependencies = [
"ciborium-io",
"ciborium-ll",
"serde",
]
[[package]]
name = "ciborium-io"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
[[package]]
name = "ciborium-ll"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
dependencies = [
"ciborium-io",
"half",
]
[[package]]
name = "clap"
version = "4.5.4"
@ -1307,6 +1347,44 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "criterion"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
dependencies = [
"anes",
"cast",
"ciborium",
"clap",
"criterion-plot",
"futures",
"is-terminal",
"itertools 0.10.5",
"num-traits",
"once_cell",
"oorandom",
"plotters",
"rayon",
"regex",
"serde",
"serde_derive",
"serde_json",
"tinytemplate",
"tokio",
"walkdir",
]
[[package]]
name = "criterion-plot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
dependencies = [
"cast",
"itertools 0.10.5",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.13"
@ -1316,6 +1394,25 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-queue"
version = "0.3.11"
@ -1331,6 +1428,12 @@ version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "crunchy"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "crypto-bigint"
version = "0.4.9"
@ -1698,6 +1801,11 @@ name = "fastcdc"
version = "3.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a71061d097bfa9a5a4d2efdec57990d9a88745020b365191d37e48541a1628f2"
dependencies = [
"async-stream",
"tokio",
"tokio-stream",
]
[[package]]
name = "fastrand"
@ -1956,6 +2064,16 @@ dependencies = [
"tracing",
]
[[package]]
name = "half"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
dependencies = [
"cfg-if",
"crunchy",
]
[[package]]
name = "hashbrown"
version = "0.12.3"
@ -2388,12 +2506,32 @@ version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
[[package]]
name = "is-terminal"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b"
dependencies = [
"hermit-abi",
"libc",
"windows-sys 0.52.0",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
[[package]]
name = "itertools"
version = "0.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.12.1"
@ -2776,6 +2914,12 @@ version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
[[package]]
name = "oorandom"
version = "11.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
[[package]]
name = "openssl-probe"
version = "0.1.5"
@ -3002,6 +3146,34 @@ version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
name = "plotters"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a15b6eccb8484002195a3e44fe65a4ce8e93a625797a063735536fd59cb01cf3"
dependencies = [
"num-traits",
"plotters-backend",
"plotters-svg",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "plotters-backend"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7"
[[package]]
name = "plotters-svg"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705"
dependencies = [
"plotters-backend",
]
[[package]]
name = "portable-atomic"
version = "1.6.0"
@ -3088,7 +3260,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1"
dependencies = [
"anyhow",
"itertools",
"itertools 0.12.1",
"proc-macro2",
"quote",
"syn 2.0.66",
@ -3168,6 +3340,26 @@ dependencies = [
"getrandom",
]
[[package]]
name = "rayon"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
@ -4069,7 +4261,7 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c"
dependencies = [
"itertools",
"itertools 0.12.1",
"nom",
"unicode_categories",
]
@ -4459,6 +4651,16 @@ dependencies = [
"time-core",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "tinyvec"
version = "1.6.0"

View file

@ -40,6 +40,8 @@ features = [
]
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports", "async_tokio"] }
fastcdc = { version = "*", features = ["tokio"] }
serde_json = "1.0.96"
tokio-test = "0.4.2"
@ -71,3 +73,7 @@ stream = ["tokio", "dep:async-stream"]
# Tokio runtime.
tokio = ["dep:tokio", "tokio/rt"]
[[bench]]
name = "chunking"
harness = false

84
attic/benches/chunking.rs Normal file
View file

@ -0,0 +1,84 @@
use std::io::Cursor;
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use futures::StreamExt;
use attic::chunking::chunk_stream;
use attic::testing::{get_fake_data, get_runtime};
/// Chunk size bounds for one benchmark case.
///
/// The same three values are handed to every chunker under test so that the
/// implementations are compared with identical settings.
struct Parameters {
/// Minimum chunk size passed to the chunker.
min_size: u32,
/// Average chunk size passed to the chunker.
avg_size: u32,
/// Maximum chunk size passed to the chunker.
max_size: u32,
}
pub fn bench_chunking(c: &mut Criterion) {
let rt = get_runtime();
let data = get_fake_data(128 * 1024 * 1024); // 128 MiB
let cases = [
(
"2K,4K,8K",
Parameters {
min_size: 2 * 1024,
avg_size: 4 * 1024,
max_size: 8 * 1024,
},
),
(
"8K,16K,32K",
Parameters {
min_size: 8 * 1024,
avg_size: 16 * 1024,
max_size: 32 * 1024,
},
),
(
"1M,4M,16M",
Parameters {
min_size: 1024 * 1024,
avg_size: 4 * 1024 * 1024,
max_size: 16 * 1024 * 1024,
},
),
];
let mut group = c.benchmark_group("chunking");
group.throughput(Throughput::Bytes(data.len() as u64));
for (case, params) in cases {
group.bench_with_input(BenchmarkId::new("ronomon", case), &params, |b, params| {
b.to_async(&rt).iter(|| async {
let cursor = Cursor::new(&data);
let mut chunks = chunk_stream(
cursor,
params.min_size as usize,
params.avg_size as usize,
params.max_size as usize,
);
while let Some(chunk) = chunks.next().await {
black_box(chunk).unwrap();
}
})
});
group.bench_with_input(BenchmarkId::new("v2020", case), &params, |b, params| {
b.to_async(&rt).iter(|| async {
let cursor = Cursor::new(&data);
let mut chunks = fastcdc::v2020::AsyncStreamCDC::new(
cursor,
params.min_size,
params.avg_size,
params.max_size,
);
let mut chunks = Box::pin(chunks.as_stream());
while let Some(chunk) = chunks.next().await {
black_box(chunk).unwrap();
}
})
});
}
group.finish();
}
// Register `bench_chunking` in a Criterion group named `benches`, then let
// Criterion generate the benchmark entry point that runs that group.
criterion_group!(benches, bench_chunking);
criterion_main!(benches);

View file

@ -74,12 +74,14 @@ mod tests {
use futures::StreamExt;
use tokio_test::block_on;
use crate::testing::get_fake_data;
/// Chunks and reconstructs a file.
#[test]
fn test_chunking_basic() {
fn case(size: usize) {
block_on(async move {
let test_file = get_data(size); // 32 MiB
let test_file = get_fake_data(size); // 32 MiB
let mut reconstructed_file = Vec::new();
let cursor = Cursor::new(&test_file);
@ -99,18 +101,4 @@ mod tests {
case(32 * 1024 * 1024);
case(32 * 1024 * 1024 + 1);
}
/// Returns `len` bytes of deterministic fake data.
///
/// The bytes are derived from a 32-bit linear congruential generator seeded
/// with 42 (multiplier 1664525, increment 1013904223), so every call with the
/// same `len` produces the same contents. Byte `i` is the low 8 bits of the
/// generator state shifted right by `i % 24`.
fn get_data(len: usize) -> Vec<u8> {
    let mut state = 42u32;
    let mut data = vec![0u8; len];

    for (i, byte) in data.iter_mut().enumerate() {
        // The generator relies on arithmetic modulo 2^32, hence the wrapping ops.
        state = state.wrapping_mul(1664525u32).wrapping_add(1013904223u32);
        // `i % 24` keeps the shift in 0..=23, so 8 bits of state always remain.
        *byte = ((state >> (i % 24)) & 0xff) as u8;
    }

    data
}
}

View file

@ -1,3 +1,27 @@
//! Utilities for testing.
pub mod shadow_store;
use tokio::runtime::Runtime;
/// Builds a fresh current-thread Tokio runtime with the time driver enabled.
///
/// # Panics
///
/// Panics if the runtime cannot be built.
pub fn get_runtime() -> Runtime {
    let mut builder = tokio::runtime::Builder::new_current_thread();
    builder.enable_time();
    builder.build().unwrap()
}
/// Generates `len` bytes of reproducible filler data.
///
/// A 32-bit linear congruential generator (seed 42, multiplier 1664525,
/// increment 1013904223) is advanced once per byte; byte `idx` is the low
/// 8 bits of the generator state shifted right by `idx % 24`. The output
/// depends only on `len`, and shorter outputs are prefixes of longer ones.
pub fn get_fake_data(len: usize) -> Vec<u8> {
    let mut lcg = 42u32;

    (0..len)
        .map(|idx| {
            // Arithmetic is modulo 2^32; overflow is expected and discarded.
            lcg = lcg.wrapping_mul(1664525u32).wrapping_add(1013904223u32);
            ((lcg >> (idx % 24)) & 0xff) as u8
        })
        .collect()
}