gfidx/benches/scan_gff3.rs
eternal-flame-AD 184886c144
init
Signed-off-by: eternal-flame-AD <yume@yumechi.jp>
2024-08-06 19:49:12 -05:00

129 lines
4.5 KiB
Rust

use std::{
io::{Cursor, Read},
sync::atomic::AtomicU64,
};
use criterion::{criterion_group, criterion_main, Criterion};
use flate2::read::GzDecoder;
use gfidx::{
attr_trie_def,
gff3::{Gff3Line, Gff3Read},
index::range::DEFAULT_RANGE_INTERVAL,
io::{stream::Gff3SeekableStreamReader, tee::Gff3BroadcastRead},
};
/// Path of the gzip-compressed GFF3 annotation file that every benchmark in
/// this file opens and decompresses. The path is relative, so it is resolved
/// against the working directory of the benchmark process.
const TEST_FILE: &str = "data/gencode.v46.chr_patch_hapl_scaff.annotation.gff3.gz";
/// Benchmarks a sequential scan of the GFF3 test file.
///
/// The gzip archive at `TEST_FILE` is read and decompressed into memory once,
/// before the timed closure runs, so that file I/O and inflation do not end up
/// in the measurement. Each timed iteration wraps the decompressed bytes in a
/// fresh `Gff3SeekableStreamReader`, reads every line, and tallies directive
/// lines, comment lines and the number of attributes on feature lines.
///
/// # Panics
///
/// Panics if the test file cannot be opened or decompressed, or if the reader
/// returns an error for any line.
fn benchmark_scan_gff3(c: &mut Criterion) {
    use std::sync::atomic::Ordering;

    let mut group = c.benchmark_group("scan_gff3");
    group.significance_level(0.1).sample_size(10);
    group.bench_function("scan_gff3_cow", |b| {
        // Setup happens once per benchmark, outside `b.iter`, so only the
        // scan itself is timed.
        let file = std::fs::File::open(TEST_FILE)
            .unwrap_or_else(|e| panic!("cannot open {}: {}", TEST_FILE, e));
        let mut data = Vec::new();
        GzDecoder::new(file)
            .read_to_end(&mut data)
            .unwrap_or_else(|e| panic!("cannot decompress {}: {}", TEST_FILE, e));

        b.iter(|| {
            let mut reader =
                Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len());
            let (dir_cnt, cmt_cnt, attr_cnt) =
                (AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0));
            while let Some(line) = reader.read_line().unwrap() {
                // Pick the counter and the increment for this line kind, then
                // apply a single atomic add.
                let (counter, delta) = match line {
                    Gff3Line::Comment(_) => (&cmt_cnt, 1),
                    Gff3Line::Directive(_) => (&dir_cnt, 1),
                    Gff3Line::Feature { attributes, .. } => (&attr_cnt, attributes.len() as u64),
                };
                counter.fetch_add(delta, Ordering::Relaxed);
            }
            // Returning the counters hands them to Criterion, which keeps the
            // counting work from being optimised away.
            (dir_cnt, cmt_cnt, attr_cnt)
        })
    });
    // Criterion recommends finishing a group explicitly rather than relying on drop.
    group.finish();
}
/// Benchmarks scanning the GFF3 test file through `Gff3BroadcastRead`.
///
/// The gzip archive at `TEST_FILE` is read and decompressed into memory once,
/// before the timed closure runs, so that file I/O and inflation do not end up
/// in the measurement. Each timed iteration builds a reader over the
/// decompressed bytes, registers two channels on a `Gff3BroadcastRead`, and
/// runs the broadcast inside a rayon scope. Both channels add to the same
/// three counters (directive lines, comment lines, feature attributes).
///
/// # Panics
///
/// Panics if the test file cannot be opened or decompressed, or if
/// `Gff3BroadcastRead::run` returns an error.
fn benchmark_scan_gff3_tee(c: &mut Criterion) {
    use std::sync::atomic::Ordering;

    let mut group = c.benchmark_group("scan_gff3_tee");
    group.significance_level(0.1).sample_size(10);
    group.bench_function("scan_gff3_tee", |b| {
        // Setup happens once per benchmark, outside `b.iter`, so only the
        // broadcast scan itself is timed.
        let file = std::fs::File::open(TEST_FILE)
            .unwrap_or_else(|e| panic!("cannot open {}: {}", TEST_FILE, e));
        let mut data = Vec::new();
        GzDecoder::new(file)
            .read_to_end(&mut data)
            .unwrap_or_else(|e| panic!("cannot decompress {}: {}", TEST_FILE, e));

        b.iter(|| {
            let reader = Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len());
            let (dir_cnt, cmt_cnt, attr_cnt) =
                (AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0));
            rayon::scope(|s| {
                // NOTE(review): the second argument (1000) is presumably a
                // chunk or buffer size — confirm against `Gff3BroadcastRead::new`.
                let mut tee = Gff3BroadcastRead::new(reader, 1000);
                for _ in 0..2 {
                    tee.add_channel(Box::new(|_, chunk| {
                        for (_, line) in chunk {
                            // Pick the counter and the increment for this line
                            // kind, then apply a single atomic add.
                            let (counter, delta) = match line {
                                Gff3Line::Comment(_) => (&cmt_cnt, 1),
                                Gff3Line::Directive(_) => (&dir_cnt, 1),
                                Gff3Line::Feature { attributes, .. } => {
                                    (&attr_cnt, attributes.len() as u64)
                                }
                            };
                            counter.fetch_add(delta, Ordering::Relaxed);
                        }
                    }));
                }
                s.spawn(|_| {
                    tee.run().unwrap();
                });
            });
            // Returning the counters hands them to Criterion, which keeps the
            // counting work from being optimised away.
            (dir_cnt, cmt_cnt, attr_cnt)
        })
    });
    // Criterion recommends finishing a group explicitly rather than relying on drop.
    group.finish();
}
/// Benchmarks building a `Gff3Index` over the GFF3 test file.
///
/// The gzip archive at `TEST_FILE` is decompressed into memory once, outside
/// the timed closure. Each timed iteration calls `Gff3Index::build` with a
/// factory that creates a new reader over the decompressed bytes, the default
/// range interval, and an attribute trie definition listing seven attribute
/// names.
///
/// # Panics
///
/// Panics if the test file cannot be opened or decompressed, or if building
/// the index returns an error.
fn benchmark_index_gff3(c: &mut Criterion) {
    let mut group = c.benchmark_group("index_gff3");
    group.significance_level(0.1);
    group.sample_size(10);
    group.bench_function("index_gff3", |b| {
        // Load the decompressed annotation once; the timed closure owns it.
        let archive = std::fs::File::open(TEST_FILE).unwrap();
        let mut decoded = Vec::new();
        GzDecoder::new(archive).read_to_end(&mut decoded).unwrap();

        b.iter(move || {
            // Factory handed to the index builder; every call creates a new
            // reader positioned at the start of the in-memory data.
            let open_reader =
                || Gff3SeekableStreamReader::new_with_size(Cursor::new(&decoded), decoded.len());
            gfidx::index::Gff3Index::build(
                &open_reader,
                DEFAULT_RANGE_INTERVAL,
                &attr_trie_def![
                    "ID",
                    "gene_id",
                    "gene_name",
                    "gene_type",
                    "transcript_id",
                    "transcript_name",
                    "exon_id"
                ],
            )
            .unwrap()
        })
    });
    // Equivalent to letting the group drop at the end of the function.
    group.finish();
}
// Registers the three benchmark functions defined in this file under the
// group name `benches`.
criterion_group!(
benches,
benchmark_scan_gff3,
benchmark_scan_gff3_tee,
benchmark_index_gff3
);
// Generates the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);