129 lines
4.5 KiB
Rust
129 lines
4.5 KiB
Rust
use std::{
|
|
io::{Cursor, Read},
|
|
sync::atomic::AtomicU64,
|
|
};
|
|
|
|
use criterion::{criterion_group, criterion_main, Criterion};
|
|
use flate2::read::GzDecoder;
|
|
use gfidx::{
|
|
attr_trie_def,
|
|
gff3::{Gff3Line, Gff3Read},
|
|
index::range::DEFAULT_RANGE_INTERVAL,
|
|
io::{stream::Gff3SeekableStreamReader, tee::Gff3BroadcastRead},
|
|
};
|
|
|
|
const TEST_FILE: &str = "data/gencode.v46.chr_patch_hapl_scaff.annotation.gff3.gz";
|
|
|
|
fn benchmark_scan_gff3(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("scan_gff3");
|
|
group.significance_level(0.1).sample_size(10);
|
|
group.bench_function("scan_gff3_cow", |b| {
|
|
b.iter(|| {
|
|
let mut data = Vec::new();
|
|
GzDecoder::new(std::fs::File::open(TEST_FILE).unwrap())
|
|
.read_to_end(&mut data)
|
|
.unwrap();
|
|
let mut reader =
|
|
Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len());
|
|
|
|
let (dir_cnt, cmt_cnt, attr_cnt) =
|
|
(AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0));
|
|
|
|
while let Some(line) = reader.read_line().unwrap() {
|
|
match line {
|
|
Gff3Line::Comment(_) => {
|
|
cmt_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
|
|
}
|
|
Gff3Line::Directive(_) => {
|
|
dir_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
|
|
}
|
|
Gff3Line::Feature { attributes, .. } => attr_cnt.fetch_add(
|
|
attributes.len() as u64,
|
|
std::sync::atomic::Ordering::Relaxed,
|
|
),
|
|
};
|
|
}
|
|
(dir_cnt, cmt_cnt, attr_cnt)
|
|
})
|
|
});
|
|
}
|
|
|
|
fn benchmark_scan_gff3_tee(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("scan_gff3_tee");
|
|
group.significance_level(0.1).sample_size(10);
|
|
group.bench_function("scan_gff3_tee", |b| {
|
|
b.iter(|| {
|
|
let mut data = Vec::new();
|
|
GzDecoder::new(std::fs::File::open(TEST_FILE).unwrap())
|
|
.read_to_end(&mut data)
|
|
.unwrap();
|
|
let reader = Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len());
|
|
|
|
let (dir_cnt, cmt_cnt, attr_cnt) =
|
|
(AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0));
|
|
rayon::scope(|s| {
|
|
let mut tee = Gff3BroadcastRead::new(reader, 1000);
|
|
|
|
for _ in 0..2 {
|
|
tee.add_channel(Box::new(|_, chunk| {
|
|
for (_, line) in chunk {
|
|
match line {
|
|
Gff3Line::Comment(_) => {
|
|
cmt_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
|
|
}
|
|
Gff3Line::Directive(_) => {
|
|
dir_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
|
|
}
|
|
Gff3Line::Feature { attributes, .. } => attr_cnt.fetch_add(
|
|
attributes.len() as u64,
|
|
std::sync::atomic::Ordering::Relaxed,
|
|
),
|
|
};
|
|
}
|
|
}));
|
|
}
|
|
|
|
s.spawn(|_| {
|
|
tee.run().unwrap();
|
|
});
|
|
});
|
|
(dir_cnt, cmt_cnt, attr_cnt)
|
|
})
|
|
});
|
|
}
|
|
|
|
fn benchmark_index_gff3(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("index_gff3");
|
|
group.significance_level(0.1).sample_size(10);
|
|
group.bench_function("index_gff3", |b| {
|
|
let mut data = Vec::new();
|
|
GzDecoder::new(std::fs::File::open(TEST_FILE).unwrap())
|
|
.read_to_end(&mut data)
|
|
.unwrap();
|
|
b.iter(move || {
|
|
gfidx::index::Gff3Index::build(
|
|
&|| Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len()),
|
|
DEFAULT_RANGE_INTERVAL,
|
|
&attr_trie_def![
|
|
"ID",
|
|
"gene_id",
|
|
"gene_name",
|
|
"gene_type",
|
|
"transcript_id",
|
|
"transcript_name",
|
|
"exon_id"
|
|
],
|
|
)
|
|
.unwrap()
|
|
})
|
|
});
|
|
}
|
|
|
|
criterion_group!(
|
|
benches,
|
|
benchmark_scan_gff3,
|
|
benchmark_scan_gff3_tee,
|
|
benchmark_index_gff3
|
|
);
|
|
|
|
criterion_main!(benches);
|