Signed-off-by: eternal-flame-AD <yume@yumechi.jp>
This commit is contained in:
ゆめ 2024-08-06 19:49:12 -05:00
commit 184886c144
No known key found for this signature in database
24 changed files with 4946 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@@ -0,0 +1,2 @@
data/
target/

1038
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

44
Cargo.toml Normal file
View file

@@ -0,0 +1,44 @@
[package]
name = "gfidx"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib", "rlib"]
[dependencies]
ciborium = "0.2.2"
clap = { version = "4.5.13", features = ["derive"] }
flate2 = "1.0.30"
hashbrown = { version = "0.14.5", features = ["inline-more", "serde"] }
indicatif = "0.17.8"
itertools = "0.13.0"
libc = "0.2.155"
num-traits = "0.2.19"
ouroboros = "0.18.4"
rayon = { version = "1.10.0", optional = true }
serde = { version = "1.0.204", features = ["derive"] }
tabled = "0.15.0"
thiserror = "1.0.63"
varint-rs = "2.2.0"
[dev-dependencies]
criterion = { version = "0.5.1", features = ["html_reports"] }
[features]
default = ["parallel"]
async = []
tokio = ["async"]
parallel = ["rayon"]
[[bench]]
name = "scan_gff3"
harness = false
[profile.release]
debug = true
lto = true
[[bin]]
name = "gfidx"
path = "src/bin/gfidx.rs"

129
benches/scan_gff3.rs Normal file
View file

@@ -0,0 +1,129 @@
use std::{
io::{Cursor, Read},
sync::atomic::AtomicU64,
};
use criterion::{criterion_group, criterion_main, Criterion};
use flate2::read::GzDecoder;
use gfidx::{
attr_trie_def,
gff3::{Gff3Line, Gff3Read},
index::range::DEFAULT_RANGE_INTERVAL,
io::{stream::Gff3SeekableStreamReader, tee::Gff3BroadcastRead},
};
const TEST_FILE: &str = "data/gencode.v46.chr_patch_hapl_scaff.annotation.gff3.gz";
/// Benchmark a single-threaded scan over the annotation file, counting
/// directives, comments, and total feature attributes.
fn benchmark_scan_gff3(c: &mut Criterion) {
    let mut group = c.benchmark_group("scan_gff3");
    group.significance_level(0.1).sample_size(10);
    // Decompress once, outside the measured closure, so the benchmark times
    // the scan itself rather than gzip inflation (this matches
    // benchmark_index_gff3 below, which already hoisted the load).
    let mut data = Vec::new();
    GzDecoder::new(std::fs::File::open(TEST_FILE).unwrap())
        .read_to_end(&mut data)
        .unwrap();
    group.bench_function("scan_gff3_cow", |b| {
        b.iter(|| {
            let mut reader =
                Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len());
            let (dir_cnt, cmt_cnt, attr_cnt) =
                (AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0));
            while let Some(line) = reader.read_line().unwrap() {
                match line {
                    Gff3Line::Comment(_) => {
                        cmt_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
                    }
                    Gff3Line::Directive(_) => {
                        dir_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
                    }
                    Gff3Line::Feature { attributes, .. } => attr_cnt.fetch_add(
                        attributes.len() as u64,
                        std::sync::atomic::Ordering::Relaxed,
                    ),
                };
            }
            // Return the counters so the work cannot be optimized away.
            (dir_cnt, cmt_cnt, attr_cnt)
        })
    });
}
/// Benchmark the broadcast ("tee") reader: one producer feeding two
/// identical counting consumers under a rayon scope.
fn benchmark_scan_gff3_tee(c: &mut Criterion) {
    let mut group = c.benchmark_group("scan_gff3_tee");
    group.significance_level(0.1).sample_size(10);
    // Decompress once up front: previously this ran inside b.iter(), so the
    // measurement was dominated by per-iteration gzip inflation.
    let mut data = Vec::new();
    GzDecoder::new(std::fs::File::open(TEST_FILE).unwrap())
        .read_to_end(&mut data)
        .unwrap();
    group.bench_function("scan_gff3_tee", |b| {
        b.iter(|| {
            let reader = Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len());
            let (dir_cnt, cmt_cnt, attr_cnt) =
                (AtomicU64::new(0), AtomicU64::new(0), AtomicU64::new(0));
            rayon::scope(|s| {
                let mut tee = Gff3BroadcastRead::new(reader, 1000);
                // Two identical consumers exercise the broadcast fan-out.
                for _ in 0..2 {
                    tee.add_channel(Box::new(|_, chunk| {
                        for (_, line) in chunk {
                            match line {
                                Gff3Line::Comment(_) => {
                                    cmt_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
                                }
                                Gff3Line::Directive(_) => {
                                    dir_cnt.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
                                }
                                Gff3Line::Feature { attributes, .. } => attr_cnt.fetch_add(
                                    attributes.len() as u64,
                                    std::sync::atomic::Ordering::Relaxed,
                                ),
                            };
                        }
                    }));
                }
                s.spawn(|_| {
                    tee.run().unwrap();
                });
            });
            (dir_cnt, cmt_cnt, attr_cnt)
        })
    });
}
// Benchmarks full index construction (range index, relation index, and the
// configured attribute tries) over an in-memory copy of the annotation file.
fn benchmark_index_gff3(c: &mut Criterion) {
    let mut group = c.benchmark_group("index_gff3");
    group.significance_level(0.1).sample_size(10);
    group.bench_function("index_gff3", |b| {
        // Decompress once, outside the timed closure.
        let mut data = Vec::new();
        GzDecoder::new(std::fs::File::open(TEST_FILE).unwrap())
            .read_to_end(&mut data)
            .unwrap();
        b.iter(move || {
            gfidx::index::Gff3Index::build(
                // The builder takes a reader *factory* — presumably so it can
                // scan the input more than once; verify against Gff3Index::build.
                &|| Gff3SeekableStreamReader::new_with_size(Cursor::new(&data), data.len()),
                DEFAULT_RANGE_INTERVAL,
                &attr_trie_def![
                    "ID",
                    "gene_id",
                    "gene_name",
                    "gene_type",
                    "transcript_id",
                    "transcript_name",
                    "exon_id"
                ],
            )
            .unwrap()
        })
    });
}
// Register all benchmark entry points with criterion's harness
// (Cargo.toml sets `harness = false` for this bench target).
criterion_group!(
    benches,
    benchmark_scan_gff3,
    benchmark_scan_gff3_tee,
    benchmark_index_gff3
);
criterion_main!(benches);

7
demo.txt Normal file
View file

@@ -0,0 +1,7 @@
# CDCA8 - Cell Division Cycle Associated 8
relation ENSG00000134690.11
# Nucleotide Sugar Transporter Family
trie gene_name SLC35
range chr3 650000 1500000

14
src/bar.rs Normal file
View file

@@ -0,0 +1,14 @@
use indicatif::ProgressBar;
/// Apply the crate's shared progress-bar styling; `has_size` selects the
/// template that shows a total-bytes column and an ETA.
pub(crate) fn style_bar(bar: &ProgressBar, has_size: bool) {
    let template = if has_size {
        "{spinner:.green} {prefix} [{elapsed_precise}] [{bar:40.cyan/blue}] {decimal_bytes}/{decimal_total_bytes} ETA: {eta}"
    } else {
        "{spinner:.green} {prefix} [{elapsed_precise}] [{bar:40.cyan/blue}] {decimal_bytes} Processed"
    };
    let style = indicatif::ProgressStyle::default_bar()
        .template(template)
        .unwrap()
        .progress_chars("=> ");
    bar.set_style(style);
}

328
src/bin/gfidx.rs Normal file
View file

@@ -0,0 +1,328 @@
use std::{
fs::File,
io::{self, BufReader, BufWriter, Read, Write},
};
use clap::Parser;
use gfidx::{
attr_trie_def,
gff3::Gff3Read,
index::{range::DEFAULT_RANGE_INTERVAL, Gff3Index},
io::{
humanize_size,
stream::{Gff3SeekableStreamReader, Gff3StreamReader},
CountingReader, CountingWriter,
},
Error,
};
// Top-level CLI arguments: a single required subcommand.
// (Plain comments are used here on purpose: `///` doc comments would be
// picked up by clap as help text and change the CLI's --help output.)
#[derive(Parser, Debug)]
#[clap(name = "gfidx")]
struct Args {
    #[clap(subcommand)]
    subcmd: SubCommand,
}
// The two operating modes: build an index, or query an existing one.
#[derive(Parser, Debug)]
enum SubCommand {
    #[clap(name = "index")]
    Index(Index),
    #[clap(name = "query")]
    Query(Query),
}
// Arguments for `gfidx index`.
#[derive(Parser, Debug)]
struct Index {
    // Input GFF3 file; gzip-compressed input is detected by a `.gz` suffix.
    #[clap(short, long)]
    file: String,
    // Output path; defaults to `<file>.gfidx`.
    #[clap(short, long)]
    output: Option<String>,
}
// Arguments for `gfidx query`.
#[derive(Parser, Debug)]
struct Query {
    // GFF3 file the index was built from (opened seekable, uncompressed).
    #[clap(short, long)]
    file: String,
    // Index path; defaults to `<file>.gfidx`.
    #[clap(short, long)]
    index: Option<String>,
}
/// Run `f`, printing its wall-clock duration to stderr tagged with `name`,
/// and return whatever `f` produced.
fn timed<T, F: FnOnce() -> T>(name: &str, f: F) -> T {
    let started = std::time::Instant::now();
    let out = f();
    eprintln!("{} took: {:?}", name, started.elapsed());
    out
}
// Return the number of bytes read since the last reset, then zero the
// counter — used to report the I/O cost of each individual query.
fn report_and_reset_reader<R>(reader: &CountingReader<R>) -> usize {
    let count = reader.count();
    reader.reset_count();
    count
}
fn main() {
use tabled::settings::Style;
let args: Args = Args::parse();
match args.subcmd {
SubCommand::Index(index) => {
println!("Indexing {}", index.file);
timed("Index GFF3", || {
let idx = if index.file.ends_with(".gz") {
let gz =
flate2::read::GzDecoder::new(std::fs::File::open(&index.file).unwrap());
let size = gz.bytes().count();
Gff3Index::build(
&|| {
let gz = flate2::read::GzDecoder::new(
std::fs::File::open(&index.file).unwrap(),
);
Gff3StreamReader::new_with_size(gz, size)
},
DEFAULT_RANGE_INTERVAL,
&attr_trie_def![
"ID",
"gene_id",
"gene_name",
"gene_type",
"transcript_id",
"transcript_name",
"exon_id"
],
)
.expect("Failed to build index")
} else {
Gff3Index::build(
&|| {
Gff3SeekableStreamReader::open(
std::fs::File::open(&index.file).unwrap(),
)
.unwrap()
},
DEFAULT_RANGE_INTERVAL,
&attr_trie_def![
"ID",
"gene_id",
"gene_name",
"gene_type",
"transcript_id",
"transcript_name",
"exon_id"
],
)
.expect("Failed to build index")
};
let out_file = index
.output
.unwrap_or_else(|| format!("{}.gfidx", index.file));
println!("Index built, writing to {}", out_file);
let mut table = tabled::builder::Builder::default();
let mut sum = 0;
let mut range_index_counter = CountingWriter::new(io::sink());
let mut relation_index_counter = CountingWriter::new(io::sink());
idx.range_index
.as_ref()
.map(|idx| ciborium::into_writer(idx, &mut range_index_counter))
.unwrap_or(Ok(()))
.expect("Failed to write range index");
table.push_record(["Range Index", &humanize_size(range_index_counter.count())]);
idx.relation_index
.as_ref()
.map(|idx| ciborium::into_writer(idx, &mut relation_index_counter))
.unwrap_or(Ok(()))
.expect("Failed to write relation index");
table.push_record([
"Relation Index",
&humanize_size(relation_index_counter.count()),
]);
sum += relation_index_counter.count();
for (name, trie) in idx.tries.iter() {
let mut counter = CountingWriter::new(io::sink());
ciborium::into_writer(trie, &mut counter).expect("Failed to write trie");
table
.push_record([&format!("Trie: {}", name), &humanize_size(counter.count())]);
sum += counter.count();
}
table.push_record(["Total", &humanize_size(sum)]);
ciborium::into_writer(
&idx,
BufWriter::new(File::create(out_file).expect("Failed to create output file")),
)
.expect("Failed to write index");
println!("{}", table.build().with(Style::ascii_rounded()));
});
}
SubCommand::Query(query) => {
let idx_file = query
.index
.unwrap_or_else(|| format!("{}.gfidx", query.file));
let mut gff = Gff3SeekableStreamReader::open_prebuffered(io::BufReader::new(
CountingReader::new(File::open(&query.file).expect("Failed to open GFF3 file")),
))
.expect("Failed to open GFF3 file");
let mut idx: Gff3Index = timed("Read back index file", || {
ciborium::from_reader(BufReader::new(
File::open(idx_file).expect("Failed to open index file"),
))
.expect("Failed to read index")
});
timed("Reconstruct index", || {
if let Some(i) = idx.relation_index.as_mut() {
i.reconstruct_path();
}
});
report_and_reset_reader(gff.borrow_reader());
let stdin = io::stdin();
let mut stdout = io::stdout().lock();
loop {
write!(stdout, "> ").unwrap();
stdout.flush().unwrap();
let mut line = String::new();
stdin.read_line(&mut line).unwrap();
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
let parts: Vec<_> = line.split_whitespace().collect();
if parts.is_empty() {
eprintln!("Invalid query");
continue;
}
let query = parts[0];
match query {
"quit" | "exit" | "q" => {
break;
}
"range" => {
if parts.len() < 4 {
eprintln!("Invalid query");
continue;
}
let seqid = parts[1];
let start = parts[2].parse().expect("Invalid start");
let end = parts[3].parse().expect("Invalid end");
match timed("Query", || {
let mut count = 0;
idx.range_index
.as_ref()
.expect("No range index")
.query_lines(&mut gff, seqid, start, end, &mut |line| {
count += 1;
println!("{}", line);
})?;
Ok::<_, Error<io::Error>>(count)
}) {
Ok(count) => {
eprintln!("{} lines found", count);
eprintln!(
"Query cost {} bytes",
humanize_size(report_and_reset_reader(gff.borrow_reader()))
);
}
Err(e) => {
eprintln!("Error: {:?}", e);
}
}
}
"relation" => {
if parts.len() < 2 {
eprintln!("Invalid query");
continue;
}
let id = parts[1];
match timed("Query", || {
let mut count = 0;
let idx = idx.relation_index.as_ref().expect("No relation index");
for seqid in idx.list_seqids().collect::<Vec<_>>() {
idx.traverse_children(&mut gff, seqid, id, |line| {
println!("{}", line);
count += 1;
})?;
}
Ok::<_, Error<io::Error>>(count)
}) {
Ok(count) => {
eprintln!("{} lines found", count);
eprintln!(
"Query cost {} bytes",
humanize_size(report_and_reset_reader(gff.borrow_reader()))
);
}
Err(e) => {
eprintln!("Error: {:?}", e);
}
}
}
"trie" => {
if parts.len() < 3 {
eprintln!("Invalid query");
continue;
}
let name = parts[1];
let prefix = parts[2];
let trie = match idx.tries.get(name) {
Some(trie) => trie,
None => {
eprintln!("Trie not found");
continue;
}
};
let count = timed("Query", || {
let mut count = 0;
trie.for_each_seqid(|_, trie| {
trie.traverse(
prefix,
&mut |offset| {
gff.seek_to(*offset).unwrap().unwrap();
println!("{}", gff.read_line().unwrap().unwrap());
count += 1;
},
true,
);
});
count
});
eprintln!("{} lines found", count);
eprintln!(
"Query cost {} bytes",
humanize_size(report_and_reset_reader(gff.borrow_reader()))
);
}
_ => {
eprintln!("Invalid query");
}
}
}
}
}
}

159
src/ds/linkedlist.rs Normal file
View file

@@ -0,0 +1,159 @@
use std::{
borrow::Borrow,
ops::{Deref, DerefMut, Index, IndexMut},
};
/// One link of a singly linked list; derefs to its payload.
#[derive(Debug, Clone)]
pub struct Node<T> {
    pub payload: T,
    pub next: Option<Box<Node<T>>>,
}
impl<T> From<T> for Node<T> {
    /// A detached node holding `payload`.
    fn from(payload: T) -> Self {
        Node {
            payload,
            next: None,
        }
    }
}
impl<T> Deref for Node<T> {
    type Target = T;
    fn deref(&self) -> &T {
        &self.payload
    }
}
impl<T> DerefMut for Node<T> {
    fn deref_mut(&mut self) -> &mut T {
        &mut self.payload
    }
}
impl<T> Node<T> {
    /// Replace this node's tail with `next`; returns the newly attached node.
    fn attach<P: Into<Node<T>>>(&mut self, next: P) -> &mut Self {
        self.next = Some(Box::new(next.into()));
        self.next.as_mut().unwrap()
    }
}
impl<'a, T> From<&'a Node<T>> for Vec<&'a T> {
    /// Collect borrowed payloads from `node` to the end of its chain.
    fn from(node: &'a Node<T>) -> Self {
        let mut out = Vec::new();
        let mut cursor = Some(node);
        while let Some(n) = cursor {
            out.push(&n.payload);
            cursor = n.next.as_deref();
        }
        out
    }
}
/// A singly linked list with O(n) tail append, addressed via `Index`.
#[derive(Debug, Clone, Default)]
pub struct List<T>(Option<Box<Node<T>>>);
impl<T> List<T> {
    /// Append `node` at the tail; returns `self` so pushes can be chained.
    pub fn push<P: Into<Node<T>>>(&mut self, node: P) -> &mut Self {
        let new_node = node.into();
        // Walk to the last node, if there is one.
        let mut tail = self.0.as_mut();
        while tail.as_ref().map_or(false, |t| t.next.is_some()) {
            tail = tail.unwrap().next.as_mut();
        }
        match tail {
            Some(last) => {
                last.attach(new_node);
            }
            None => self.0 = Some(Box::new(new_node)),
        }
        self
    }
}
impl<'a, T> From<&'a List<T>> for Vec<&'a T> {
    /// Collect borrowed payloads front-to-back; empty for an empty list.
    fn from(list: &'a List<T>) -> Self {
        match list.0.as_deref() {
            Some(head) => head.into(),
            None => Vec::new(),
        }
    }
}
impl<T> Index<usize> for List<T> {
    type Output = Node<T>;
    /// Panics when `index` is past the end of the list.
    fn index(&self, index: usize) -> &Node<T> {
        let mut cursor = self.0.as_deref();
        for _ in 0..index {
            cursor = cursor.unwrap().next.as_deref();
        }
        cursor.unwrap()
    }
}
impl<T> IndexMut<usize> for List<T> {
    /// Panics when `index` is past the end of the list.
    fn index_mut(&mut self, index: usize) -> &mut Node<T> {
        let mut cursor = self.0.as_deref_mut();
        for _ in 0..index {
            cursor = cursor.unwrap().next.as_deref_mut();
        }
        cursor.unwrap()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_list() {
        // Asserts that `$from.into()` equals `$to`. The inner block is a
        // type-inference trick: assigning `&$to` to a binding of `&target`'s
        // type forces `into()` to resolve to `$to`'s type.
        macro_rules! assert_into {
            ($from:expr, $to:expr) => {
                let target = $from.into();
                {
                    let mut _tmp = &target;
                    _tmp = &$to;
                }
                assert_eq!(target, $to);
            };
        }
        // Integer list: empty, single push, chained pushes.
        let mut list = List::default();
        assert!(Into::<Vec<_>>::into(&list).is_empty());
        list.push(1);
        assert_into!(&list, vec![&1]);
        list.push(2).push(3);
        assert_into!(&list, vec![&1, &2, &3]);
        list.push(4);
        assert_into!(&list, vec![&1, &2, &3, &4]);
        // String list: also exercises IndexMut + DerefMut on a node payload.
        let mut str_list = List::default();
        let str_1 = String::from("1");
        let str_2 = String::from("2");
        let str_3 = String::from("3");
        assert!(Into::<Vec<_>>::into(&str_list).is_empty());
        str_list.push(str_1).push(str_2).push(str_3);
        assert_into!(
            &str_list,
            ["1".to_string(), "2".to_string(), "3".to_string()]
                .iter()
                .collect::<Vec<_>>()
        );
        (*str_list[1]).push('c');
        assert_into!(
            &str_list,
            ["1".to_string(), "2c".to_string(), "3".to_string()]
                .iter()
                .collect::<Vec<_>>()
        );
    }
}

3
src/ds/mod.rs Normal file
View file

@@ -0,0 +1,3 @@
//! In-memory data structures used by the index implementations.
pub mod linkedlist;
pub mod tree;
pub mod trie;

285
src/ds/tree.rs Normal file
View file

@@ -0,0 +1,285 @@
use std::ops::{Index, IndexMut};
use serde::{Deserialize, Serialize};
/// Read-only access to a node's child collection, keyed by `I`.
pub trait FindChild<T, I>
where
    Self: Default,
{
    fn n_children(&self) -> usize;
    /// Visit child keys until `f` returns `false`.
    fn iter_keys<F: FnMut(&I) -> bool>(&self, f: &mut F);
    fn find_child(&self, index: &I) -> Option<&Node<T, I, Self>>;
    fn find_child_mut(&mut self, index: &I) -> Option<&mut Node<T, I, Self>>;
}
/// Mutating access to a node's child collection.
pub trait ModifyChild<T, I>
where
    Self: FindChild<T, I>,
{
    fn add_child(&mut self, index: I, node: Node<T, I, Self>);
    fn remove_child(&mut self, index: &I);
}
/// A tree node: a payload plus a child container `C` keyed by `I`.
#[derive(Debug, Clone)]
pub struct Node<T, I, C>
where
    C: FindChild<T, I>,
{
    pub payload: T,
    pub children: C,
    // Ties the otherwise-unused key type parameter `I` to the node type.
    _marker: std::marker::PhantomData<I>,
}
// Serialized as a `(payload, children)` tuple; the `PhantomData` marker is
// omitted from the wire format.
impl<T, I, C> Serialize for Node<T, I, C>
where
    T: Serialize,
    C: Serialize + FindChild<T, I>,
{
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::ser::Serializer,
    {
        (&self.payload, &self.children).serialize(serializer)
    }
}
// Mirror of the tuple encoding used by the `Serialize` impl above.
impl<'de, T, I, C> Deserialize<'de> for Node<T, I, C>
where
    T: Deserialize<'de>,
    C: Deserialize<'de> + FindChild<T, I>,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        let (payload, children) = Deserialize::deserialize(deserializer)?;
        Ok(Self {
            payload,
            children,
            _marker: std::marker::PhantomData,
        })
    }
}
impl<T, I, C> Default for Node<T, I, C>
where
    T: Default,
    C: Default + FindChild<T, I>,
{
    /// A childless node carrying `T::default()`.
    fn default() -> Self {
        Self::new(T::default(), C::default())
    }
}
impl<T, I, C> Node<T, I, C>
where
    C: FindChild<T, I>,
{
    /// Build a node from a payload and an existing child container.
    #[inline(always)]
    pub fn new(payload: T, children: C) -> Self {
        Self {
            payload,
            children,
            _marker: std::marker::PhantomData,
        }
    }
    /// Look up a direct child by key.
    #[inline(always)]
    pub fn find_child(&self, index: &I) -> Option<&Node<T, I, C>> {
        self.children.find_child(index)
    }
    /// Mutable variant of `find_child`.
    #[inline(always)]
    pub fn find_child_mut(&mut self, index: &I) -> Option<&mut Node<T, I, C>> {
        self.children.find_child_mut(index)
    }
    /// Visit this node and every node below it (iterative DFS), stopping at
    /// the first error returned by `f`.
    #[inline(always)]
    pub fn for_each_descendant<F, E>(&self, mut f: F) -> Result<(), E>
    where
        F: FnMut(&Node<T, I, C>) -> Result<(), E>,
    {
        let mut stack = vec![self];
        while let Some(node) = stack.pop() {
            f(node)?;
            node.children.iter_keys(&mut |key| {
                if let Some(child) = node.find_child(key) {
                    stack.push(child);
                }
                true
            });
        }
        Ok(())
    }
}
impl<T, I, C> Node<T, I, C>
where
    C: FindChild<T, I> + ModifyChild<T, I>,
{
    /// Insert (or replace) the child stored under `index`.
    #[inline(always)]
    pub fn push(&mut self, index: I, node: Node<T, I, C>) {
        self.children.add_child(index, node);
    }
}
impl<T, I, C> Node<T, I, C>
where
    I: Clone,
    C: FindChild<T, I> + ModifyChild<T, I>,
{
    /// Return the child under `index`, creating it with `f` if absent.
    #[inline(always)]
    pub fn find_child_or_insert<F>(&mut self, index: I, f: F) -> &mut Node<T, I, C>
    where
        F: FnOnce() -> Node<T, I, C>,
    {
        // Check-then-lookup performs two traversals, but keeps the borrow
        // checker satisfied; a single-pass `entry`-style API would require
        // support from the `C` container trait.
        if self.find_child(&index).is_none() {
            self.push(index.clone(), f());
        }
        self.find_child_mut(&index).unwrap()
    }
}
// Indexing panics when the key is absent (like `HashMap` indexing); use
// `find_child` / `find_child_mut` for fallible lookups.
impl<T, I, C> Index<I> for Node<T, I, C>
where
    C: FindChild<T, I>,
{
    type Output = Node<T, I, C>;
    #[inline(always)]
    fn index(&self, index: I) -> &Self::Output {
        self.find_child(&index).unwrap()
    }
}
impl<T, I, C> IndexMut<I> for Node<T, I, C>
where
    C: FindChild<T, I>,
{
    #[inline(always)]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        self.find_child_mut(&index).unwrap()
    }
}
/// A rooted tree; an empty tree is represented by `root == None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tree<T, I, C>
where
    C: FindChild<T, I>,
{
    pub root: Option<Node<T, I, C>>,
}
impl<T, I, C> Tree<T, I, C>
where
    C: FindChild<T, I>,
{
    /// A tree whose root is `root`.
    pub fn with_root(root: Node<T, I, C>) -> Self {
        Self { root: Some(root) }
    }
}
impl<T, I, C> Default for Tree<T, I, C>
where
    C: FindChild<T, I>,
{
    /// The empty tree (no root).
    fn default() -> Self {
        Self { root: None }
    }
}
impl<T, I, C> Tree<T, I, C>
where
    C: FindChild<T, I>,
{
    /// Wrap an optional root node in a tree.
    #[inline(always)]
    pub fn new(root: Option<Node<T, I, C>>) -> Self {
        Self { root }
    }
    /// Follow `keys` down from the root; `None` when the tree is empty or
    /// the path breaks off.
    #[inline(always)]
    pub fn traverse<'a, 'b>(
        &'a self,
        keys: impl Iterator<Item = &'b I>,
    ) -> Option<&'a Node<T, I, C>>
    where
        I: 'b,
    {
        let mut keys = keys;
        keys.try_fold(self.root.as_ref()?, |node, key| node.find_child(key))
    }
    /// Mutable variant of [`Tree::traverse`].
    #[inline(always)]
    pub fn traverse_mut<'a, 'b>(
        &'a mut self,
        keys: impl Iterator<Item = &'b I>,
    ) -> Option<&'a mut Node<T, I, C>>
    where
        I: 'b,
    {
        let mut keys = keys;
        keys.try_fold(self.root.as_mut()?, |node, key| node.find_child_mut(key))
    }
}
impl<T, I, C> Tree<T, I, C>
where
    I: Clone,
    C: FindChild<T, I> + ModifyChild<T, I>,
{
    /// Walk `keys` from the root, creating any missing nodes with `f`.
    ///
    /// Panics if the tree has no root (`root` is `None`).
    #[inline(always)]
    pub fn traverse_or_insert<'a, 'b, F>(
        &'a mut self,
        keys: impl Iterator<Item = &'b I>,
        f: &F,
    ) -> &'a mut Node<T, I, C>
    where
        F: Fn() -> Node<T, I, C>,
        I: 'b,
    {
        let mut node = self.root.as_mut().unwrap();
        for key in keys {
            node = node.find_child_or_insert(key.clone(), f);
        }
        node
    }
}
impl<T, I, C> Tree<T, I, C>
where
    I: Clone + Default,
    C: FindChild<T, I>,
{
    /// Visit every node together with its key path, stopping at the first
    /// error returned by `f`.
    ///
    /// The root's path is `[I::default()]` — a placeholder, since the root
    /// has no incoming key. Panics if the tree has no root.
    #[inline(always)]
    pub fn for_each<F, E>(&self, mut f: F) -> Result<(), E>
    where
        F: FnMut(&[I], &Node<T, I, C>) -> Result<(), E>,
    {
        let mut stack = vec![(vec![Default::default()], self.root.as_ref().unwrap())];
        while let Some((keys, node)) = stack.pop() {
            f(&keys, node)?;
            node.children.iter_keys(&mut |key| {
                // Each child extends a clone of the parent's key path.
                let mut keys = keys.clone();
                keys.push(key.clone());
                stack.push((keys, node.find_child(key).unwrap()));
                true
            });
        }
        Ok(())
    }
}

436
src/ds/trie.rs Normal file
View file

@@ -0,0 +1,436 @@
use std::{hash::Hash, ops::Deref};
use hashbrown::HashMap;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use super::tree::{FindChild, ModifyChild, Node, Tree};
/// Types that can be decomposed into a sequence of trie-key components `C`.
pub trait ToTrieKey<C>
where
    C: Eq,
{
    /// Iterate the key components in trie order.
    fn to_trie_key(&self) -> impl Iterator<Item = C>;
}
impl ToTrieKey<char> for String {
    fn to_trie_key(&self) -> impl Iterator<Item = char> {
        self.chars()
    }
}
impl ToTrieKey<char> for &str {
    fn to_trie_key(&self) -> impl Iterator<Item = char> {
        self.chars()
    }
}
// Does not overlap the `&str` impl above: generic parameters are implicitly
// `Sized`, so `S` can never be `str` itself.
impl<S> ToTrieKey<char> for &S
where
    S: AsRef<str>,
{
    fn to_trie_key(&self) -> impl Iterator<Item = char> {
        self.as_ref().chars()
    }
}
impl<T> ToTrieKey<T> for &[T]
where
    T: Copy + Ord,
{
    fn to_trie_key(&self) -> impl Iterator<Item = T> {
        // `copied` rather than `cloned`: `T: Copy` is already required.
        self.iter().copied()
    }
}
/// Child container for trie nodes, backed by a `hashbrown::HashMap`.
///
/// The serde bounds on the type definition let the manual impls below
/// delegate directly to the inner map.
#[derive(Debug, Clone)]
pub struct TrieChildrenHolder<T, C>(HashMap<C, Node<T, C, Self>>)
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned;
// Serialized transparently as the inner map.
impl<T, C> Serialize for TrieChildrenHolder<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.0.serialize(serializer)
    }
}
impl<'de, T, C> Deserialize<'de> for TrieChildrenHolder<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        Ok(Self(Deserialize::deserialize(deserializer)?))
    }
}
impl<T, C> Default for TrieChildrenHolder<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    /// An empty child map.
    fn default() -> Self {
        Self(HashMap::new())
    }
}
impl<T, C> FindChild<T, C> for TrieChildrenHolder<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    #[inline(always)]
    fn n_children(&self) -> usize {
        self.0.len()
    }
    // Key visitation order follows the hash map's (arbitrary) iteration order.
    #[inline(always)]
    fn iter_keys<F: FnMut(&C) -> bool>(&self, f: &mut F) {
        let keys = self.0.keys();
        for key in keys {
            if !f(key) {
                break;
            }
        }
    }
    #[inline(always)]
    fn find_child(&self, index: &C) -> Option<&Node<T, C, Self>> {
        self.0.get(index)
    }
    #[inline(always)]
    fn find_child_mut(&mut self, index: &C) -> Option<&mut Node<T, C, Self>> {
        self.0.get_mut(index)
    }
}
impl<T, C> ModifyChild<T, C> for TrieChildrenHolder<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    #[inline(always)]
    fn add_child(&mut self, index: C, node: Node<T, C, Self>) {
        self.0.insert(index, node);
    }
    #[inline(always)]
    fn remove_child(&mut self, index: &C) {
        self.0.remove(index);
    }
}
/// A prefix tree mapping key sequences (e.g. the `char`s of a string) to
/// lists of payloads; repeated inserts under one key accumulate in a `Vec`.
#[derive(Debug, Clone)]
pub struct Trie<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    tree: Tree<Vec<T>, C, TrieChildrenHolder<Vec<T>, C>>,
}
// Serialized transparently as the inner tree.
impl<T, C> Serialize for Trie<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.tree.serialize(serializer)
    }
}
impl<'de, T, C> Deserialize<'de> for Trie<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        Ok(Self {
            tree: Deserialize::deserialize(deserializer)?,
        })
    }
}
impl<T, C> Default for Trie<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    /// An empty trie.
    fn default() -> Self {
        Self {
            tree: Tree::default(),
        }
    }
}
// Expose the read-only tree API (traverse, for_each, …) via deref.
impl<T, C> Deref for Trie<T, C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
    T: Serialize + DeserializeOwned,
{
    type Target = Tree<Vec<T>, C, TrieChildrenHolder<Vec<T>, C>>;
    fn deref(&self) -> &Self::Target {
        &self.tree
    }
}
impl<T, C> Trie<T, C>
where
C: Eq + Hash + Clone + Serialize + DeserializeOwned,
T: Serialize + DeserializeOwned,
{
#[inline]
pub fn traverse<S: ToTrieKey<C>, F>(&self, key: S, f: &mut F, include_children: bool)
where
F: FnMut(&T),
{
let mut current = self.tree.root.as_ref();
for k in key.to_trie_key() {
if let Some(node) = current.and_then(|n| n.find_child(&k)) {
current = Some(node);
} else {
return;
}
}
if include_children {
let mut stack = vec![current];
while let Some(node) = stack.pop() {
if let Some(node) = node {
for payload in &node.payload {
f(payload);
}
for child_key in node.children.0.keys() {
stack.push(node.find_child(child_key));
}
}
}
} else if let Some(node) = current {
for payload in &node.payload {
f(payload);
}
}
}
#[inline]
pub fn test<S: ToTrieKey<C>>(&self, key: S, include_children: bool) -> usize {
let mut current = self.tree.root.as_ref();
for k in key.to_trie_key() {
if let Some(node) = current.and_then(|n| n.find_child(&k)) {
current = Some(node);
} else {
return 0;
}
}
if include_children {
let mut count = 0;
let mut stack = vec![current];
while let Some(node) = stack.pop() {
count += node.map_or(0, |n| n.payload.len());
if let Some(node) = node {
for child_key in node.children.0.keys() {
stack.push(node.find_child(child_key));
}
}
}
count
} else {
current.map_or(0, |n| n.payload.len())
}
}
#[inline]
pub fn insert<S: ToTrieKey<C>>(&mut self, key: S, value: T) {
let mut current = self
.tree
.root
.get_or_insert_with(|| Node::new(Vec::new(), TrieChildrenHolder::default()));
for k in key.to_trie_key() {
if current.find_child(&k).is_none() {
let node = Node::new(Vec::new(), TrieChildrenHolder::default());
current.push(k.clone(), node);
}
current = current.find_child_mut(&k).unwrap();
}
current.payload.push(value);
}
}
#[cfg(test)]
mod tests {
    // Unit tests for Trie: each traverse/test/insert scenario is exercised
    // both for exact-key lookups and prefix (include_children) lookups.
    use super::*;
    #[test]
    fn test_traverse_existing_key() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        let mut result = Vec::new();
        trie.traverse(
            "hello",
            &mut |value| {
                result.push(*value);
            },
            false,
        );
        assert_eq!(result, vec![1]);
    }
    #[test]
    fn test_traverse_non_existing_key() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        let mut result = Vec::new();
        trie.traverse(
            "foo",
            &mut |value| {
                result.push(*value);
            },
            false,
        );
        assert_eq!(result, vec![]);
    }
    #[test]
    fn test_test_existing_key() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        let count = trie.test("hello", false);
        assert_eq!(count, 1);
    }
    #[test]
    fn test_test_non_existing_key() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        let count = trie.test("foo", false);
        assert_eq!(count, 0);
    }
    #[test]
    fn test_insert_existing_key() {
        // A second insert under the same key accumulates, not replaces.
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello", 3);
        let count = trie.test("hello", false);
        assert_eq!(count, 2);
    }
    #[test]
    fn test_insert_new_key() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("foo", 3);
        let count = trie.test("foo", false);
        assert_eq!(count, 1);
    }
    #[test]
    fn test_traverse_existing_key_with_children() {
        // "hello world" lives below "hello", so a prefix traverse sees both.
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello world", 3);
        let mut result = Vec::new();
        trie.traverse(
            "hello",
            &mut |value| {
                result.push(*value);
            },
            true,
        );
        assert_eq!(result, vec![1, 3]);
    }
    #[test]
    fn test_traverse_non_existing_key_with_children() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello world", 3);
        let mut result = Vec::new();
        trie.traverse(
            "foo",
            &mut |value| {
                result.push(*value);
            },
            true,
        );
        assert_eq!(result, vec![]);
    }
    #[test]
    fn test_test_existing_key_with_children() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello world", 3);
        let count = trie.test("hello", true);
        assert_eq!(count, 2);
    }
    #[test]
    fn test_test_non_existing_key_with_children() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello world", 3);
        let count = trie.test("foo", true);
        assert_eq!(count, 0);
    }
    #[test]
    fn test_insert_existing_key_with_children() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello world", 3);
        trie.insert("hello", 4);
        let count = trie.test("hello", true);
        assert_eq!(count, 3);
    }
    #[test]
    fn test_insert_new_key_with_children() {
        let mut trie = Trie::default();
        trie.insert("hello", 1);
        trie.insert("world", 2);
        trie.insert("hello world", 3);
        trie.insert("foo", 4);
        let count = trie.test("foo", true);
        assert_eq!(count, 1);
    }
}

395
src/gff3.rs Normal file
View file

@@ -0,0 +1,395 @@
use itertools::Itertools;
use std::borrow::Cow;
use std::fmt::Display;
use std::fmt::Write;
use crate::{Error, ParseErr};
/// One parsed GFF3 line; string fields borrow from the input buffer
/// (`Cow`, so [`Gff3Line::to_static`] can produce an owned copy).
#[derive(Debug, Clone, PartialEq)]
pub enum Gff3Line<'a> {
    /// A `#...` comment line (leading `#` stripped).
    Comment(std::borrow::Cow<'a, str>),
    /// A `##...` directive line (leading `##` stripped).
    Directive(std::borrow::Cow<'a, str>),
    /// A 9-column feature record; `None` encodes the `.` placeholder for
    /// score/strand/phase.
    Feature {
        seqid: std::borrow::Cow<'a, str>,
        source: std::borrow::Cow<'a, str>,
        type_: std::borrow::Cow<'a, str>,
        start: u64,
        end: u64,
        score: Option<f64>,
        strand: Option<char>,
        phase: Option<u64>,
        attributes: Vec<(std::borrow::Cow<'a, str>, std::borrow::Cow<'a, str>)>,
    },
}
impl<'a> Gff3Line<'a> {
    /// Return the value of attribute `key` on a `Feature` line.
    ///
    /// `None` for comment/directive lines or when the key is absent.
    pub fn get_attr(&'a self, key: &str) -> Option<&'a str> {
        match self {
            Gff3Line::Feature { attributes, .. } => {
                // Keys are expected to be unique per feature; the linear scan
                // below returns the first match. Bug fix: the old assertion
                // inserted `.map(|k| k == key)` before `unique()`, comparing
                // the count of distinct *booleans* (at most 2) against
                // `attributes.len()`, which spuriously failed in debug builds
                // for any feature with more than two attributes.
                debug_assert!(
                    attributes.iter().map(|(k, _)| k).unique().count() == attributes.len()
                );
                for (k, v) in attributes {
                    if k == key {
                        return Some(v);
                    }
                }
                None
            }
            _ => None,
        }
    }
    /// Deep-copy every borrowed field, yielding an owned (`'static`) line.
    pub fn to_static(&self) -> Gff3Line<'static> {
        match self {
            Gff3Line::Comment(s) => Gff3Line::Comment(Cow::Owned(s.to_string())),
            Gff3Line::Directive(s) => Gff3Line::Directive(Cow::Owned(s.to_string())),
            Gff3Line::Feature {
                seqid,
                source,
                type_,
                start,
                end,
                score,
                strand,
                phase,
                attributes,
            } => Gff3Line::Feature {
                seqid: Cow::Owned(seqid.to_string()),
                source: Cow::Owned(source.to_string()),
                type_: Cow::Owned(type_.to_string()),
                start: *start,
                end: *end,
                score: *score,
                strand: *strand,
                phase: *phase,
                attributes: attributes
                    .iter()
                    .map(|(k, v)| (Cow::Owned(k.to_string()), Cow::Owned(v.to_string())))
                    .collect(),
            },
        }
    }
}
/// Percent-encode every character of `s` outside the RFC 3986 "unreserved"
/// set (`A-Z a-z 0-9 - _ . ~`), emitting `%XX` per UTF-8 byte.
///
/// Borrows when nothing needs escaping; the previous version built the
/// output `String` unconditionally even on the borrow path.
#[inline]
fn url_encode(s: &str) -> Cow<'_, str> {
    #[inline]
    fn is_unreserved(c: char) -> bool {
        matches!(c, 'A'..='Z' | 'a'..='z' | '0'..='9' | '-' | '_' | '.' | '~')
    }
    // Fast path: no allocation when every character passes through verbatim.
    if s.chars().all(is_unreserved) {
        return Cow::Borrowed(s);
    }
    let mut encoded = String::with_capacity(s.len() * 2);
    for c in s.chars() {
        if is_unreserved(c) {
            encoded.push(c);
        } else {
            // Escape each UTF-8 byte of the character as %XX.
            let mut buf = [0; 4];
            for v in c.encode_utf8(&mut buf).bytes() {
                write!(encoded, "%{:02X}", v).unwrap();
            }
        }
    }
    Cow::Owned(encoded)
}
/// Decode `%XX` percent-escapes in `s`.
///
/// Borrows when `s` contains no `%`; allocates otherwise.
///
/// # Panics
/// Panics when a `%` is followed by a non-hex character.
#[inline]
fn url_decode(s: &str) -> Cow<'_, str> {
    // Fast path: nothing to decode.
    if !s.contains('%') {
        return Cow::Borrowed(s);
    }
    let mut buf = Vec::with_capacity(s.len());
    let mut chars = s.chars();
    while let Some(c) = chars.next() {
        if c == '%' {
            // Fold the next (up to) two hex digits into one byte; a truncated
            // escape at end-of-input yields the bits read so far, matching
            // the previous behavior.
            let hex = chars.by_ref().take(2).fold(0u32, |acc, c| {
                (acc << 4)
                    + c.to_digit(16)
                        .unwrap_or_else(|| panic!("Invalid hex digit: {}", c))
            });
            buf.push(hex as u8);
        } else {
            let mut utf8 = [0u8; 4];
            buf.extend_from_slice(c.encode_utf8(&mut utf8).as_bytes());
        }
    }
    // Bug fix: the previous implementation used `String::from_utf8_unchecked`,
    // which is undefined behavior when the escapes decode to invalid UTF-8
    // (e.g. a lone "%FF"). Validate instead, substituting U+FFFD for any
    // invalid sequences.
    match String::from_utf8(buf) {
        Ok(decoded) => Cow::Owned(decoded),
        Err(err) => Cow::Owned(String::from_utf8_lossy(&err.into_bytes()).into_owned()),
    }
}
impl<'a> Gff3Line<'a> {
    /// Parse one GFF3 line: `##directive`, `#comment`, or a tab-separated
    /// 9-column feature record.
    ///
    /// `seqid`, `source`, and attribute keys/values are percent-decoded;
    /// `type_` is taken verbatim. A `.` in score/strand/phase maps to `None`.
    #[inline]
    pub fn parse_str(s: &'a str) -> Result<Self, ParseErr> {
        // Order matters: the double-hash directive prefix must be tested
        // before the single-hash comment prefix.
        if let Some(s) = s.strip_prefix("##") {
            Ok(Gff3Line::Directive(s.into()))
        } else if let Some(s) = s.strip_prefix('#') {
            Ok(Gff3Line::Comment(s.into()))
        } else {
            let mut fields = s.split('\t');
            let seqid = fields.next().ok_or(ParseErr::MissingField("seqid"))?;
            let source = fields.next().ok_or(ParseErr::MissingField("source"))?;
            let type_ = fields.next().ok_or(ParseErr::MissingField("type"))?.into();
            let start = fields
                .next()
                .ok_or(ParseErr::MissingField("start"))?
                .parse()
                .map_err(ParseErr::ParseInt)?;
            let end = fields
                .next()
                .ok_or(ParseErr::MissingField("end"))?
                .parse()
                .map_err(ParseErr::ParseInt)?;
            // Note: a missing trailing column is treated the same as ".".
            let score = match fields.next() {
                Some(".") => None,
                Some(s) => Some(s.parse().map_err(ParseErr::ParseFloat)?),
                None => None,
            };
            // Only the first character of the strand column is kept.
            let strand = match fields.next() {
                Some(".") => None,
                Some(s) => Some(s.chars().next().ok_or(ParseErr::MissingField("strand"))?),
                None => None,
            };
            let phase = match fields.next() {
                Some(".") => None,
                Some(s) => Some(s.parse().map_err(ParseErr::ParseInt)?),
                None => None,
            };
            // Attributes: semicolon-separated `key=value` pairs.
            let attr_strs = fields
                .next()
                .ok_or(ParseErr::MissingField("attributes"))?
                .split(';');
            let mut attributes = Vec::with_capacity(32);
            for attr_str in attr_strs {
                // splitn(2, '=') keeps any further '=' inside the value.
                let mut attr = attr_str.splitn(2, '=');
                let key = attr.next().ok_or(ParseErr::MissingField("attribute key"))?;
                let value = attr
                    .next()
                    .ok_or(ParseErr::MissingField("attribute value"))?;
                attributes.push((url_decode(key), url_decode(value)));
            }
            Ok(Gff3Line::Feature {
                seqid: url_decode(seqid),
                source: url_decode(source),
                type_,
                start,
                end,
                score,
                strand,
                phase,
                attributes,
            })
        }
    }
}
/// Expands to a `Display`-style `fmt` body that serializes a GFF3 line enum
/// back to text: `#`/`##` prefixes are restored for comments/directives, and
/// features are emitted as a 9-column tab-separated row with percent-encoding
/// re-applied, so a parsed line round-trips to an equivalent record.
macro_rules! gff3_display {
    ($name:ident) => {
        #[inline]
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            match self {
                $name::Comment(s) => write!(f, "#{}", s),
                $name::Directive(s) => write!(f, "##{}", s),
                $name::Feature {
                    seqid,
                    source,
                    type_,
                    start,
                    end,
                    score,
                    strand,
                    phase,
                    attributes,
                } => {
                    write!(
                        f,
                        "{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}",
                        url_encode(seqid),
                        url_encode(source),
                        type_,
                        start,
                        end,
                        // Absent score/strand/phase serialize back to ".".
                        score
                            .map(|s| s.to_string())
                            .unwrap_or_else(|| ".".to_string()),
                        strand
                            .map(|s| s.to_string())
                            .unwrap_or_else(|| ".".to_string()),
                        phase
                            .map(|s| s.to_string())
                            .unwrap_or_else(|| ".".to_string()),
                        // Attributes re-join as percent-encoded key=value
                        // pairs separated by ';'.
                        attributes
                            .iter()
                            .map(|(k, v)| format!("{}={}", url_encode(k), url_encode(v)))
                            .collect::<Vec<_>>()
                            .join(";")
                    )
                }
            }
        }
    };
}
impl<'a> Display for Gff3Line<'a> {
    gff3_display!(Gff3Line);
}
/// Abstraction over GFF3 line sources.
///
/// `size`, `offset` and `seek_to` have "unsupported" defaults (`None` /
/// `Ok(None)`) so purely streaming readers only need to implement
/// `read_line`.
pub trait Gff3Read<E: std::error::Error> {
    /// Total input size in bytes, when known (used for progress reporting).
    fn size(&self) -> Option<usize> {
        None
    }
    /// Current byte offset into the input, when the source is seekable.
    fn offset(&mut self) -> Option<u64> {
        None
    }
    /// Seek to an absolute byte offset; `Ok(None)` means unsupported.
    fn seek_to(&mut self, _pos: u64) -> Result<Option<()>, Error<E>> {
        Ok(None)
    }
    /// Read and parse the next line; `Ok(None)` signals end of input.
    fn read_line(&mut self) -> Result<Option<Gff3Line>, Error<E>>;
}
/// Async counterpart of [`Gff3Read`] (no seeking).
#[cfg(feature = "async")]
pub trait Gff3ReadAsync<E: std::error::Error> {
    /// Current byte offset into the input, when the source is seekable.
    fn offset(&mut self) -> Option<u64> {
        None
    }
    /// Read and parse the next line; resolves to `Ok(None)` at end of input.
    fn read_line_async(
        &mut self,
    ) -> impl std::future::Future<Output = Result<Option<Gff3Line>, Error<E>>> + Send;
}
/// Sink for serialized GFF3 lines.
pub trait Gff3Write<E: std::error::Error> {
    /// Serialize and write one line.
    fn write_line(&mut self, line: &Gff3Line) -> Result<(), Error<E>>;
}
/// Async counterpart of [`Gff3Write`].
#[cfg(feature = "async")]
pub trait Gff3WriteAsync<E: std::error::Error> {
    /// Serialize and write one line.
    fn write_line_async(
        &mut self,
        line: &Gff3Line,
    ) -> impl std::future::Future<Output = Result<(), Error<E>>> + Send;
}
#[cfg(test)]
mod tests {
    use super::*;
    /// True when the `Cow` still borrows its input, i.e. no allocation
    /// happened during encode/decode.
    // IDIOM: replaced the manual `match … { Borrowed(_) => true, _ => false }`
    // with the equivalent `matches!` expression.
    fn cow_is_borrowed<T: ToOwned + ?Sized>(cow: &std::borrow::Cow<'_, T>) -> bool {
        matches!(cow, std::borrow::Cow::Borrowed(_))
    }
    #[test]
    fn test_parse_gff3_line() {
        // Fields below are separated by real tab characters, as required by
        // `parse_str`'s split on '\t'.
        let lines: Vec<Gff3Line> = r#"
##gff-version 3
#format: gff3
chr1	HAVANA	gene	11869	14409	.	+	.	ID=ENSG00000290825.1;gene_id=ENSG00000290825.1;gene_type=lncRNA;gene_name=DDX11L2;level=2;tag=overlaps_pseudogene
chr1	HAVANA	transcript	11869	14409	.	+	.	ID=ENST00000456328.2;Parent=ENSG00000290825.1;gene_id=ENSG00000290825.1;transcript_id=ENST00000456328.2;gene_type=lncRNA;gene_name=DDX11L2;transcript_type=lncRNA;transcript_name=DDX11L2-202;level=2;transcript_support_level=1;tag=basic,Ensembl_canonical;havana_transcript=OTTHUMT00000362751.1
"#.trim().lines().map(Gff3Line::parse_str).collect::<Result<Vec<_>, _>>().unwrap();
        assert_eq!(
            lines,
            vec![
                Gff3Line::Directive("gff-version 3".into()),
                Gff3Line::Comment("format: gff3".into()),
                Gff3Line::Feature {
                    seqid: "chr1".into(),
                    source: "HAVANA".into(),
                    type_: "gene".into(),
                    start: 11869,
                    end: 14409,
                    score: None,
                    strand: Some('+'),
                    phase: None,
                    attributes: vec![
                        ("ID".into(), "ENSG00000290825.1".into()),
                        ("gene_id".into(), "ENSG00000290825.1".into()),
                        ("gene_type".into(), "lncRNA".into()),
                        ("gene_name".into(), "DDX11L2".into()),
                        ("level".into(), "2".into()),
                        ("tag".into(), "overlaps_pseudogene".into())
                    ]
                },
                Gff3Line::Feature {
                    seqid: "chr1".into(),
                    source: "HAVANA".into(),
                    type_: "transcript".into(),
                    start: 11869,
                    end: 14409,
                    score: None,
                    strand: Some('+'),
                    phase: None,
                    attributes: vec![
                        ("ID".into(), "ENST00000456328.2".into()),
                        ("Parent".into(), "ENSG00000290825.1".into()),
                        ("gene_id".into(), "ENSG00000290825.1".into()),
                        ("transcript_id".into(), "ENST00000456328.2".into()),
                        ("gene_type".into(), "lncRNA".into()),
                        ("gene_name".into(), "DDX11L2".into()),
                        ("transcript_type".into(), "lncRNA".into()),
                        ("transcript_name".into(), "DDX11L2-202".into()),
                        ("level".into(), "2".into()),
                        ("transcript_support_level".into(), "1".into()),
                        ("tag".into(), "basic,Ensembl_canonical".into()),
                        ("havana_transcript".into(), "OTTHUMT00000362751.1".into())
                    ]
                }
            ]
        );
    }
    #[test]
    fn test_url_encode() {
        assert_eq!(url_encode("Hello, world!"), "Hello%2C%20world%21");
        assert_eq!(
            url_encode("https://example.com/?q=hello world"),
            "https%3A%2F%2Fexample.com%2F%3Fq%3Dhello%20world"
        );
        assert_eq!(url_encode("abc123"), "abc123");
        assert_eq!(url_encode("!€,"), "%21%E2%82%AC%2C");
        assert!(cow_is_borrowed(&url_encode("xxx")));
        assert!(!cow_is_borrowed(&url_encode("x x")));
    }
    #[test]
    fn test_url_decode() {
        assert_eq!(url_decode("Hello,%20world%21"), "Hello, world!");
        assert_eq!(
            url_decode("https%3A%2F%2Fexample.com%2F%3Fq%3Dhello%20world"),
            "https://example.com/?q=hello world"
        );
        assert_eq!(url_decode("abc123"), "abc123");
        assert_eq!(url_decode("%21%E2%82%AC%2C"), "!€,");
        assert!(cow_is_borrowed(&url_decode("xxx")));
        assert!(!cow_is_borrowed(&url_decode("x%20x")));
    }
}

128
src/index/mod.rs Normal file
View file

@ -0,0 +1,128 @@
use std::{cell::UnsafeCell, sync::Mutex};
use hashbrown::HashMap;
use indicatif::{MultiProgress, ProgressBar};
use range::RangeIndex;
use relation::RelationIndex;
use serde::{Deserialize, Serialize};
use trie::TrieIndex;
use crate::{
bar::style_bar,
gff3::{Gff3Line, Gff3Read},
io::tee::Gff3BroadcastRead,
Error,
};
pub mod range;
pub mod relation;
pub mod trie;
/// Aggregated index for one GFF3 file: an optional content digest, optional
/// range and relation indexes, and one attribute trie per indexed attribute.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Gff3Index {
    /// Content digest of the indexed file, if computed.
    /// NOTE(review): `build` leaves this `None`; presumably filled in by the
    /// caller — confirm.
    pub digest: Option<String>,
    /// Genomic-position → file-offset index, when built.
    pub range_index: Option<RangeIndex>,
    /// Parent/child feature relation index, when built.
    pub relation_index: Option<RelationIndex>,
    /// Attribute tries keyed by index name (as given in the trie definition).
    pub tries: HashMap<String, TrieIndex<char>>,
}
/// Minimal stand-in for the (unstable) `std::cell::SyncUnsafeCell`: an
/// `UnsafeCell` that may be shared across threads. Users must guarantee
/// externally that accesses do not alias mutably (see `Gff3Index::build`,
/// where each cell is handed to exactly one writer).
pub struct SyncUnsafeCell<T>(UnsafeCell<T>);
// SAFETY: sharing `&SyncUnsafeCell<T>` only exposes `T` through the cell's
// raw pointer, so this is sound under the external no-mutable-aliasing
// contract *provided* `T` itself is shareable.
// SOUNDNESS FIX: the impl was previously unbounded (`unsafe impl<T> Sync`),
// which would let a non-`Sync` `T` (e.g. one containing `Rc` or `RefCell`)
// cross threads; require `T: Sync`, as `std::cell::SyncUnsafeCell` does.
unsafe impl<T: Sync> Sync for SyncUnsafeCell<T> {}
pub type Gff3AttrExtractFunc = for<'a> fn(&'a Gff3Line) -> Option<&'a str>;
impl Gff3Index {
    /// Build all indexes for one GFF3 input in a single parallel pass.
    ///
    /// `reader_factory` must produce independent readers over the *same*
    /// input: one is consumed by the range index, one by the relation index,
    /// and one feeds the trie broadcast. `by` is the range-index interval;
    /// `trie_def` maps each trie index name to its attribute extractor.
    ///
    /// # Errors
    /// A single builder failure is returned as-is; multiple failures are
    /// collected into `Error::Multiple`.
    #[cfg(feature = "rayon")]
    pub fn build<R, F, E>(
        reader_factory: &F,
        by: u64,
        trie_def: &[(&str, Gff3AttrExtractFunc)],
    ) -> Result<Self, Error<E>>
    where
        E: std::error::Error + Send + Sync,
        R: Gff3Read<E> + Send,
        F: Fn() -> R + Send + Sync,
    {
        // Lines per broadcast chunk delivered to each trie channel.
        let chunk_size = 1024;
        let mut trie_broadcast = Gff3BroadcastRead::new(reader_factory(), chunk_size);
        let mp = MultiProgress::new();
        // Per trie: (name, extractor to be moved into its closure once,
        // cell holding the trie under construction, progress bar).
        let mut trie_builders = trie_def
            .iter()
            .map(|(name, get_attr)| {
                (
                    *name,
                    Some(*get_attr),
                    SyncUnsafeCell(UnsafeCell::new(TrieIndex::<char>::new(name.to_string()))),
                    if let Some(size) = reader_factory().size() {
                        let pg = ProgressBar::new(size as u64);
                        pg.set_prefix(format!("Trie Index: {}", name));
                        style_bar(&pg, true);
                        mp.add(pg)
                    } else {
                        // Unknown input size: fall back to a spinner.
                        let pg = ProgressBar::new_spinner();
                        pg.set_prefix(format!("Trie Index: {}", name));
                        style_bar(&pg, false);
                        mp.add(pg)
                    },
                )
            })
            .collect::<Vec<_>>();
        // Builder errors from the spawned tasks are collected here.
        let errors = Mutex::new(Vec::new());
        let mut ret = Gff3Index {
            digest: None,
            range_index: None,
            relation_index: None,
            tries: HashMap::new(),
        };
        rayon::scope(|s| {
            s.spawn(|_| {
                match RangeIndex::build(reader_factory(), by, Some(&mp)) {
                    Ok(idx) => ret.range_index = Some(idx),
                    Err(e) => errors.lock().unwrap().push(e),
                };
            });
            s.spawn(|_| {
                match RelationIndex::build(reader_factory(), Some(&mp)) {
                    Ok(idx) => ret.relation_index = Some(idx),
                    Err(e) => errors.lock().unwrap().push(e),
                };
            });
            for (_, get_attr, trie, pg) in trie_builders.iter_mut() {
                // SAFETY(review): each cell is mutably borrowed exactly once
                // here, and the resulting `&mut` is moved into exactly one
                // broadcast channel, so no two closures alias the same trie.
                // Soundness additionally relies on `Gff3BroadcastRead::run`
                // not invoking one channel concurrently with itself — confirm.
                let idx = unsafe { trie.0.get().as_mut().unwrap() };
                let get_attr = get_attr.take().unwrap();
                trie_broadcast.add_channel(Box::new(move |offset, chunk| {
                    for (o, line) in chunk {
                        idx.process_line(*o, line, get_attr);
                    }
                    pg.set_position(offset);
                }));
            }
            s.spawn(move |_| {
                trie_broadcast.run().unwrap();
            });
        });
        let mut errors = errors.into_inner().unwrap();
        if errors.len() == 1 {
            // Single failure: propagate it unwrapped.
            Err(errors.pop().unwrap())
        } else if !errors.is_empty() {
            Err(Error::Multiple(errors))
        } else {
            for (name, _, trie, pg) in trie_builders {
                pg.finish();
                ret.tries.insert(name.to_string(), trie.0.into_inner());
            }
            Ok(ret)
        }
    }
}

179
src/index/range.rs Normal file
View file

@ -0,0 +1,179 @@
use std::borrow::Borrow;
use indicatif::{MultiProgress, ProgressBar};
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use crate::{
bar::style_bar,
gff3::{Gff3Line, Gff3Read},
Error,
};
/// Maps genomic positions to file-offset windows, per sequence.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RangeIndex {
    // (sequence id, per-sequence split points), in input order.
    by_seq: Vec<(String, SeqRangeIndex)>,
}
/// Split points for one sequence: (genomic start position, byte offset into
/// the source file), in ascending input order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SeqRangeIndex {
    by_range: Vec<(u64, u64)>,
}
/// Default genomic interval (in bases) between recorded split points.
pub const DEFAULT_RANGE_INTERVAL: u64 = 2048;
impl RangeIndex {
pub fn query(&self, seqid: &str, begin: u64, end: u64) -> Option<(u64, u64)> {
self.by_seq
.iter()
.find(|(id, _)| id == seqid)
.and_then(|(_, idx)| {
idx.by_range
.iter()
.tuple_windows()
.filter_map(|((prev, prev_offset), (split_point, offset))| {
((prev..split_point).contains(&&begin)
|| (prev..split_point).contains(&&end)
|| (begin..end).contains(prev)
|| (begin..end).contains(split_point))
.then_some((prev_offset, offset))
})
.fold(None, |prev, (start, end)| {
if let Some((prev_start, prev_end)) = prev {
Some((
std::cmp::min(prev_start, *start),
std::cmp::max(prev_end, *end),
))
} else {
Some((*start, *end))
}
})
.and_then(|(start, end)| {
if start - end > 0 {
Some((start, end))
} else {
None
}
})
})
}
pub fn query_lines<R, E, F>(
&self,
reader: &mut R,
seqid_in: &str,
begin: u64,
end: u64,
cb: &mut F,
) -> Result<(), Error<E>>
where
R: Gff3Read<E>,
E: std::error::Error,
F: FnMut(&Gff3Line),
{
let (start_off, end_off) = match self.query(seqid_in, begin, end) {
Some((start, end)) => (start, end),
None => return Ok(()),
};
reader.seek_to(start_off)?;
while reader.offset().ok_or(Error::Unseekable)? < end_off {
let line = match reader.read_line()? {
Some(line) => line,
None => break,
};
#[allow(unused_variables)]
if let Gff3Line::Feature {
ref seqid,
ref source,
ref type_,
start,
end,
score,
strand,
phase,
ref attributes,
} = line
{
if seqid == seqid_in
&& ((start..end).contains(&begin)
|| (start..end).contains(&end)
|| (begin..end).contains(&start)
|| (begin..end).contains(&end))
{
cb(&line);
}
}
}
Ok(())
}
pub fn build<R, E>(mut reader: R, by: u64, mp: Option<&MultiProgress>) -> Result<Self, Error<E>>
where
R: Gff3Read<E>,
E: std::error::Error,
{
let mut idx = RangeIndex { by_seq: Vec::new() };
let mut last_seqid = None;
let mut last_start = 0;
let prog = if let Some(size) = reader.size() {
let pg = ProgressBar::new(size as u64);
style_bar(&pg, true);
pg
} else {
let pg = ProgressBar::new_spinner();
style_bar(&pg, false);
pg
};
prog.set_prefix("Range Index");
let prog = mp.map(move |mp| mp.add(prog));
loop {
let line = match reader.read_line()? {
Some(line) => line,
None => break,
};
#[allow(unused_variables)]
if let Gff3Line::Feature {
ref seqid,
ref source,
ref type_,
start,
end,
score,
strand,
phase,
ref attributes,
} = line
{
if last_seqid.as_deref() != Some(seqid.borrow()) {
last_seqid = Some(seqid.to_string());
last_start = 0;
idx.by_seq.push((
seqid.to_string(),
SeqRangeIndex {
by_range: vec![(0, reader.offset().ok_or(Error::Unseekable)?)],
},
));
}
if start - last_start > by {
idx.by_seq
.last_mut()
.unwrap()
.1
.by_range
.push((start, reader.offset().ok_or(Error::Unseekable)?));
last_start = start;
}
}
let offset = reader.offset().ok_or(Error::Unseekable)?;
if let Some(prog) = prog.as_ref() {
prog.set_position(offset)
}
}
Ok(idx)
}
}

272
src/index/relation.rs Normal file
View file

@ -0,0 +1,272 @@
use std::borrow::Borrow;
use hashbrown::HashMap;
use indicatif::{MultiProgress, ProgressBar};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use crate::{
bar::style_bar,
ds::tree::{FindChild, ModifyChild, Node, Tree},
gff3::{Gff3Line, Gff3Read},
Error,
};
/// Child container for relation-tree nodes: children keyed by feature ID.
#[derive(Debug, Clone)]
pub struct RelationChildrenHolder<T>
where
    T: Default + Serialize + DeserializeOwned,
{
    by_id: HashMap<String, Node<T, String, Self>>,
}
/// Serializes transparently as the inner `by_id` map.
impl<T> Serialize for RelationChildrenHolder<T>
where
    T: Default + Serialize + DeserializeOwned,
{
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.by_id.serialize(serializer)
    }
}
/// Deserializes transparently from a map, mirroring `Serialize`.
impl<'de, T> Deserialize<'de> for RelationChildrenHolder<T>
where
    T: Default + Serialize + DeserializeOwned,
{
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        Ok(Self {
            by_id: Deserialize::deserialize(deserializer)?,
        })
    }
}
impl<T> Default for RelationChildrenHolder<T>
where
    T: Default + Serialize + DeserializeOwned,
{
    fn default() -> Self {
        Self {
            by_id: HashMap::new(),
        }
    }
}
impl<T> FindChild<T, String> for RelationChildrenHolder<T>
where
    T: Default + Serialize + DeserializeOwned,
{
    #[inline(always)]
    fn find_child(&self, index: &String) -> Option<&Node<T, String, Self>> {
        self.by_id.get(index)
    }
    #[inline(always)]
    fn find_child_mut(&mut self, index: &String) -> Option<&mut Node<T, String, Self>> {
        self.by_id.get_mut(index)
    }
    #[inline(always)]
    fn iter_keys<F: FnMut(&String) -> bool>(&self, f: &mut F) {
        // `all` short-circuits, so `f` returning false stops the iteration.
        self.by_id.keys().all(f);
    }
    #[inline(always)]
    fn n_children(&self) -> usize {
        self.by_id.len()
    }
}
impl<T> ModifyChild<T, String> for RelationChildrenHolder<T>
where
    T: Default + Serialize + DeserializeOwned,
{
    #[inline(always)]
    fn add_child(&mut self, index: String, node: Node<T, String, Self>) {
        self.by_id.insert(index, node);
    }
    #[inline(always)]
    fn remove_child(&mut self, index: &String) {
        self.by_id.remove(index);
    }
}
/// Relation tree whose node payloads are file byte offsets, keyed by feature ID.
pub type RelationTree = Tree<u64, String, RelationChildrenHolder<u64>>;
/// Parent/child index over GFF3 features (linked via the `Parent` attribute).
#[derive(Debug, Clone)]
pub struct RelationIndex {
    /// Derived lookup: feature ID → path of IDs from the tree root to that
    /// feature. Not serialized; rebuild with [`RelationIndex::reconstruct_path`].
    pub path_by_id: HashMap<String, Vec<String>>,
    /// One relation tree per sequence, in input order.
    pub tree_by_seq: Vec<(String, RelationTree)>,
}
/// Only `tree_by_seq` is serialized; `path_by_id` is derived data.
impl Serialize for RelationIndex {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.tree_by_seq.serialize(serializer)
    }
}
/// Restores with an empty `path_by_id`; callers must run
/// [`RelationIndex::reconstruct_path`] before path-based lookups.
impl<'de> Deserialize<'de> for RelationIndex {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        Ok(Self {
            path_by_id: HashMap::new(),
            tree_by_seq: Deserialize::deserialize(deserializer)?,
        })
    }
}
impl RelationIndex {
    /// Iterate the sequence ids that have a relation tree.
    #[inline(always)]
    pub fn list_seqids(&self) -> impl Iterator<Item = &String> {
        self.tree_by_seq.iter().map(|(seqid, _)| seqid)
    }
    /// Visit every descendant feature of `id` within `seqid`, seeking
    /// `reader` to each stored offset and passing the parsed line to `f`.
    ///
    /// Requires `path_by_id` to be populated (built in-process, or restored
    /// via [`Self::reconstruct_path`] after deserialization).
    pub fn traverse_children<F, R, E>(
        &self,
        reader: &mut R,
        seqid: &str,
        id: &str,
        mut f: F,
    ) -> Result<(), Error<E>>
    where
        R: Gff3Read<E>,
        E: std::error::Error,
        F: FnMut(&Gff3Line),
    {
        let tree = self
            .tree_by_seq
            .iter()
            .find(|(id, _)| id == seqid)
            .ok_or(Error::MissingFeature(seqid.to_string()))?;
        // The first path element addresses the tree root itself, so the
        // traversal starts from the second element.
        match tree.1.traverse(
            self.path_by_id
                .get(id)
                .ok_or(Error::MissingFeature(id.to_string()))?
                .iter()
                .skip(1),
        ) {
            Some(node) => node,
            None => return Ok(()),
        }
        .for_each_descendant(|node| {
            let offset = node.payload;
            // NOTE(review): `build` records `reader.offset()` *after*
            // consuming a line; seeking back here therefore lands after that
            // line — confirm which line `read_line` is expected to return.
            reader.seek_to(offset)?;
            let line: Gff3Line = reader.read_line()?.ok_or(Error::<E>::UnexpectedEof)?;
            f(&line);
            Result::<_, Error<E>>::Ok(())
        })?;
        Ok(())
    }
    /// Rebuild the transient `path_by_id` lookup from the serialized trees
    /// (the map is intentionally not serialized).
    pub fn reconstruct_path(&mut self) {
        let (out, tree) = (&mut self.path_by_id, &self.tree_by_seq);
        for (_, tree) in tree {
            tree.for_each(|path, _| {
                let id = path.last().unwrap();
                out.insert(id.to_string(), path.to_vec());
                Ok::<_, ()>(())
            })
            .ok();
        }
    }
    /// Build the relation index in one pass, linking each feature to its
    /// `Parent` attribute. Assumes parents appear before their children and
    /// that features of one sequence are contiguous.
    ///
    /// # Errors
    /// `Error::MissingAttribute` when a feature has no `ID`;
    /// `Error::MissingFeature` when a `Parent` references an unseen ID;
    /// `Error::Unseekable` when `reader` cannot report offsets.
    pub fn build<R, E>(mut reader: R, mp: Option<&MultiProgress>) -> Result<Self, Error<E>>
    where
        R: Gff3Read<E>,
        E: std::error::Error,
    {
        let mut idx = Self {
            path_by_id: HashMap::new(),
            tree_by_seq: Vec::new(),
        };
        let mut last_seqid = None;
        let prog = if let Some(size) = reader.size() {
            let pg = ProgressBar::new(size as u64);
            style_bar(&pg, true);
            pg
        } else {
            let pg = ProgressBar::new_spinner();
            style_bar(&pg, false);
            pg
        };
        prog.set_prefix("Relation Index");
        // NOTE(review): the bar handle returned by `mp.add` is discarded and
        // the loop below never updates it — the relation bar stays at zero.
        mp.map(move |mp| mp.add(prog));
        loop {
            let line = match reader.read_line()? {
                Some(line) => line.to_static(),
                None => break,
            };
            #[allow(unused_variables)]
            if let crate::gff3::Gff3Line::Feature {
                ref seqid,
                ref source,
                ref type_,
                start,
                end,
                score,
                strand,
                phase,
                ref attributes,
            } = line
            {
                // Offset is taken *after* the line has been consumed.
                let offset = reader.offset().ok_or(Error::Unseekable)?;
                let id = line
                    .get_attr("ID")
                    .ok_or_else(|| Error::MissingAttribute("ID".to_string()))?;
                if last_seqid.as_deref() != Some(seqid.borrow()) {
                    // New sequence: open a fresh tree rooted at this offset.
                    last_seqid = Some(seqid.to_string());
                    // NOTE(review): this empty path is overwritten just below
                    // when the feature has no Parent; if the first feature of
                    // a sequence *has* a Parent, the empty path makes the
                    // subsequent traverse target the root — confirm intended.
                    idx.path_by_id.insert(id.to_string(), vec![]);
                    idx.tree_by_seq.push((
                        seqid.to_string(),
                        Tree::with_root(Node::new(offset, Default::default())),
                    ));
                }
                let parent_id = line.get_attr("Parent");
                if parent_id.is_none() {
                    // Top-level feature: direct child of the root; path = [ID].
                    idx.path_by_id.entry_ref(id).insert(vec![id.to_string()]);
                    idx.tree_by_seq
                        .last_mut()
                        .unwrap()
                        .1
                        .root
                        .as_mut()
                        .unwrap()
                        .children
                        .by_id
                        .insert(id.to_string(), Node::new(offset, Default::default()));
                    continue;
                }
                let parent_id = parent_id.unwrap();
                // Nested feature: extend the parent's path with this ID and
                // attach the node under the parent.
                let mut path = idx
                    .path_by_id
                    .get_mut(parent_id)
                    .ok_or_else(|| Error::MissingFeature(parent_id.to_string()))?
                    .clone();
                idx.tree_by_seq
                    .last_mut()
                    .unwrap()
                    .1
                    .traverse_mut(path.iter())
                    .unwrap()
                    .push(id.to_string(), Node::new(offset, Default::default()));
                path.push(id.to_string());
                idx.path_by_id.insert(id.to_string(), path);
            }
        }
        Ok(idx)
    }
}

80
src/index/trie.rs Normal file
View file

@ -0,0 +1,80 @@
use std::hash::Hash;
use hashbrown::HashMap;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use crate::{
ds::trie::{ToTrieKey, Trie},
gff3::Gff3Line,
};
/// Per-attribute trie index: for one attribute name (`by`), maps each
/// sequence id to a trie of attribute values whose payloads are file offsets.
#[derive(Debug, Clone)]
pub struct TrieIndex<C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
{
    /// Name of the indexed attribute (e.g. "ID").
    pub by: String,
    // HashMap<Sequence ID, Trie>
    pub trie: HashMap<String, Trie<u64, C>>,
}
/// Serialized as the `(by, trie)` tuple.
impl<C> Serialize for TrieIndex<C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
{
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        (&self.by, &self.trie).serialize(serializer)
    }
}
/// Deserialized from the `(by, trie)` tuple, mirroring `Serialize`.
impl<'de, C> Deserialize<'de> for TrieIndex<C>
where
    C: Eq + Hash + Serialize + DeserializeOwned,
{
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let (by, trie) = Deserialize::deserialize(deserializer)?;
        Ok(Self { by, trie })
    }
}
impl<C> TrieIndex<C>
where
    C: Eq + Hash + Clone + Serialize + DeserializeOwned,
{
    /// Create an empty index over attribute `by`.
    pub fn new(by: String) -> Self {
        Self {
            by,
            trie: HashMap::new(),
        }
    }
    /// Visit each (sequence id, trie) pair.
    #[inline(always)]
    pub fn for_each_seqid<F>(&self, mut f: F)
    where
        F: FnMut(&str, &Trie<u64, C>),
    {
        for (seqid, trie) in &self.trie {
            f(seqid, trie);
        }
    }
    /// If `line` is a feature and `get_attr` yields a value for it, insert
    /// that value into the sequence's trie with `offset` as the payload
    /// (creating the per-sequence trie on first use).
    #[inline(always)]
    pub fn process_line<I>(
        &mut self,
        offset: u64,
        line: &Gff3Line,
        get_attr: impl for<'b> Fn(&'b Gff3Line) -> Option<&'b I>,
    ) where
        for<'c> &'c I: ToTrieKey<C>,
        I: ?Sized,
    {
        if let Gff3Line::Feature { ref seqid, .. } = line {
            if let Some(attr) = get_attr(line) {
                let trie = self
                    .trie
                    .entry(seqid.to_string())
                    .or_insert_with(Trie::default);
                trie.insert(attr, offset);
            }
        }
    }
}

554
src/io/file.rs Normal file
View file

@ -0,0 +1,554 @@
#[cfg(unix)]
use libc::c_int;
#[cfg(unix)]
use std::os::fd::AsRawFd;
use std::{
error::Error,
io::{self, Read, Seek, Write},
ops::Deref,
};
use crate::{
impl_deserialize_for_copy,
serdes::{Deserialize, DeserializeOwned, Serialize},
};
// handle for memory-mapped heap objects
/// Borrowed, `Copy` view of a `T` inside a [`MemoryMap`]; derefs to the
/// mapped value. The lifetime ties the handle to the mapping it came from.
#[derive(Debug, Clone, Copy)]
pub struct Handle<'a, T> {
    ptr: *const T,
    // Ties the raw pointer's validity to the mapping's lifetime 'a.
    _marker: std::marker::PhantomData<&'a T>,
}
impl<'a, T> Deref for Handle<'a, T> {
    type Target = T;
    fn deref(&self) -> &Self::Target {
        // NOTE(review): soundness relies on `ptr` being valid, aligned and
        // initialized for `T` for all of 'a — guaranteed by the unsafe
        // constructors' contracts, not checked here.
        unsafe { &*self.ptr }
    }
}
/// Read-only mapped memory region with an optional cleanup action
/// (e.g. `munmap`) run on drop.
pub struct MemoryMap<'a> {
    ptr: *const u8,
    size: usize,
    cleanup: Option<Box<dyn FnOnce() + 'a>>,
}
impl<'a> MemoryMap<'a> {
    /// Wrap an existing region without taking ownership (no cleanup on drop).
    ///
    /// # Safety
    /// `ptr` must be a valid pointer to a memory-mapped region of size at least `size`.
    pub unsafe fn new(ptr: *const u8, size: usize) -> Self {
        Self {
            ptr,
            size,
            cleanup: None,
        }
    }
    /// `mmap` a private, read-only view of `fd`, unmapped on drop.
    /// On failure returns the raw `errno` value.
    ///
    /// # Safety
    /// `fd` must be a valid file descriptor.
    /// `offset` must be a valid offset into the file.
    /// `size` must be a valid size for the memory-mapped region.
    // NOTE(review): mmap additionally requires `offset` to be page-aligned,
    // and `__errno_location` is glibc-specific (not macOS/musl-portable) —
    // confirm supported targets.
    #[cfg(unix)]
    pub unsafe fn from_file<F>(fd: &'a F, offset: usize, size: usize) -> Result<Self, c_int>
    where
        F: AsRawFd,
    {
        let ptr = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                size,
                libc::PROT_READ,
                libc::MAP_PRIVATE,
                fd.as_raw_fd(),
                offset as libc::off_t,
            )
        };
        if ptr == libc::MAP_FAILED {
            return Err(*libc::__errno_location());
        }
        Ok(Self {
            ptr: ptr as _,
            size,
            cleanup: Some(Box::new(move || unsafe {
                libc::munmap(ptr, size);
            })),
        })
    }
    /// Get a typed handle to the object recorded at `rec.offset`.
    ///
    /// # Safety
    /// `rec` must be a valid `HeapObjectRec` for this memory map.
    /// `rec.offset` must be a valid offset into the memory-mapped region.
    pub unsafe fn get_handle<T>(&self, rec: &HeapObjectRec) -> Handle<'a, T> {
        debug_assert!(rec.offset as usize <= self.size);
        Handle {
            ptr: unsafe { self.ptr.add(rec.offset as usize) as *const T },
            _marker: std::marker::PhantomData,
        }
    }
}
impl<'a> Drop for MemoryMap<'a> {
    fn drop(&mut self) {
        // Run the registered cleanup (e.g. munmap) exactly once.
        if let Some(cleanup) = self.cleanup.take() {
            cleanup();
        }
    }
}
/// Offset of an object stored in the side "heap" file; written inline in the
/// stream in place of the object itself.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct HeapObjectRec {
    pub offset: u64,
}
/// Deserialized as a bare `u64` offset.
impl<E> DeserializeOwned<E> for HeapObjectRec
where
    E: Error + From<io::Error>,
{
    type Output = Self;
    fn deserialize<R>(reader: &mut R) -> Result<Self, E>
    where
        R: Read,
    {
        let offset = <u64 as DeserializeOwned<E>>::deserialize(reader)?;
        Ok(Self { offset })
    }
}
/// Serialized as a bare `u64` offset, mirroring deserialization.
impl<E> Serialize<E> for HeapObjectRec
where
    E: Error + From<io::Error>,
{
    fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
    where
        W: Write,
    {
        self.offset.serialize(writer)?;
        Ok(())
    }
}
impl_deserialize_for_copy!(HeapObjectRec);
/// Buffered, forward-only object reader over any `Read` source.
pub struct BufIOStreamRead<R>
where
    R: Read,
{
    pub(crate) reader: io::BufReader<R>,
}
impl<R> StreamRead<io::Error> for BufIOStreamRead<R>
where
    R: Read,
{
    fn borrow_read(&mut self) -> &mut impl Read {
        &mut self.reader
    }
    fn read_object<T: DeserializeOwned<io::Error>>(
        &mut self,
    ) -> Result<<T as DeserializeOwned<io::Error>>::Output, io::Error> {
        T::deserialize(&mut self.reader)
    }
    fn read_object_ref<T: Deserialize<io::Error>>(
        &mut self,
        object: &mut T,
    ) -> Result<(), io::Error> {
        object.deserialize(&mut self.reader)
    }
}
/// Sequential object reading: by value (`read_object`) or in place
/// (`read_object_ref`), plus raw access to the underlying reader.
pub trait StreamRead<E>
where
    E: Error + From<io::Error>,
{
    /// Borrow the raw underlying reader.
    fn borrow_read(&mut self) -> &mut impl Read;
    /// Deserialize the next object by value.
    fn read_object<T: DeserializeOwned<E>>(
        &mut self,
    ) -> Result<<T as DeserializeOwned<E>>::Output, E>;
    /// Deserialize the next object into an existing value.
    fn read_object_ref<T: Deserialize<E>>(&mut self, object: &mut T) -> Result<(), E>;
}
/// Sources that can hand out zero-copy [`Handle`]s to stored objects.
pub trait HandleGet<E>
where
    E: Error + From<io::Error>,
{
    fn get_handle<'a, T>(&self) -> Result<Handle<'a, T>, E>
    where
        T: Deserialize<E>;
}
/// A [`StreamRead`] that is additionally seekable by absolute byte offset —
/// the access pattern required for the side heap file.
pub trait HeapRead<E>
where
    E: Error + From<io::Error>,
    Self: StreamRead<E>,
{
    /// Current byte offset.
    fn offset(&mut self) -> Result<u64, E>;
    /// Seek to an absolute byte offset.
    fn seek_to(&mut self, pos: u64) -> Result<(), E>;
}
/// Buffered, seekable reader used for the side heap file.
pub struct IOHeapRead<R>
where
    R: Read + Seek,
{
    pub(crate) reader: io::BufReader<R>,
}
impl<R> StreamRead<io::Error> for IOHeapRead<R>
where
    R: Read + Seek,
{
    fn borrow_read(&mut self) -> &mut impl Read {
        &mut self.reader
    }
    fn read_object<T: DeserializeOwned<io::Error>>(
        &mut self,
    ) -> Result<<T as DeserializeOwned<io::Error>>::Output, io::Error> {
        T::deserialize(&mut self.reader)
    }
    fn read_object_ref<T: Deserialize<io::Error>>(
        &mut self,
        object: &mut T,
    ) -> Result<(), io::Error> {
        object.deserialize(&mut self.reader)
    }
}
impl<R> HeapRead<io::Error> for IOHeapRead<R>
where
    R: Read + Seek,
{
    /// Current byte position in the underlying heap file.
    fn offset(&mut self) -> Result<u64, io::Error> {
        self.reader.stream_position()
    }

    /// Reposition the heap reader at the absolute byte offset `pos`.
    fn seek_to(&mut self, pos: u64) -> Result<(), io::Error> {
        self.reader.seek(io::SeekFrom::Start(pos))?;
        Ok(())
    }
}
/// Paired reader: a sequential object stream plus a seekable heap holding
/// out-of-line objects referenced by [`HeapObjectRec`]s in the stream.
pub struct StreamWithHeapRead<RS, RH, E>
where
    E: Error + From<io::Error>,
    RS: StreamRead<E>,
    RH: HeapRead<E>,
{
    pub(crate) stream: RS,
    pub(crate) heap: RH,
    _marker: std::marker::PhantomData<E>,
}
impl<RS, RH, E> StreamWithHeapRead<RS, RH, E>
where
    E: Error + From<io::Error>,
    RS: StreamRead<E>,
    RH: HeapRead<E>,
{
    /// Combine an already-open stream reader and heap reader.
    pub fn new(stream: RS, heap: RH) -> Self {
        Self {
            stream,
            heap,
            _marker: std::marker::PhantomData,
        }
    }
    /// Read the next object directly from the stream.
    pub fn read_stream<T: DeserializeOwned<E>>(
        &mut self,
    ) -> Result<<T as DeserializeOwned<E>>::Output, E> {
        self.stream.read_object::<T>()
    }
    /// Read the next object from the stream into an existing value.
    pub fn read_stream_ref<T: Deserialize<E>>(&mut self, object: &'_ mut T) -> Result<(), E> {
        self.stream.read_object_ref(object)
    }
    /// Read a [`HeapObjectRec`] from the stream, then the object it points to
    /// from the heap.
    pub fn read_heap<T: DeserializeOwned<E>>(
        &mut self,
    ) -> Result<<T as DeserializeOwned<E>>::Output, E> {
        let mut rec = HeapObjectRec { offset: 0 };
        self.read_stream_ref(&mut rec)?;
        self.read_heap_at::<T>(&rec)
    }
    /// Read an object from the heap at a known record's offset.
    pub fn read_heap_at<T: DeserializeOwned<E>>(
        &mut self,
        rec: &HeapObjectRec,
    ) -> Result<<T as DeserializeOwned<E>>::Output, E> {
        self.heap.seek_to(rec.offset)?;
        // Debug-only sanity check that the seek actually landed on target.
        #[cfg(debug_assertions)]
        {
            let pos = self.heap.offset().expect("Failed to get offset");
            assert_eq!(pos, rec.offset);
        }
        T::deserialize(&mut self.heap.borrow_read())
    }
    /// Like [`Self::read_heap`], but deserializes into an existing value.
    pub fn read_heap_ref<T: Deserialize<E>>(&mut self, object: &'_ mut T) -> Result<(), E> {
        let mut rec = HeapObjectRec { offset: 0 };
        self.read_stream_ref(&mut rec)?;
        self.read_heap_ref_at(object, &rec)
    }
    /// Like [`Self::read_heap_at`], but deserializes into an existing value.
    pub fn read_heap_ref_at<T: Deserialize<E>>(
        &mut self,
        object: &'_ mut T,
        rec: &HeapObjectRec,
    ) -> Result<(), E> {
        self.heap.seek_to(rec.offset)?;
        // Debug-only sanity check that the seek actually landed on target.
        #[cfg(debug_assertions)]
        {
            let pos = self.heap.offset().expect("Failed to get offset");
            assert_eq!(pos, rec.offset);
        }
        object.deserialize(&mut self.heap.borrow_read())
    }
}
impl StreamWithHeapRead<BufIOStreamRead<std::fs::File>, IOHeapRead<std::fs::File>, io::Error> {
    /// Open `basename` as the stream file and `basename.heap` as its heap.
    // NOTE(review): the `EXT` type parameter is never used in the body —
    // confirm whether a file-extension argument was intended here.
    pub fn new_by_basename<EXT: AsRef<str>>(basename: &str) -> io::Result<Self> {
        let stream = BufIOStreamRead {
            reader: io::BufReader::new(std::fs::File::open(basename)?),
        };
        let heap = IOHeapRead {
            reader: io::BufReader::new(std::fs::File::open(format!("{}.heap", basename))?),
        };
        Ok(Self::new(stream, heap))
    }
}
/// Sequential object writing, plus raw access to the underlying writer.
pub trait StreamWrite<E>
where
    E: Error + From<io::Error>,
{
    /// Borrow the raw underlying writer.
    fn borrow_write(&mut self) -> &mut impl Write;
    /// Serialize one object to the stream.
    fn write_object<T: Serialize<E>>(&mut self, object: &T) -> Result<(), E>;
}
/// Buffered, forward-only object writer over any `Write` sink.
#[allow(dead_code)]
pub struct BufIOStreamWrite<W>
where
    W: Write,
{
    writer: io::BufWriter<W>,
}
impl<W> StreamWrite<io::Error> for BufIOStreamWrite<W>
where
    W: Write,
{
    fn borrow_write(&mut self) -> &mut impl Write {
        &mut self.writer
    }
    fn write_object<T: Serialize<io::Error>>(&mut self, object: &T) -> Result<(), io::Error> {
        object.serialize(&mut self.writer)
    }
}
/// A [`StreamWrite`] that is additionally seekable by absolute byte offset —
/// the access pattern required for writing the side heap file.
pub trait HeapWrite<E>
where
    E: Error + From<io::Error>,
    Self: StreamWrite<E>,
{
    /// Current byte offset.
    fn offset(&mut self) -> Result<u64, E>;
    /// Seek to an absolute byte offset.
    fn seek_to(&mut self, pos: u64) -> Result<(), E>;
}
/// Buffered writer used for the side heap file.
// NOTE(review): the struct only requires `W: Write`; `HeapWrite` below
// additionally needs `W: Seek`.
#[allow(dead_code)]
pub struct IOHeapWrite<W>
where
    W: Write,
{
    writer: io::BufWriter<W>,
}
impl<W> StreamWrite<io::Error> for IOHeapWrite<W>
where
    W: Write,
{
    fn borrow_write(&mut self) -> &mut impl Write {
        &mut self.writer
    }
    fn write_object<T: Serialize<io::Error>>(&mut self, object: &T) -> Result<(), io::Error> {
        object.serialize(&mut self.writer)
    }
}
impl<W> HeapWrite<io::Error> for IOHeapWrite<W>
where
    W: Write + Seek,
{
    fn offset(&mut self) -> Result<u64, io::Error> {
        self.writer.stream_position().map_err(Into::into)
    }
    fn seek_to(&mut self, pos: u64) -> Result<(), io::Error> {
        self.writer
            .seek(io::SeekFrom::Start(pos))
            .map_err(Into::into)
            .map(|_| ())
    }
}
/// Paired writer: a sequential object stream plus a heap for out-of-line
/// objects; heap writes leave a [`HeapObjectRec`] in the stream.
pub struct StreamWithHeapWrite<WS, WH, E>
where
    E: Error + From<io::Error>,
    WS: StreamWrite<E>,
    WH: HeapWrite<E>,
{
    stream: WS,
    heap: WH,
    _marker: std::marker::PhantomData<E>,
}
impl<WS, WH, E> StreamWithHeapWrite<WS, WH, E>
where
    E: Error + From<io::Error>,
    WS: StreamWrite<E>,
    WH: HeapWrite<E>,
{
    /// Combine an already-open stream writer and heap writer.
    pub fn new(stream: WS, heap: WH) -> Self {
        Self {
            stream,
            heap,
            _marker: std::marker::PhantomData,
        }
    }
    /// Serialize `object` directly into the stream.
    pub fn write_stream<T: Serialize<E>>(&mut self, object: &T) -> Result<(), E> {
        self.stream.write_object(object)
    }
    /// Serialize `object` into the heap and write its [`HeapObjectRec`]
    /// (the current heap offset) into the stream.
    pub fn write_heap<T: Serialize<E>>(&mut self, object: &T) -> Result<(), E> {
        let offset = self.heap.offset()?;
        self.write_stream(&HeapObjectRec { offset })?;
        self.heap.write_object(object)?;
        Ok(())
    }
    /// Serialize `object` into the heap and return its record *without*
    /// writing anything to the stream (caller stores the record itself).
    pub fn write_heap_dangling<T: Serialize<E>>(&mut self, object: &T) -> Result<HeapObjectRec, E> {
        let offset = self.heap.offset()?;
        self.heap.write_object(object)?;
        Ok(HeapObjectRec { offset })
    }
}
/// Serialization of composite types into a split stream/heap writer.
pub trait HeapedSerialize<RS, RH, E>
where
    RS: StreamWrite<E>,
    RH: HeapWrite<E>,
    E: Error + From<io::Error>,
{
    fn serialize(&self, writer: &mut StreamWithHeapWrite<RS, RH, E>) -> Result<(), E>;
}
/// By-value deserialization of composite types from a split reader.
pub trait HeapedDeserializeOwned<WS, WH, E>
where
    WS: StreamRead<E>,
    WH: HeapRead<E>,
    E: Error + From<io::Error>,
{
    fn deserialize(reader: &mut StreamWithHeapRead<WS, WH, E>) -> Result<Self, E>
    where
        Self: Sized;
}
/// In-place deserialization of composite types from a split reader.
pub trait HeapedDeserialize<WS, WH, E>
where
    WS: StreamRead<E>,
    WH: HeapRead<E>,
    E: Error + From<io::Error>,
{
    fn deserialize(&mut self, reader: &mut StreamWithHeapRead<WS, WH, E>) -> Result<(), E>;
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::check_serdes_consistency;
    // Round-trips values through the split stream/heap writer and reader,
    // checking stream fields, heap fields, and Option encodings.
    #[test]
    fn test_serdes() {
        #[derive(Debug, Clone, Copy, PartialEq, Eq)]
        struct Test {
            on_stream: u8,
            on_heap: u8,
            none: Option<u8>,
            option: Option<u8>,
        }
        let mut buf = Vec::new();
        let mut heap = Vec::new();
        let writer = BufIOStreamWrite {
            writer: io::BufWriter::new(std::io::Cursor::new(&mut buf)),
        };
        let heap_writer = IOHeapWrite {
            writer: io::BufWriter::new(std::io::Cursor::new(&mut heap)),
        };
        let mut combined = StreamWithHeapWrite::new(writer, heap_writer);
        let test = Test {
            on_stream: 1,
            on_heap: 2,
            none: None,
            option: Some(3),
        };
        combined.write_stream(&test.on_stream).unwrap();
        combined.write_heap(&test.on_heap).unwrap();
        combined.write_heap(&test.none).unwrap();
        combined.write_heap(&test.option).unwrap();
        // Dropping flushes the BufWriters so `buf`/`heap` are complete.
        drop(combined);
        let reader = BufIOStreamRead {
            reader: io::BufReader::new(std::io::Cursor::new(&buf)),
        };
        let heap_reader = IOHeapRead {
            reader: io::BufReader::new(std::io::Cursor::new(&heap)),
        };
        let mut combined = StreamWithHeapRead::new(reader, heap_reader);
        let mut loopback = Test {
            on_stream: 0,
            on_heap: 0,
            none: None,
            option: None,
        };
        combined.read_stream_ref(&mut loopback.on_stream).unwrap();
        combined.read_heap_ref(&mut loopback.on_heap).unwrap();
        // Heap offsets advance past each value as it is consumed.
        assert_eq!(combined.heap.offset().unwrap(), 1);
        combined.read_heap_ref(&mut loopback.none).unwrap();
        assert_eq!(combined.heap.offset().unwrap(), 2);
        combined.read_heap_ref(&mut loopback.option).unwrap();
        assert_eq!(test, loopback);
    }
    // Maps a small file and reads it back through a typed handle at offset 1.
    #[cfg(any(unix, target_os = "wasi"))]
    #[test]
    fn test_mmap() {
        use std::fs::File;
        let mut f = File::create("foo.txt").unwrap();
        f.write_all(b"Hello, world!").unwrap();
        drop(f);
        let f = File::open("foo.txt").unwrap();
        let mmap = unsafe { MemoryMap::from_file(&f, 0, 13).unwrap() };
        let handle: Handle<u8> = unsafe { mmap.get_handle(&HeapObjectRec { offset: 1 }) };
        assert_eq!(
            unsafe { std::slice::from_raw_parts(handle.ptr, 12) },
            b"ello, world!"
        );
        drop(mmap);
        std::fs::remove_file("foo.txt").unwrap();
    }
    #[test]
    fn test_heap_rec_serdes() {
        check_serdes_consistency!(HeapObjectRec { offset: 1 });
    }
}

114
src/io/mod.rs Normal file
View file

@ -0,0 +1,114 @@
use std::cell::RefCell;
use std::io::{self, Read};
use std::io::{Seek, Write};
pub mod file;
pub mod stream;
pub mod tee;
/// Render a byte count as a human-readable size with two decimals
/// (e.g. "1.50 KB"), using binary (1024-based) units up to TB.
pub fn humanize_size(bytes: usize) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
    // BUGFIX: the original right-shifted the *integer* byte count by 10 bits
    // per unit and only then converted to f64, so the fractional part was
    // always lost (1536 printed as "1.00 KB" despite the "{:.2}" format).
    // Divide in floating point instead so the two decimals are meaningful.
    let mut value = bytes as f64;
    let mut unit = UNITS[0];
    for next in &UNITS[1..] {
        if value < 1024.0 {
            break;
        }
        value /= 1024.0;
        unit = next;
    }
    format!("{:.2} {}", value, unit)
}
/// Reader adapter that counts the bytes read through it.
pub struct CountingReader<R> {
    reader: R,
    // IDIOM: `Cell` rather than `RefCell` — the counter is a plain `Copy`
    // value, so `Cell` provides the same `&self` mutability without
    // `RefCell`'s borrow-flag bookkeeping or its runtime panic path.
    count: Cell<usize>,
}
impl<R> CountingReader<R> {
    /// Wrap `reader` with the byte counter at zero.
    pub fn new(reader: R) -> Self {
        Self {
            reader,
            count: Cell::new(0),
        }
    }
    /// Bytes read since construction or the last [`Self::reset_count`].
    pub fn count(&self) -> usize {
        self.count.get()
    }
    /// Reset the byte counter to zero.
    pub fn reset_count(&self) {
        self.count.set(0);
    }
}
impl<R> Read for CountingReader<R>
where
    R: Read,
{
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let count = self.reader.read(buf)?;
        self.count.set(self.count.get() + count);
        Ok(count)
    }
}
impl<R> Seek for CountingReader<R>
where
    R: Seek,
{
    // Seeks are forwarded untouched; the byte counter is NOT adjusted.
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        self.reader.seek(pos)
    }
}
/// Writer adapter that counts the bytes successfully written through it.
pub struct CountingWriter<W> {
    writer: W,
    count: usize,
}

impl<W> CountingWriter<W> {
    /// Wrap `writer` with the byte counter at zero.
    pub fn new(writer: W) -> Self {
        Self { writer, count: 0 }
    }

    /// Total bytes written so far.
    pub fn count(&self) -> usize {
        self.count
    }
}

impl<W: Write> Write for CountingWriter<W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let written = self.writer.write(buf)?;
        self.count += written;
        Ok(written)
    }

    fn flush(&mut self) -> io::Result<()> {
        self.writer.flush()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    // Exact binary-unit boundaries across the whole B..TB range.
    #[test]
    fn test_humanize_size() {
        assert_eq!(humanize_size(0), "0.00 B");
        assert_eq!(humanize_size(1), "1.00 B");
        assert_eq!(humanize_size(1024), "1.00 KB");
        assert_eq!(humanize_size(1024 * 1024), "1.00 MB");
        assert_eq!(humanize_size(1024 * 1024 * 1024), "1.00 GB");
        assert_eq!(humanize_size(1024 * 1024 * 1024 * 1024 * 2), "2.00 TB");
    }
}

207
src/io/stream.rs Normal file
View file

@ -0,0 +1,207 @@
use std::io::{self, BufRead, Read, Seek, Write};
use crate::{
gff3::{Gff3Line, Gff3Read, Gff3Write},
Error,
};
/// Buffered, forward-only GFF3 reader over any `Read` source.
pub struct Gff3StreamReader<R>
where
    R: Read,
{
    reader: io::BufReader<R>,
    // Total byte size of the underlying stream, when known.
    size: Option<usize>,
    // Scratch buffer reused across `read_line` calls to avoid per-line allocation.
    line_buffer: String,
}
impl<R> Gff3StreamReader<R>
where
    R: Read,
{
    // Single private constructor every public constructor funnels through,
    // so buffer sizing stays consistent in one place (previously
    // `new_with_size` and `open` were copy-paste duplicates).
    fn from_parts(reader: R, size: Option<usize>) -> Self {
        Self {
            reader: io::BufReader::new(reader),
            size,
            line_buffer: String::with_capacity(1024),
        }
    }
    /// Creates a reader with no known total size.
    pub fn new(reader: R) -> Self {
        Self::from_parts(reader, None)
    }
    /// Creates a reader with a known total size in bytes.
    pub fn new_with_size(reader: R, size: usize) -> Self {
        Self::from_parts(reader, Some(size))
    }
    /// Alias of [`Self::new_with_size`], kept for API symmetry.
    pub fn open(reader: R, size: usize) -> Self {
        Self::new_with_size(reader, size)
    }
    /// Clears the line scratch buffer, shrinking it back down if a very
    /// long line previously grew it past 4 KiB.
    pub(crate) fn reset_buffer(&mut self) {
        self.line_buffer.clear();
        if self.line_buffer.capacity() > 4096 {
            self.line_buffer.shrink_to(1024);
        }
    }
}
impl<R> Gff3Read<io::Error> for Gff3StreamReader<R>
where
R: Read,
{
fn size(&self) -> Option<usize> {
self.size
}
fn read_line(&mut self) -> Result<Option<Gff3Line>, Error<io::Error>> {
self.reset_buffer();
if self
.reader
.read_line(&mut self.line_buffer)
.map_err(Error::Io)?
> 0
{
let line = &mut self.line_buffer;
if line.ends_with('\n') {
line.pop();
if line.ends_with('\r') {
line.pop();
}
}
let parsed = Gff3Line::parse_str(line).map_err(Error::Parse)?;
Ok(Some(parsed))
} else {
Ok(None)
}
}
}
/// Buffered GFF3 reader over a seekable source; supports offset queries,
/// random seeks and automatic size probing.
pub struct Gff3SeekableStreamReader<R>
where
    R: Read + Seek,
{
    reader: io::BufReader<R>,
    // Total byte size of the underlying stream, when known or probed.
    size: Option<usize>,
    // Scratch buffer reused across `read_line` calls.
    line_buffer: String,
}
impl<R> Gff3SeekableStreamReader<R>
where
    R: Read + Seek,
{
    // Shared assembly used by every constructor.
    fn from_buffered(reader: io::BufReader<R>, size: Option<usize>) -> Self {
        Self {
            reader,
            size,
            line_buffer: String::with_capacity(1024),
        }
    }
    /// Probes the total stream length by seeking to the end and back.
    /// A failed end-seek is treated as "size unknown"; a failed rewind is
    /// propagated (previously this logic was duplicated in `open` and
    /// `open_prebuffered`).
    fn probe_size(reader: &mut io::BufReader<R>) -> Result<Option<usize>, io::Error> {
        if let Ok(end) = reader.seek(io::SeekFrom::End(0)) {
            reader.seek(io::SeekFrom::Start(0))?;
            Ok(Some(end as usize))
        } else {
            Ok(None)
        }
    }
    /// Creates a reader with no known total size (no probing).
    pub fn new(reader: R) -> Self {
        Self::from_buffered(io::BufReader::new(reader), None)
    }
    /// Shared reference to the inner reader.
    pub fn borrow_reader(&self) -> &R {
        self.reader.get_ref()
    }
    /// Creates a reader with a caller-supplied total size in bytes.
    pub fn new_with_size(reader: R, size: usize) -> Self {
        Self::from_buffered(io::BufReader::new(reader), Some(size))
    }
    /// Wraps `reader` in a fresh `BufReader` and probes its size.
    pub fn open(reader: R) -> Result<Self, io::Error> {
        Self::open_prebuffered(io::BufReader::new(reader))
    }
    /// Takes an already-buffered reader and probes its size.
    pub fn open_prebuffered(mut reader: io::BufReader<R>) -> Result<Self, io::Error> {
        let size = Self::probe_size(&mut reader)?;
        Ok(Self::from_buffered(reader, size))
    }
    /// Clears the line scratch buffer, shrinking it back down if a very
    /// long line previously grew it past 4 KiB.
    pub(crate) fn reset_buffer(&mut self) {
        self.line_buffer.clear();
        if self.line_buffer.capacity() > 4096 {
            self.line_buffer.shrink_to(1024);
        }
    }
}
impl<R> Gff3Read<io::Error> for Gff3SeekableStreamReader<R>
where
R: Read + Seek,
{
fn size(&self) -> Option<usize> {
self.size
}
fn offset(&mut self) -> Option<u64> {
self.reader.stream_position().ok()
}
fn seek_to(&mut self, pos: u64) -> Result<Option<()>, Error<io::Error>> {
self.reader
.seek(io::SeekFrom::Start(pos))
.map(|_| Some(()))
.map_err(Error::Io)
}
fn read_line(&mut self) -> Result<Option<Gff3Line>, Error<io::Error>> {
self.reset_buffer();
if self
.reader
.read_line(&mut self.line_buffer)
.map_err(Error::Io)?
> 0
{
let line = &mut self.line_buffer;
if line.ends_with('\n') {
line.pop();
if line.ends_with('\r') {
line.pop();
}
}
let parsed = Gff3Line::parse_str(line).map_err(Error::Parse)?;
Ok(Some(parsed))
} else {
Ok(None)
}
}
}
pub struct Gff3Writer<W>
where
W: Write,
{
writer: io::BufWriter<W>,
}
impl<W> Gff3Writer<W>
where
W: Write,
{
pub fn new(writer: W) -> Self {
Self {
writer: io::BufWriter::new(writer),
}
}
}
impl<W> Gff3Write<io::Error> for Gff3Writer<W>
where
W: Write,
{
fn write_line(&mut self, line: &Gff3Line) -> Result<(), Error<std::io::Error>> {
writeln!(&mut self.writer, "{}", line).map_err(Error::Io)
}
}

61
src/io/tee.rs Normal file
View file

@ -0,0 +1,61 @@
use crate::{
gff3::{Gff3Line, Gff3Read},
Error,
};
/// Fan-out reader: pulls GFF3 lines from a single `Gff3Read` source and
/// broadcasts them, in chunks, to every registered channel callback.
pub struct Gff3BroadcastRead<'a, R, E>
where
    E: std::error::Error,
    R: Gff3Read<E>,
{
    // Underlying line source.
    reader: R,
    // Number of lines buffered before each broadcast flush.
    chunk_size: usize,
    // Subscriber callbacks; each receives the flush offset and the batch slice.
    channels: Vec<ChannelFunc<'a, Gff3Line<'static>>>,
    // Ties the reader's error type `E` to the struct; `E` is not stored directly.
    _marker: std::marker::PhantomData<E>,
}
/// Callback invoked per flush with `(flush_offset, &[(line_offset, item)])`.
pub type ChannelFunc<'a, O> = Box<dyn FnMut(u64, &[(u64, O)]) + Send + 'a>;
impl<'a, R, E> Gff3BroadcastRead<'a, R, E>
where
E: std::error::Error,
R: Gff3Read<E>,
{
pub fn new(reader: R, chunk_size: usize) -> Self {
Self {
reader,
chunk_size,
channels: Vec::new(),
_marker: std::marker::PhantomData,
}
}
pub fn add_channel(&mut self, channel: ChannelFunc<'a, Gff3Line<'static>>) {
self.channels.push(channel);
}
pub fn run(mut self) -> Result<(), Error<E>> {
let mut chunk = Vec::with_capacity(self.chunk_size);
loop {
let line = self.reader.read_line()?.map(|line| line.to_static());
match line {
Some(line) => {
let offset = self.reader.offset().ok_or(Error::Unseekable)?;
chunk.push((offset, line));
if chunk.len() >= self.chunk_size {
for tx in &mut self.channels {
tx(offset, &chunk);
}
chunk.clear();
}
}
None => {
for tx in &mut self.channels {
let offset = self.reader.offset().ok_or(Error::Unseekable)?;
tx(offset, &chunk);
}
break;
}
}
}
Ok(())
}
}

41
src/lib.rs Normal file
View file

@ -0,0 +1,41 @@
mod bar;
pub mod ds;
pub mod gff3;
pub mod index;
pub mod io;
pub mod macros;
pub mod serdes;
/// Top-level error type for this crate, generic over the transport error `E`
/// produced by the underlying reader/writer.
#[derive(thiserror::Error, Debug)]
pub enum Error<E> {
    /// Underlying I/O (or transport) failure.
    #[error("io error: {0}")]
    Io(E),
    /// Invalid UTF-8 encountered while decoding input.
    /// `#[from]` added for consistency with `Parse`, so `?` works on
    /// `std::str::Utf8Error` (additive, backward-compatible).
    #[error("utf8 error: {0}")]
    Utf8(#[from] std::str::Utf8Error),
    /// A GFF3 field failed to parse.
    #[error("parse error: {0}")]
    Parse(#[from] ParseErr),
    /// Several independent failures collected together.
    #[error("multiple errors: {0}")]
    Multiple(Vec<Error<E>>),
    /// A required feature record was not found.
    #[error("missing feature: {0}")]
    MissingFeature(String),
    /// A required attribute was not found.
    #[error("missing attribute: {0}")]
    MissingAttribute(String),
    /// Invariant violation inside the library itself.
    #[error("internal error: {0}")]
    Internal(String),
    /// Input ended before a complete record could be read.
    #[error("unexpected eof")]
    UnexpectedEof,
    /// An operation required seeking on a non-seekable stream.
    #[error("unseekable stream")]
    Unseekable,
}
/// Errors produced while parsing individual GFF3 fields.
#[derive(thiserror::Error, Debug)]
pub enum ParseErr {
    /// A required column/field was absent; carries the field name.
    #[error("missing field: {0}")]
    MissingField(&'static str),
    /// An integer column failed to parse.
    #[error("parse int error: {0}")]
    ParseInt(#[from] std::num::ParseIntError),
    /// A floating-point column failed to parse.
    #[error("parse float error: {0}")]
    ParseFloat(#[from] std::num::ParseFloatError),
    /// A boolean column failed to parse.
    #[error("parse bool error: {0}")]
    ParseBool(#[from] std::str::ParseBoolError),
}

37
src/macros.rs Normal file
View file

@ -0,0 +1,37 @@
/// Builds a `hashbrown::HashMap` from `key => value` pairs, e.g.
/// `hashbrown_map! { "a" => 1, "b" => 2 }`. Later duplicate keys overwrite
/// earlier ones (standard `insert` semantics).
#[macro_export]
macro_rules! hashbrown_map {
    ($($key:expr => $value:expr),* $(,)?) => {
        {
            let mut map = ::hashbrown::HashMap::new();
            $(
                map.insert($key, $value);
            )*
            map
        }
    };
}
/// Builds `(key, getter)` pairs for attribute-trie construction: the getter
/// closure extracts attribute `$key` from a `Gff3Line` via `get_attr`.
/// The bracketed form expands a list of keys into an array of such pairs.
#[macro_export]
macro_rules! attr_trie_def {
    ($key:expr) => {
        ($key, |line: &gfidx::gff3::Gff3Line| line.get_attr($key))
    };
    [$($key:expr),* $(,)?] => {
        [
            $(
                attr_trie_def!($key),
            )*
        ]
    };
}
/// Reborrows a place as `&_`/`&mut _` through a raw-pointer cast, bypassing
/// the borrow checker.
///
/// WARNING(review): the `mut` form casts a `*const` obtained via `addr_of!`
/// to `*mut` and materializes a `&mut` from it. If the place is not actually
/// writable and uniquely accessed at the call site this is undefined
/// behavior (aliased `&mut`). Callers must guarantee exclusivity; consider
/// `addr_of_mut!` instead — TODO confirm every call site upholds this.
#[macro_export]
macro_rules! unsafe_borrow {
    (mut $x:expr) => {{
        let ret = unsafe { &mut *(::std::ptr::addr_of!($x) as *mut _) };
        ret
    }};
    ($x:expr) => {
        unsafe { &*(::std::ptr::addr_of!($x) as *const _) }
    };
}

432
src/serdes/mod.rs Normal file
View file

@ -0,0 +1,432 @@
use std::{error::Error, io::Write};
use varint_rs::{VarintReader, VarintWriter};
use num_traits::PrimInt;
pub mod tree;
/// Types that can write themselves to a binary stream.
/// `E` is the caller-chosen error type, convertible from `std::io::Error`.
pub trait Serialize<E>
where
    E: Error + From<std::io::Error>,
{
    /// Writes the binary encoding of `self` into `writer`.
    fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
    where
        W: Write;
}
/// Types whose value can be decoded from a stream without a pre-existing
/// instance. `Output` allows decoding to a related owned type
/// (e.g. `Vec<T>` decoding to `Vec<T::Output>`).
pub trait DeserializeOwned<E>
where
    E: Error + From<std::io::Error>,
    Self: Sized,
{
    /// Concrete type produced by decoding.
    type Output;
    /// Decodes one value from `reader`.
    fn deserialize<R>(reader: &mut R) -> Result<Self::Output, E>
    where
        R: std::io::Read;
}
/// In-place decoding: overwrites `self` with the value read from the stream,
/// allowing buffer/allocation reuse across reads.
pub trait Deserialize<E>
where
    E: Error + From<std::io::Error>,
{
    /// Replaces the contents of `self` with a freshly decoded value.
    fn deserialize<R>(&mut self, reader: &mut R) -> Result<(), E>
    where
        R: std::io::Read;
}
/// `bool` is encoded as a single byte: 0 for `false`, 1 for `true`.
impl<E> Serialize<E> for bool
where
    E: Error + From<std::io::Error>,
{
    fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
    where
        W: Write,
    {
        let byte = [u8::from(*self)];
        writer.write_all(&byte)?;
        Ok(())
    }
}
impl<E> DeserializeOwned<E> for bool
where
    E: Error + From<std::io::Error>,
{
    type Output = bool;

    /// Reads one byte; any non-zero value decodes as `true`.
    fn deserialize<R>(reader: &mut R) -> Result<Self, E>
    where
        R: std::io::Read,
    {
        let mut byte = [0u8; 1];
        reader.read_exact(&mut byte)?;
        Ok(byte[0] != 0)
    }
}
/// Implements in-place `Deserialize` for `Copy`-style types by delegating to
/// their `DeserializeOwned` impl and overwriting `*self` with the result.
#[macro_export]
macro_rules! impl_deserialize_for_copy {
    ($( $type:ty ),*) => {
        $(
            impl<E> Deserialize<E> for $type
            where
                E: Error + From<std::io::Error>,
            {
                fn deserialize<R>(&mut self, reader: &mut R) -> Result<(), E>
                where
                    R: std::io::Read,
                {
                    *self = <Self as DeserializeOwned<E>>::deserialize(reader)?;
                    Ok(())
                }
            }
        )*
    };
}
#[test]
// Both `false` and `true` must survive a serialize -> deserialize round trip.
fn test_bool_serdes() {
    let test_cases = [false, true];
    for &test_case in test_cases.iter() {
        crate::check_serdes_consistency!(test_case);
    }
}
// `bool` is `Copy`, so in-place deserialization delegates to the owned path.
impl_deserialize_for_copy!(bool);
/// Implements fixed-width little-endian `Serialize`/`DeserializeOwned`
/// (plus in-place `Deserialize` via `impl_deserialize_for_copy!`) for
/// primitive integer types.
macro_rules! impl_numeric_serdes {
    ($( $type:ty ),*) => {
        $(
            impl<E> Serialize<E> for $type
            where
                E: Error + From<std::io::Error>,
            {
                fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
                where
                    W: Write,
                {
                    // Always little-endian, for a platform-independent format.
                    writer.write_all(&self.to_le_bytes())?;
                    Ok(())
                }
            }
            impl<E> DeserializeOwned<E> for $type
            where
                E: Error + From<std::io::Error>,
            {
                type Output = $type;
                fn deserialize<R>(reader: &mut R) -> Result<Self, E>
                where
                    R: std::io::Read,
                {
                    // Read exactly the type's width, then reassemble.
                    let mut buf = [0; std::mem::size_of::<Self>()];
                    reader.read_exact(&mut buf)?;
                    Ok(Self::from_le_bytes(buf))
                }
            }
            impl_deserialize_for_copy!($type);
        )*
    };
}
impl_numeric_serdes!(u8, u16, u32, u64, u128, i8, i16, i32, i64, i128);
/// `Option<T>` encodes as a one-byte presence flag followed by the payload
/// when present.
impl<E, T> Serialize<E> for Option<T>
where
    E: Error + From<std::io::Error>,
    T: Serialize<E>,
{
    fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
    where
        W: Write,
    {
        if let Some(inner) = self {
            true.serialize(writer)?;
            inner.serialize(writer)
        } else {
            false.serialize(writer)
        }
    }
}
impl<E, T, O> DeserializeOwned<E> for Option<T>
where
    E: Error + From<std::io::Error>,
    T: DeserializeOwned<E, Output = O>,
{
    type Output = Option<O>;

    /// Reads the presence flag, then decodes the payload only when set.
    fn deserialize<R>(reader: &mut R) -> Result<Self::Output, E>
    where
        R: std::io::Read,
    {
        let present = <bool as DeserializeOwned<E>>::deserialize(reader)?;
        if present {
            <T as DeserializeOwned<E>>::deserialize(reader).map(Some)
        } else {
            Ok(None)
        }
    }
}
impl<E, T> Deserialize<E> for Option<T>
where
    E: Error + From<std::io::Error>,
    T: Deserialize<E> + DeserializeOwned<E, Output = T>,
{
    /// In-place decode: reuses the existing `Some` payload's allocation when
    /// possible, constructs a fresh value otherwise, and clears to `None`
    /// when the presence flag is unset.
    fn deserialize<R>(&mut self, reader: &mut R) -> Result<(), E>
    where
        R: std::io::Read,
    {
        let present = <bool as DeserializeOwned<E>>::deserialize(reader)?;
        if !present {
            *self = None;
            return Ok(());
        }
        if let Some(slot) = self.as_mut() {
            slot.deserialize(reader)
        } else {
            *self = Some(<T as DeserializeOwned<E>>::deserialize(reader)?);
            Ok(())
        }
    }
}
/// Slices encode as a `u64` element count followed by each element in order.
impl<E, T> Serialize<E> for &[T]
where
    E: Error + From<std::io::Error>,
    T: Serialize<E>,
{
    fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
    where
        W: Write,
    {
        (self.len() as u64).serialize(writer)?;
        self.iter().try_for_each(|item| item.serialize(writer))
    }
}
impl<E, T, O> DeserializeOwned<E> for Vec<T>
where
    E: Error + From<std::io::Error>,
    T: DeserializeOwned<E, Output = O>,
{
    type Output = Vec<O>;

    /// Reads a `u64` length prefix followed by that many `T` payloads.
    ///
    /// The up-front allocation is clamped so a corrupt or hostile length
    /// prefix cannot trigger a huge speculative allocation; the vector still
    /// grows naturally to the real length as elements are actually decoded.
    fn deserialize<R>(reader: &mut R) -> Result<Self::Output, E>
    where
        R: std::io::Read,
    {
        // Upper bound on speculative preallocation (in elements).
        const MAX_PREALLOC: usize = 4096;
        let len = <u64 as DeserializeOwned<E>>::deserialize(reader)?;
        let mut result = Vec::with_capacity((len as usize).min(MAX_PREALLOC));
        for _ in 0..len {
            result.push(<T as DeserializeOwned<E>>::deserialize(reader)?);
        }
        Ok(result)
    }
}
impl<E, T> Deserialize<E> for Vec<T>
where
    E: Error + From<std::io::Error>,
    T: Deserialize<E> + DeserializeOwned<E, Output = T>,
{
    /// In-place variant of the `Vec` decoder: clears `self`, then refills it
    /// from a `u64` length prefix and that many payloads, reusing the
    /// existing allocation.
    ///
    /// As in the owned decoder, speculative reservation is clamped so a
    /// corrupt or hostile length prefix cannot force a huge allocation.
    fn deserialize<R>(&mut self, reader: &mut R) -> Result<(), E>
    where
        R: std::io::Read,
    {
        // Upper bound on speculative preallocation (in elements).
        const MAX_PREALLOC: usize = 4096;
        let len = <u64 as DeserializeOwned<E>>::deserialize(reader)?;
        self.clear();
        self.reserve((len as usize).min(MAX_PREALLOC));
        for _ in 0..len {
            self.push(<T as DeserializeOwned<E>>::deserialize(reader)?);
        }
        Ok(())
    }
}
#[test]
// Round-trips `None` and boundary `Some` payloads (0, 1, u8::MAX).
fn test_option_serdes() {
    let test_cases: Vec<Option<u8>> = vec![None, Some(0), Some(1), Some(255)];
    for &test_case in test_cases.iter() {
        crate::check_serdes_consistency!(test_case);
    }
}
/// Test helper: serializes `$input`, deserializes into a `Default` value of
/// the same type, and asserts the round trip is lossless.
#[macro_export]
macro_rules! check_serdes_consistency {
    ($input:expr) => {
        let mut buf = Vec::new();
        Serialize::<std::io::Error>::serialize(&$input, &mut buf).unwrap();
        let mut output = Default::default();
        // Zero-cost inference trick: forces `output` to take `$input`'s type.
        fn hint_same_time<T>(_1: &T, _2: &T) {}
        hint_same_time(&$input, &output);
        Deserialize::<std::io::Error>::deserialize(&mut output, &mut std::io::Cursor::new(&buf))
            .unwrap();
        assert_eq!($input, output);
    };
}
/// Newtype marker requesting variable-length (varint) encoding for the
/// wrapped primitive integer, instead of the fixed-width little-endian default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct VarInt<N>(pub N)
where
    N: PrimInt;
/// Generates `Serialize`/`DeserializeOwned`/`Deserialize` impls for
/// `VarInt<$type>` by delegating to the `varint-rs` reader/writer traits,
/// plus a round-trip test (`$test_fn`) covering encoding-boundary values.
macro_rules! gen_varint_serdes {
    ($type:ty, $read_fn:ident, $write_fn:ident, $test_fn:ident) => {
        impl<E> Serialize<E> for VarInt<$type>
        where
            E: Error + From<std::io::Error>,
            $type: PrimInt,
        {
            fn serialize<W>(&self, writer: &mut W) -> Result<(), E>
            where
                W: Write,
            {
                VarintWriter::$write_fn(writer, self.0).map_err(E::from)
            }
        }
        impl<E> DeserializeOwned<E> for VarInt<$type>
        where
            E: Error + From<std::io::Error>,
            $type: PrimInt,
        {
            type Output = VarInt<$type>;
            fn deserialize<R>(reader: &mut R) -> Result<Self, E>
            where
                R: std::io::Read,
            {
                Ok(VarInt(VarintReader::$read_fn(reader).map_err(E::from)?))
            }
        }
        impl_deserialize_for_copy!(VarInt<$type>);
        #[cfg(test)]
        #[test]
        fn $test_fn() {
            // Signed and unsigned values straddling each varint length
            // boundary; cases outside the target type's range are skipped
            // by the `try_into` guard below.
            let test_cases: Vec<i128> = vec![
                -1,
                -127,
                -128,
                -255,
                -256,
                -16383,
                -16384,
                -2097151,
                -2097152,
                -268435455,
                -268435456,
                0,
                1,
                127,
                128,
                255,
                256,
                16383,
                16384,
                2097151,
                2097152,
                268435455,
                268435456,
                34359738367,
                34359738368,
                4398046511103,
                4398046511104,
                562949953421311,
                562949953421312,
                72057594037927935,
                72057594037927936,
                9223372036854775807,
                9223372036854775808,
            ];
            for &test_case in test_cases.iter() {
                if TryInto::<$type>::try_into(test_case).is_err() {
                    continue;
                }
                let mut buf = Vec::new();
                Serialize::<std::io::Error>::serialize(&VarInt(test_case as $type), &mut buf)
                    .unwrap();
                // Owned round trip.
                let result = <VarInt<$type> as DeserializeOwned<std::io::Error>>::deserialize(
                    &mut std::io::Cursor::new(&buf),
                );
                assert_eq!(VarInt(test_case as $type), result.unwrap());
                // In-place round trip.
                let mut result = VarInt(0);
                Deserialize::<std::io::Error>::deserialize(
                    &mut result,
                    &mut std::io::Cursor::new(&buf),
                )
                .unwrap();
                assert_eq!(VarInt(test_case as $type), result);
            }
        }
    };
}
// Varint serdes for every primitive integer width, unsigned and signed.
gen_varint_serdes!(u8, read_u8_varint, write_u8_varint, test_u8_varint_serdes);
gen_varint_serdes!(
    u16,
    read_u16_varint,
    write_u16_varint,
    test_u16_varint_serdes
);
gen_varint_serdes!(
    u32,
    read_u32_varint,
    write_u32_varint,
    test_u32_varint_serdes
);
gen_varint_serdes!(
    u64,
    read_u64_varint,
    write_u64_varint,
    test_u64_varint_serdes
);
gen_varint_serdes!(
    u128,
    read_u128_varint,
    write_u128_varint,
    test_u128_varint_serdes
);
gen_varint_serdes!(i8, read_i8_varint, write_i8_varint, test_i8_varint_serdes);
gen_varint_serdes!(
    i16,
    read_i16_varint,
    write_i16_varint,
    test_i16_varint_serdes
);
gen_varint_serdes!(
    i32,
    read_i32_varint,
    write_i32_varint,
    test_i32_varint_serdes
);
gen_varint_serdes!(
    i64,
    read_i64_varint,
    write_i64_varint,
    test_i64_varint_serdes
);
gen_varint_serdes!(
    i128,
    read_i128_varint,
    write_i128_varint,
    test_i128_varint_serdes
);

1
src/serdes/tree.rs Normal file
View file

@ -0,0 +1 @@