557 lines
19 KiB
Rust
557 lines
19 KiB
Rust
|
use std::{env, ops::Range, vec};
|
||
|
|
||
|
use chumsky::prelude::*;
|
||
|
use quote::ToTokens;
|
||
|
use text::whitespace;
|
||
|
|
||
|
/// Names of the magic files, relative to `BASE_DIR`, that are parsed into
/// generated signatures.
///
/// Each name must be listed only once: every listed file is parsed in full,
/// so a repeated name emits all of that file's associations a second time.
const FILES_TO_PARSE: &[&str] = &[
    "jpeg",
    "images",
    "sgml",
    "riff",
    "animation",
    "audio",
    "matroska",
    "vorbis",
    "msdos",
    "webassembly",
    "elf",
    "mach",
];
|
||
|
|
||
|
// Substrings matched against an association's lowercased MIME type and against
// its extensions; a match drops the association from the generated table.
const BLACKLISTED: &[&str] = &["fuji-", "canon-", "corel-", "dicom", "garmin"];
|
||
|
|
||
|
/// Extensions that may stay on an association once it has been marked safe.
///
/// Any extension of a safe association that contains none of these entries is
/// removed from it.
const SAFE_EXTENSIONS: &[&str] = &[
    ".png",
    ".gif",
    ".jpeg",
    ".webp",
    ".avif",
    ".apng",
    ".bmp",
    ".tiff",
    ".x-icon",
    ".opus",
    ".ogg",
    ".mp4",
    ".m4v",
    ".3gpp",
    ".mpeg",
    ".webm",
    ".aac",
    ".flac",
    ".wav",
];
|
||
|
|
||
|
/// Substrings that, when found in an association's MIME type or in one of its
/// extensions, keep the association in the output and mark it as safe.
const SAFE_WHITELISTED: &[&str] = &[
    "png",
    "gif",
    "jpeg",
    "webp",
    "avif",
    "apng",
    "bmp",
    "tiff",
    "x-icon",
    "opus",
    "ogg",
    "mp4",
    "m4v",
    "3gpp",
    "mpeg",
    "webm",
    "aac",
    "flac",
    "wav",
    "svg",
    "rss",
];
|
||
|
|
||
|
/// Substrings that keep an association in the output while forcing it to be
/// flagged as not safe: signatures for these formats are wanted so that such
/// content can be detected.
const UNSAFE_WHITELISTED: &[&str] = &[
    ".exe", ".wasm", "elf", "mach", "javascript", "bios", "firmware", "driver", "mpegurl",
];
|
||
|
|
||
|
fn static_signatures() -> Vec<MIMEAssociation> {
|
||
|
vec![
|
||
|
MIMEAssociation {
|
||
|
mime: "application/x-elf-executable".to_string().into(),
|
||
|
ext: vec![".pie".to_string(), ".elf".to_string(), ".so".to_string()],
|
||
|
safe: false,
|
||
|
signatures: vec![FlattenedFileSignature {
|
||
|
test: vec![0x7f, b'E', b'L', b'F'],
|
||
|
mask: vec![0xff, 0xff, 0xff, 0xff],
|
||
|
}],
|
||
|
},
|
||
|
MIMEAssociation {
|
||
|
mime: "application/x-mach-binary".to_string().into(),
|
||
|
ext: vec![".dylib".to_string(), ".bundle".to_string()],
|
||
|
safe: false,
|
||
|
signatures: vec![FlattenedFileSignature {
|
||
|
test: vec![0xfe, 0xed, 0xfa, 0xce],
|
||
|
mask: vec![0xff, 0xff, 0xff, 0xff],
|
||
|
}],
|
||
|
},
|
||
|
MIMEAssociation {
|
||
|
mime: "application/vnd.microsoft.portable-executable"
|
||
|
.to_string()
|
||
|
.into(),
|
||
|
ext: vec![".exe".to_string(), ".dll".to_string(), ".sys".to_string()],
|
||
|
safe: false,
|
||
|
signatures: vec![FlattenedFileSignature {
|
||
|
test: b"PE\0\0".to_vec(),
|
||
|
mask: vec![0xff, 0xff, 0xff, 0xff],
|
||
|
}],
|
||
|
},
|
||
|
]
|
||
|
}
|
||
|
|
||
|
#[derive(Debug, Clone)]
|
||
|
pub enum MagicFileLine {
|
||
|
Nop,
|
||
|
Unknown,
|
||
|
Magic {
|
||
|
indent: u8,
|
||
|
offset: u64,
|
||
|
ty: MagicType,
|
||
|
},
|
||
|
AssignAttr {
|
||
|
attr: String,
|
||
|
value: String,
|
||
|
},
|
||
|
}
|
||
|
|
||
|
/// The test carried by a magic line, decoded from its type and value fields.
#[derive(Debug, Clone)]
pub enum MagicType {
    /// A type name this tool does not interpret; holds the raw type field.
    Unknown(String),
    /// A `belong` test: bytes to compare, plus the bytes of the optional
    /// `&mask` suffix of the type field.
    Belong { test: Vec<u8>, mask: Option<Vec<u8>> },
    /// A `string` test: the decoded bytes of the pattern.
    String { test: Vec<u8> },
}
|
||
|
|
||
|
pub fn parse_string_repr() -> impl Parser<char, Vec<u8>, Error = Simple<char>> {
|
||
|
just('\\')
|
||
|
.ignore_then(choice((
|
||
|
just('\\').to(b'\\'),
|
||
|
just('n').to(b'\n'),
|
||
|
just('r').to(b'\r'),
|
||
|
just('t').to(b'\t'),
|
||
|
just('x').ignore_then(
|
||
|
one_of("0123456789abcdefABCDEF")
|
||
|
.repeated()
|
||
|
.exactly(2)
|
||
|
.map(|s| u8::from_str_radix(&s.iter().collect::<String>(), 16).unwrap()),
|
||
|
),
|
||
|
)))
|
||
|
.or(none_of("\\").map(|c| c as u8))
|
||
|
.repeated()
|
||
|
.at_least(1)
|
||
|
.map(|s| s.to_vec())
|
||
|
.then_ignore(end())
|
||
|
}
|
||
|
|
||
|
pub fn parse_hex_repr() -> impl Parser<char, Vec<u8>, Error = Simple<char>> {
|
||
|
just("0x")
|
||
|
.ignore_then(
|
||
|
one_of("0123456789abcdef")
|
||
|
.repeated()
|
||
|
.exactly(2)
|
||
|
.map(|s| u8::from_str_radix(&s.iter().collect::<String>(), 16).unwrap())
|
||
|
.repeated()
|
||
|
.at_least(1),
|
||
|
)
|
||
|
.map(|s| s.to_vec())
|
||
|
.then_ignore(end())
|
||
|
}
|
||
|
|
||
|
pub fn parse_magic_line() -> impl Parser<char, MagicFileLine, Error = Simple<char>> {
|
||
|
choice((
|
||
|
just('#')
|
||
|
.then_ignore(any().repeated())
|
||
|
.to(MagicFileLine::Nop),
|
||
|
just('>')
|
||
|
.repeated()
|
||
|
.map(|i| i.len() as u8)
|
||
|
.then(
|
||
|
one_of("0123456789")
|
||
|
.repeated()
|
||
|
.at_least(1)
|
||
|
.try_map(|s, span| {
|
||
|
s.iter()
|
||
|
.collect::<String>()
|
||
|
.parse::<u64>()
|
||
|
.map_err(|_| Simple::custom(span, "Failed to parse number"))
|
||
|
})
|
||
|
.or(just("0x").ignore_then(
|
||
|
one_of("0123456789abcdefABCDEF")
|
||
|
.repeated()
|
||
|
.at_least(1)
|
||
|
.try_map(|s, span| {
|
||
|
u64::from_str_radix(&s.iter().collect::<String>(), 16)
|
||
|
.map_err(|_| Simple::custom(span, "Failed to parse number"))
|
||
|
}),
|
||
|
)),
|
||
|
)
|
||
|
.then_ignore(whitespace().at_least(1))
|
||
|
.then(
|
||
|
none_of(" \t\n")
|
||
|
.repeated()
|
||
|
.at_least(1)
|
||
|
.map(String::from_iter),
|
||
|
)
|
||
|
.then_ignore(whitespace().at_least(1))
|
||
|
.then(
|
||
|
none_of(" \t\n")
|
||
|
.repeated()
|
||
|
.at_least(1)
|
||
|
.map(String::from_iter),
|
||
|
)
|
||
|
.try_map(|(((indent, offset), ty), rep), span: Range<usize>| {
|
||
|
Ok(MagicFileLine::Magic {
|
||
|
indent,
|
||
|
offset,
|
||
|
ty: match ty.as_str() {
|
||
|
"string" => MagicType::String {
|
||
|
test: parse_string_repr().parse(rep).map_err(|_| {
|
||
|
Simple::custom(span, "Failed to parse string pattern")
|
||
|
})?,
|
||
|
},
|
||
|
"belong" => MagicType::Belong {
|
||
|
test: parse_hex_repr()
|
||
|
.parse(rep)
|
||
|
.map_err(|_| Simple::custom(span, "Failed to parse hex pattern"))?,
|
||
|
mask: None,
|
||
|
},
|
||
|
s if s.starts_with("belong&") => {
|
||
|
let mask = &s["belong&".len()..];
|
||
|
let span_clone = span.clone();
|
||
|
MagicType::Belong {
|
||
|
test: parse_hex_repr().parse(rep).map_err(|_| {
|
||
|
Simple::custom(span, "Failed to parse hex pattern")
|
||
|
})?,
|
||
|
mask: Some(parse_hex_repr().parse(mask).map_err(|_| {
|
||
|
Simple::custom(span_clone, "Failed to parse hex pattern")
|
||
|
})?),
|
||
|
}
|
||
|
}
|
||
|
_ => MagicType::Unknown(ty),
|
||
|
},
|
||
|
})
|
||
|
})
|
||
|
.then_ignore(any().repeated()),
|
||
|
just("!:")
|
||
|
.ignore_then(
|
||
|
one_of("abcdefghijklmnopqrstuvwxyz")
|
||
|
.repeated()
|
||
|
.at_least(1)
|
||
|
.map(|s| s.iter().collect()),
|
||
|
)
|
||
|
.then_ignore(whitespace().at_least(1))
|
||
|
.then(any().repeated().map(String::from_iter))
|
||
|
.map(|(attr, value)| MagicFileLine::AssignAttr { attr, value }),
|
||
|
))
|
||
|
.then_ignore(whitespace())
|
||
|
.then_ignore(end())
|
||
|
}
|
||
|
|
||
|
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
|
||
|
pub struct FileSignature {
|
||
|
pub offset: u64,
|
||
|
pub test: Vec<u8>,
|
||
|
pub mask: Option<Vec<u8>>,
|
||
|
}
|
||
|
|
||
|
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
|
||
|
pub struct FlattenedFileSignature {
|
||
|
pub test: Vec<u8>,
|
||
|
pub mask: Vec<u8>,
|
||
|
}
|
||
|
|
||
|
impl FlattenedFileSignature {
|
||
|
fn codegen(&self) -> impl ToTokens {
|
||
|
let data = self
|
||
|
.test
|
||
|
.iter()
|
||
|
.copied()
|
||
|
.zip(self.mask.iter().copied())
|
||
|
.map(|(t, m)| {
|
||
|
quote::quote! {
|
||
|
(#t, #m)
|
||
|
}
|
||
|
});
|
||
|
quote::quote! {
|
||
|
FlattenedFileSignature(&[#(#data),*])
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl From<FileSignature> for FlattenedFileSignature {
|
||
|
fn from(sig: FileSignature) -> Self {
|
||
|
let len = sig.test.len();
|
||
|
FlattenedFileSignature {
|
||
|
test: std::iter::repeat(0)
|
||
|
.take(sig.offset as usize)
|
||
|
.chain(sig.test)
|
||
|
.collect(),
|
||
|
mask: sig.mask.unwrap_or_else(|| {
|
||
|
std::iter::repeat(0)
|
||
|
.take(sig.offset as usize)
|
||
|
.chain(std::iter::repeat(!0).take(len))
|
||
|
.collect()
|
||
|
}),
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
impl std::ops::BitAnd<FlattenedFileSignature> for FlattenedFileSignature {
|
||
|
type Output = FlattenedFileSignature;
|
||
|
|
||
|
fn bitand(mut self, mut rhs: FlattenedFileSignature) -> Self::Output {
|
||
|
if self.test.len() < rhs.test.len() {
|
||
|
std::mem::swap(&mut self, &mut rhs);
|
||
|
}
|
||
|
let test = self
|
||
|
.test
|
||
|
.iter()
|
||
|
.zip(
|
||
|
rhs.test
|
||
|
.iter()
|
||
|
.chain(std::iter::repeat(&0).take(self.test.len() - rhs.test.len())),
|
||
|
)
|
||
|
.map(|(a, b)| a | b)
|
||
|
.collect();
|
||
|
let mask = self
|
||
|
.mask
|
||
|
.iter()
|
||
|
.zip(
|
||
|
rhs.mask
|
||
|
.iter()
|
||
|
.chain(std::iter::repeat(&0).take(self.test.len() - rhs.test.len())),
|
||
|
)
|
||
|
.map(|(a, b)| a | b)
|
||
|
.collect();
|
||
|
FlattenedFileSignature { test, mask }
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
|
||
|
pub struct MIMEAssociation {
|
||
|
pub mime: Option<String>,
|
||
|
pub ext: Vec<String>,
|
||
|
pub safe: bool,
|
||
|
pub signatures: Vec<FlattenedFileSignature>,
|
||
|
}
|
||
|
|
||
|
impl MIMEAssociation {
|
||
|
fn codegen(&self) -> impl ToTokens {
|
||
|
let mime = self.mime.as_deref().unwrap_or("");
|
||
|
let ext = self.ext.first().map(|s| s.as_str()).unwrap_or("");
|
||
|
let safe = self.safe;
|
||
|
let signatures = self.signatures.iter().map(|s| s.codegen());
|
||
|
quote::quote! {
|
||
|
MIMEAssociation {
|
||
|
mime: #mime,
|
||
|
ext: #ext,
|
||
|
safe: #safe,
|
||
|
signatures: &[#(#signatures),*],
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
fn build_vec(lines: Vec<MagicFileLine>) -> Vec<MIMEAssociation> {
|
||
|
let mut stack = Vec::new();
|
||
|
|
||
|
let mut out: Vec<MIMEAssociation> = Vec::new();
|
||
|
|
||
|
for line in lines {
|
||
|
match line {
|
||
|
MagicFileLine::Magic { ty, offset, indent } => match ty {
|
||
|
MagicType::Belong { test, mask } => {
|
||
|
stack.truncate(indent as usize);
|
||
|
stack.push(FileSignature { offset, test, mask });
|
||
|
}
|
||
|
MagicType::String { test } => {
|
||
|
stack.truncate(indent as usize);
|
||
|
stack.push(FileSignature {
|
||
|
offset,
|
||
|
test,
|
||
|
mask: None,
|
||
|
});
|
||
|
}
|
||
|
_ => {}
|
||
|
},
|
||
|
MagicFileLine::AssignAttr { attr, value } => match attr.as_str() {
|
||
|
"mime" if !stack.is_empty() => {
|
||
|
let mime = value;
|
||
|
let flattened = stack.iter().map(|sig| sig.clone().into()).fold(
|
||
|
FlattenedFileSignature {
|
||
|
test: Vec::new(),
|
||
|
mask: Vec::new(),
|
||
|
},
|
||
|
|a, b| a & b,
|
||
|
);
|
||
|
if flattened.test.len() > 64 {
|
||
|
eprintln!("Signature too long: {:?}", flattened.test.len());
|
||
|
continue;
|
||
|
}
|
||
|
if let Some(existing) = out
|
||
|
.iter_mut()
|
||
|
.find(|m| m.mime.as_deref().map(|m| m == mime).unwrap_or(false))
|
||
|
{
|
||
|
existing.signatures.push(flattened);
|
||
|
} else {
|
||
|
out.push(MIMEAssociation {
|
||
|
mime: Some(mime),
|
||
|
safe: false,
|
||
|
ext: vec![],
|
||
|
signatures: vec![flattened],
|
||
|
});
|
||
|
}
|
||
|
}
|
||
|
"ext" if !stack.is_empty() => {
|
||
|
let ext = value;
|
||
|
let flattened = stack.iter().map(|sig| sig.clone().into()).fold(
|
||
|
FlattenedFileSignature {
|
||
|
test: Vec::new(),
|
||
|
mask: Vec::new(),
|
||
|
},
|
||
|
|a, b| a & b,
|
||
|
);
|
||
|
if flattened.test.len() > 64 {
|
||
|
eprintln!("Signature too long: {:?}", flattened.test.len());
|
||
|
continue;
|
||
|
}
|
||
|
if let Some(existing) =
|
||
|
out.iter_mut().find(|m| m.signatures.contains(&flattened))
|
||
|
{
|
||
|
existing
|
||
|
.ext
|
||
|
.extend(ext.split('/').map(|s| format!(".{}", s)))
|
||
|
} else {
|
||
|
out.push(MIMEAssociation {
|
||
|
mime: None,
|
||
|
safe: false,
|
||
|
ext: ext.split('/').map(|s| format!(".{}", s)).collect(),
|
||
|
signatures: vec![flattened],
|
||
|
});
|
||
|
}
|
||
|
}
|
||
|
_ => {}
|
||
|
},
|
||
|
_ => {}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
out.iter_mut().for_each(|m| {
|
||
|
m.ext.sort();
|
||
|
m.ext.dedup();
|
||
|
m.signatures.sort_by(|a, b| a.test.cmp(&b.test));
|
||
|
m.signatures.dedup();
|
||
|
});
|
||
|
out.dedup();
|
||
|
|
||
|
out
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Directory prefix (including the trailing slash) that is prepended to each
// entry of FILES_TO_PARSE to form the path of the magic file to read.
const BASE_DIR: &str = "submodules/file/magic/Magdir/";
|
||
|
|
||
|
fn main() {
|
||
|
let signatures = static_signatures()
|
||
|
.into_iter()
|
||
|
.chain(FILES_TO_PARSE.iter().flat_map(|file| {
|
||
|
println!("cargo:rerun-if-changed={}", file);
|
||
|
eprintln!("Using file: {}", file);
|
||
|
let path = format!("{}{}", BASE_DIR, file);
|
||
|
let content = std::fs::read(&path)
|
||
|
.map(|v| String::from_utf8_lossy(&v).to_string())
|
||
|
.unwrap();
|
||
|
let lines = content
|
||
|
.lines()
|
||
|
.filter(|line| !line.is_empty())
|
||
|
.map(|line| {
|
||
|
parse_magic_line().parse(line).unwrap_or_else(|e| {
|
||
|
eprintln!("Failed to parse line: {:?}", line);
|
||
|
eprintln!("Error: {:?}", e);
|
||
|
MagicFileLine::Unknown
|
||
|
})
|
||
|
})
|
||
|
.collect::<Vec<_>>();
|
||
|
MIMEAssociation::build_vec(lines)
|
||
|
.into_iter()
|
||
|
.map(|mut m| {
|
||
|
if m.mime
|
||
|
.as_ref()
|
||
|
.map(|m| UNSAFE_WHITELISTED.iter().any(|u| m.contains(u)))
|
||
|
.unwrap_or(false)
|
||
|
{
|
||
|
m.safe = false;
|
||
|
return m;
|
||
|
}
|
||
|
if m.ext
|
||
|
.iter()
|
||
|
.any(|ext| UNSAFE_WHITELISTED.iter().any(|u| ext.contains(u)))
|
||
|
{
|
||
|
m.safe = false;
|
||
|
return m;
|
||
|
}
|
||
|
if m.mime
|
||
|
.as_ref()
|
||
|
.map(|m| SAFE_WHITELISTED.iter().any(|w| m.contains(w)))
|
||
|
.unwrap_or(false)
|
||
|
{
|
||
|
m.safe = true;
|
||
|
}
|
||
|
if m.ext
|
||
|
.iter()
|
||
|
.any(|ext| SAFE_WHITELISTED.iter().any(|w| ext.contains(w)))
|
||
|
{
|
||
|
m.safe = true;
|
||
|
}
|
||
|
|
||
|
if m.safe {
|
||
|
m.ext
|
||
|
.retain(|ext| SAFE_EXTENSIONS.iter().any(|s| ext.contains(s)));
|
||
|
}
|
||
|
|
||
|
m
|
||
|
})
|
||
|
.filter(|m| {
|
||
|
if let Some(incoming) = &m.mime {
|
||
|
let mime = incoming.to_lowercase();
|
||
|
if BLACKLISTED.iter().any(|b| mime.contains(b)) {
|
||
|
return false;
|
||
|
}
|
||
|
if SAFE_WHITELISTED.iter().any(|w| mime.contains(w))
|
||
|
|| UNSAFE_WHITELISTED.iter().any(|u| mime.contains(u))
|
||
|
{
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
if m.ext
|
||
|
.iter()
|
||
|
.any(|ext| BLACKLISTED.iter().any(|b| ext.contains(b)))
|
||
|
{
|
||
|
return false;
|
||
|
}
|
||
|
if let Some(incoming) = &m.mime {
|
||
|
let mime = incoming.to_lowercase();
|
||
|
if SAFE_WHITELISTED.iter().all(|w| mime.contains(w))
|
||
|
|| UNSAFE_WHITELISTED.iter().any(|u| mime.contains(u))
|
||
|
{
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
if m.ext.iter().any(|ext| {
|
||
|
SAFE_WHITELISTED.iter().any(|w| ext.contains(w))
|
||
|
|| UNSAFE_WHITELISTED.iter().any(|u| ext.contains(u))
|
||
|
}) {
|
||
|
return true;
|
||
|
}
|
||
|
false
|
||
|
})
|
||
|
}))
|
||
|
.collect::<Vec<_>>();
|
||
|
|
||
|
let max_size = signatures
|
||
|
.iter()
|
||
|
.map(|s| s.signatures.iter().map(|s| s.test.len()).max().unwrap())
|
||
|
.max()
|
||
|
.unwrap();
|
||
|
|
||
|
if max_size > 128 {
|
||
|
panic!("Max signature size is too large: {}", max_size);
|
||
|
}
|
||
|
|
||
|
std::fs::write(env::var("OUT_DIR").unwrap() + "/magic.rs", {
|
||
|
let signatures = signatures.iter().map(|s| s.codegen());
|
||
|
|
||
|
quote::quote! {
|
||
|
/// Maximum size of a signature
|
||
|
pub const SNIFF_SIZE: usize = #max_size;
|
||
|
#[allow(clippy::all)]
|
||
|
const MAGICS: &[MIMEAssociation] = &[#(#signatures),*];
|
||
|
}
|
||
|
.into_token_stream()
|
||
|
.to_string()
|
||
|
})
|
||
|
.unwrap();
|
||
|
}
|