use std::{env, ops::Range, vec};

use chumsky::prelude::*;
use quote::ToTokens;
use text::whitespace;

/// Magic definition files, relative to `BASE_DIR`, that are parsed for signatures.
///
/// `main` parses the files in the listed order. Each name must appear only once:
/// a repeated name makes the file be parsed again and its associations be emitted
/// a second time.
const FILES_TO_PARSE: &[&str] = &[
    "jpeg",
    "images",
    "sgml",
    "riff",
    "animation",
    "audio",
    "matroska",
    "vorbis",
    "msdos",
    "webassembly",
    "elf",
    "mach",
];

/// Substrings that exclude an association from the output when they occur in its
/// lower-cased MIME type or in one of its extensions.
const BLACKLISTED: &[&str] = &[
    "fuji-",
    "canon-",
    "corel-",
    "dicom",
    "garmin",
];

/// Extensions that may stay attached to an association marked as safe.
///
/// For a safe association, every extension that does not contain one of these
/// entries as a substring is removed.
const SAFE_EXTENSIONS: &[&str] = &[
    ".png",
    ".gif",
    ".jpeg",
    ".webp",
    ".avif",
    ".apng",
    ".bmp",
    ".tiff",
    ".x-icon",
    ".opus",
    ".ogg",
    ".mp4",
    ".m4v",
    ".3gpp",
    ".mpeg",
    ".webm",
    ".aac",
    ".flac",
    ".wav",
];

/// Substrings that mark an association as safe when they occur in its MIME type or
/// in one of its extensions (unless an `UNSAFE_WHITELISTED` entry matched first).
const SAFE_WHITELISTED: &[&str] = &[
    "png",
    "gif",
    "jpeg",
    "webp",
    "avif",
    "apng",
    "bmp",
    "tiff",
    "x-icon",
    "opus",
    "ogg",
    "mp4",
    "m4v",
    "3gpp",
    "mpeg",
    "webm",
    "aac",
    "flac",
    "wav",
    "svg",
    "rss",
];

/// Substrings of MIME types / extensions that are known to be unsafe.
///
/// We want to have signatures for these to be able to detect them: a match keeps
/// the association in the output and forces `safe` to `false`, taking precedence
/// over `SAFE_WHITELISTED`.
const UNSAFE_WHITELISTED: &[&str] = &[
    ".exe", ".wasm", "elf", "mach", "javascript", "bios", "firmware", "driver", "mpegurl",
];

fn static_signatures() -> Vec<MIMEAssociation> {
    vec![
        MIMEAssociation {
            mime: "application/x-elf-executable".to_string().into(),
            ext: vec![".pie".to_string(), ".elf".to_string(), ".so".to_string()],
            safe: false,
            signatures: vec![FlattenedFileSignature {
                test: vec![0x7f, b'E', b'L', b'F'],
                mask: vec![0xff, 0xff, 0xff, 0xff],
            }],
        },
        MIMEAssociation {
            mime: "application/x-mach-binary".to_string().into(),
            ext: vec![".dylib".to_string(), ".bundle".to_string()],
            safe: false,
            signatures: vec![FlattenedFileSignature {
                test: vec![0xfe, 0xed, 0xfa, 0xce],
                mask: vec![0xff, 0xff, 0xff, 0xff],
            }],
        },
        MIMEAssociation {
            mime: "application/vnd.microsoft.portable-executable"
                .to_string()
                .into(),
            ext: vec![".exe".to_string(), ".dll".to_string(), ".sys".to_string()],
            safe: false,
            signatures: vec![FlattenedFileSignature {
                test: b"PE\0\0".to_vec(),
                mask: vec![0xff, 0xff, 0xff, 0xff],
            }],
        },
    ]
}

/// One line of a magic definition file, as classified by `parse_magic_line`.
#[derive(Debug, Clone)]
pub enum MagicFileLine {
    /// A `#` comment line; carries no information.
    Nop,
    /// Placeholder that `main` substitutes for a line `parse_magic_line` rejected.
    Unknown,
    /// A test line: nesting markers, offset, type and test value.
    Magic {
        /// Number of leading `>` characters, i.e. the nesting depth of the test.
        indent: u8,
        /// Offset of the test, parsed from a decimal or `0x`-prefixed literal.
        offset: u64,
        /// The test type together with its decoded test value.
        ty: MagicType,
    },
    /// A `!:attr value` line.
    AssignAttr {
        /// Attribute name (lower-case letters only), e.g. `mime` or `ext`.
        attr: String,
        /// Everything after the whitespace that follows the attribute name.
        value: String,
    },
}

/// Test type of a `MagicFileLine::Magic` line, carrying the decoded test value.
#[derive(Debug, Clone)]
pub enum MagicType {
    /// A type this tool does not interpret; holds the type word as written.
    Unknown(String),
    /// A `belong` test, optionally written as `belong&<mask>`.
    Belong {
        /// Bytes decoded from the hexadecimal test value.
        test: Vec<u8>,
        /// Bytes decoded from the hexadecimal mask, if one was given.
        mask: Option<Vec<u8>>,
    },
    /// A `string` test.
    String {
        /// Bytes of the test string with escape sequences resolved.
        test: Vec<u8>,
    },
}

pub fn parse_string_repr() -> impl Parser<char, Vec<u8>, Error = Simple<char>> {
    just('\\')
        .ignore_then(choice((
            just('\\').to(b'\\'),
            just('n').to(b'\n'),
            just('r').to(b'\r'),
            just('t').to(b'\t'),
            just('x').ignore_then(
                one_of("0123456789abcdefABCDEF")
                    .repeated()
                    .exactly(2)
                    .map(|s| u8::from_str_radix(&s.iter().collect::<String>(), 16).unwrap()),
            ),
        )))
        .or(none_of("\\").map(|c| c as u8))
        .repeated()
        .at_least(1)
        .map(|s| s.to_vec())
        .then_ignore(end())
}

/// Parser for a `0x`-prefixed hexadecimal byte string such as `0xffd8ffe0`.
///
/// The digits are read in pairs, most significant pair first, and each pair becomes
/// one byte of the result. At least one pair is required, the whole input must be
/// consumed, and an odd number of digits is rejected. Upper- and lower-case digits
/// are both accepted, matching the other hexadecimal parsers in this file.
pub fn parse_hex_repr() -> impl Parser<char, Vec<u8>, Error = Simple<char>> {
    let byte = one_of("0123456789abcdefABCDEF")
        .repeated()
        .exactly(2)
        // Two hex digits always fit into a `u8`, so this cannot fail.
        .map(|s| u8::from_str_radix(&s.iter().collect::<String>(), 16).unwrap());

    just("0x")
        .ignore_then(byte.repeated().at_least(1))
        .then_ignore(end())
}

/// Parser for a single (non-empty) line of a magic definition file.
///
/// Three kinds of lines are recognised:
///
/// * `# ...` — a comment, yielding [`MagicFileLine::Nop`];
/// * `[>...]<offset> <type> <value> ...` — a test, yielding [`MagicFileLine::Magic`].
///   The offset is a decimal or `0x`-prefixed hexadecimal literal. Only the `string`,
///   `belong` and `belong&<mask>` types have their value decoded; every other type
///   becomes [`MagicType::Unknown`]. Text after the value is ignored;
/// * `!:<attr> <value>` — an attribute, yielding [`MagicFileLine::AssignAttr`].
///
/// Anything else, or a `string`/`belong` value that cannot be decoded, is a parse
/// error.
pub fn parse_magic_line() -> impl Parser<char, MagicFileLine, Error = Simple<char>> {
    let comment = just('#')
        .then_ignore(any().repeated())
        .to(MagicFileLine::Nop);

    let hex_offset = just("0x").ignore_then(
        one_of("0123456789abcdefABCDEF")
            .repeated()
            .at_least(1)
            .try_map(|s, span| {
                u64::from_str_radix(&s.iter().collect::<String>(), 16)
                    .map_err(|_| Simple::custom(span, "Failed to parse number"))
            }),
    );
    let dec_offset = one_of("0123456789")
        .repeated()
        .at_least(1)
        .try_map(|s, span| {
            s.iter()
                .collect::<String>()
                .parse::<u64>()
                .map_err(|_| Simple::custom(span, "Failed to parse number"))
        });
    // The hexadecimal form has to be tried first. Every `0x...` literal starts with
    // the decimal digit `0`, so a decimal-first choice accepts that lone `0`, and
    // because `or` does not revisit an alternative that succeeded, the line would
    // then fail on the `x`.
    let offset = hex_offset.or(dec_offset);

    // A run of characters up to the next blank.
    let word = || {
        none_of(" \t\n")
            .repeated()
            .at_least(1)
            .map(String::from_iter)
    };

    let magic = just('>')
        .repeated()
        .map(|i| i.len() as u8)
        .then(offset)
        .then_ignore(whitespace().at_least(1))
        .then(word())
        .then_ignore(whitespace().at_least(1))
        .then(word())
        // NOTE(review): the bytes of a `belong` value are taken exactly as written;
        // a value with fewer than eight hex digits yields fewer than four bytes.
        // Confirm whether such values should be zero-extended.
        .try_map(|(((indent, offset), ty), rep), span: Range<usize>| {
            Ok(MagicFileLine::Magic {
                indent,
                offset,
                ty: match ty.as_str() {
                    "string" => MagicType::String {
                        test: parse_string_repr().parse(rep).map_err(|_| {
                            Simple::custom(span, "Failed to parse string pattern")
                        })?,
                    },
                    "belong" => MagicType::Belong {
                        test: parse_hex_repr()
                            .parse(rep)
                            .map_err(|_| Simple::custom(span, "Failed to parse hex pattern"))?,
                        mask: None,
                    },
                    s if s.starts_with("belong&") => {
                        let mask = &s["belong&".len()..];
                        // Both conversions may fail, and each error needs its own span.
                        let span_clone = span.clone();
                        MagicType::Belong {
                            test: parse_hex_repr().parse(rep).map_err(|_| {
                                Simple::custom(span, "Failed to parse hex pattern")
                            })?,
                            mask: Some(parse_hex_repr().parse(mask).map_err(|_| {
                                Simple::custom(span_clone, "Failed to parse hex pattern")
                            })?),
                        }
                    }
                    _ => MagicType::Unknown(ty),
                },
            })
        })
        // Whatever follows the test value is not interpreted.
        .then_ignore(any().repeated());

    let attribute = just("!:")
        .ignore_then(
            one_of("abcdefghijklmnopqrstuvwxyz")
                .repeated()
                .at_least(1)
                .map(|s| s.iter().collect()),
        )
        .then_ignore(whitespace().at_least(1))
        .then(any().repeated().map(String::from_iter))
        .map(|(attr, value)| MagicFileLine::AssignAttr { attr, value });

    choice((comment, magic, attribute))
        .then_ignore(whitespace())
        .then_ignore(end())
}

/// A single test taken from a magic line, still positioned by its own offset.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct FileSignature {
    /// Offset of the test, as parsed from the magic line.
    pub offset: u64,
    /// Bytes expected at `offset`.
    pub test: Vec<u8>,
    /// Optional mask for `test`; when absent, flattening treats every bit of
    /// `test` as significant.
    pub mask: Option<Vec<u8>>,
}

/// A signature laid out from position 0 as parallel test and mask bytes.
///
/// Offsets are expressed by leading zero padding; `codegen` pairs the two vectors
/// byte by byte into `(test, mask)` tuples.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct FlattenedFileSignature {
    /// Expected byte for each position.
    pub test: Vec<u8>,
    /// Mask byte for each position; positions used only as offset padding get 0.
    pub mask: Vec<u8>,
}

impl FlattenedFileSignature {
    /// Renders this signature as a `FlattenedFileSignature(&[(test, mask), ...])`
    /// expression for the generated source file.
    ///
    /// Test and mask bytes are paired by position; pairing stops at the end of the
    /// shorter of the two vectors.
    fn codegen(&self) -> impl ToTokens {
        let mut pairs = Vec::with_capacity(self.test.len().min(self.mask.len()));
        for (&test_byte, &mask_byte) in self.test.iter().zip(&self.mask) {
            pairs.push(quote::quote!((#test_byte, #mask_byte)));
        }
        quote::quote!(FlattenedFileSignature(&[#(#pairs),*]))
    }
}

impl From<FileSignature> for FlattenedFileSignature {
    /// Lays a positioned signature out from byte 0.
    ///
    /// Both `test` and `mask` are prefixed with `offset` zero bytes, so that a zero
    /// mask marks the skipped prefix and the two vectors stay aligned. A missing
    /// mask means that every bit of `test` is significant.
    ///
    /// An explicit mask only occurs on numeric (`belong&mask`) tests. If it was
    /// written with a different width than the value, the shorter of the two is
    /// zero-extended on the left, so both keep their numeric meaning and the result
    /// always has `test.len() == mask.len()`.
    fn from(sig: FileSignature) -> Self {
        /// `lead` zero bytes followed by `bytes`.
        fn zero_prefixed(lead: usize, bytes: Vec<u8>) -> Vec<u8> {
            let mut out = vec![0u8; lead];
            out.extend(bytes);
            out
        }

        let offset = sig.offset as usize;
        let test = sig.test;
        let mask = match sig.mask {
            Some(mask) => mask,
            None => vec![0xff; test.len()],
        };
        let width = test.len().max(mask.len());

        FlattenedFileSignature {
            test: zero_prefixed(offset + (width - test.len()), test),
            mask: zero_prefixed(offset + (width - mask.len()), mask),
        }
    }
}

impl std::ops::BitAnd<FlattenedFileSignature> for FlattenedFileSignature {
    type Output = FlattenedFileSignature;

    /// Combines two signatures into one that requires both.
    ///
    /// The result is as long as the longest of the four input vectors; shorter
    /// vectors are treated as if padded with zero bytes at the end. Test bytes and
    /// mask bytes are each merged with a bitwise OR, so the result always has
    /// `test.len() == mask.len()`, even when an operand's mask is shorter than its
    /// test. The operation is symmetric in its operands.
    ///
    /// NOTE(review): when both operands constrain the same position with different
    /// test bytes, the OR of the two bytes is stored; no conflict is reported.
    fn bitand(self, rhs: FlattenedFileSignature) -> Self::Output {
        /// Position-wise OR of `a` and `b`, each zero-extended to `len` bytes.
        fn merge(a: &[u8], b: &[u8], len: usize) -> Vec<u8> {
            (0..len)
                .map(|i| a.get(i).copied().unwrap_or(0) | b.get(i).copied().unwrap_or(0))
                .collect()
        }

        let len = self
            .test
            .len()
            .max(self.mask.len())
            .max(rhs.test.len())
            .max(rhs.mask.len());

        FlattenedFileSignature {
            test: merge(&self.test, &rhs.test, len),
            mask: merge(&self.mask, &rhs.mask, len),
        }
    }
}

/// A MIME type and/or set of file extensions together with the signatures that
/// identify it.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct MIMEAssociation {
    /// MIME type, if a `!:mime` attribute (or a static entry) supplied one.
    pub mime: Option<String>,
    /// File extensions, each with a leading dot.
    pub ext: Vec<String>,
    /// Whether the type is classified as safe by the whitelists in `main`.
    pub safe: bool,
    /// Signatures, any of which identifies this association.
    pub signatures: Vec<FlattenedFileSignature>,
}

impl MIMEAssociation {
    fn codegen(&self) -> impl ToTokens {
        let mime = self.mime.as_deref().unwrap_or("");
        let ext = self.ext.first().map(|s| s.as_str()).unwrap_or("");
        let safe = self.safe;
        let signatures = self.signatures.iter().map(|s| s.codegen());
        quote::quote! {
            MIMEAssociation {
                mime: #mime,
                ext: #ext,
                safe: #safe,
                signatures: &[#(#signatures),*],
            }
        }
    }
    fn build_vec(lines: Vec<MagicFileLine>) -> Vec<MIMEAssociation> {
        let mut stack = Vec::new();

        let mut out: Vec<MIMEAssociation> = Vec::new();

        for line in lines {
            match line {
                MagicFileLine::Magic { ty, offset, indent } => match ty {
                    MagicType::Belong { test, mask } => {
                        stack.truncate(indent as usize);
                        stack.push(FileSignature { offset, test, mask });
                    }
                    MagicType::String { test } => {
                        stack.truncate(indent as usize);
                        stack.push(FileSignature {
                            offset,
                            test,
                            mask: None,
                        });
                    }
                    _ => {}
                },
                MagicFileLine::AssignAttr { attr, value } => match attr.as_str() {
                    "mime" if !stack.is_empty() => {
                        let mime = value;
                        let flattened = stack.iter().map(|sig| sig.clone().into()).fold(
                            FlattenedFileSignature {
                                test: Vec::new(),
                                mask: Vec::new(),
                            },
                            |a, b| a & b,
                        );
                        if flattened.test.len() > 64 {
                            eprintln!("Signature too long: {:?}", flattened.test.len());
                            continue;
                        }
                        if let Some(existing) = out
                            .iter_mut()
                            .find(|m| m.mime.as_deref().map(|m| m == mime).unwrap_or(false))
                        {
                            existing.signatures.push(flattened);
                        } else {
                            out.push(MIMEAssociation {
                                mime: Some(mime),
                                safe: false,
                                ext: vec![],
                                signatures: vec![flattened],
                            });
                        }
                    }
                    "ext" if !stack.is_empty() => {
                        let ext = value;
                        let flattened = stack.iter().map(|sig| sig.clone().into()).fold(
                            FlattenedFileSignature {
                                test: Vec::new(),
                                mask: Vec::new(),
                            },
                            |a, b| a & b,
                        );
                        if flattened.test.len() > 64 {
                            eprintln!("Signature too long: {:?}", flattened.test.len());
                            continue;
                        }
                        if let Some(existing) =
                            out.iter_mut().find(|m| m.signatures.contains(&flattened))
                        {
                            existing
                                .ext
                                .extend(ext.split('/').map(|s| format!(".{}", s)))
                        } else {
                            out.push(MIMEAssociation {
                                mime: None,
                                safe: false,
                                ext: ext.split('/').map(|s| format!(".{}", s)).collect(),
                                signatures: vec![flattened],
                            });
                        }
                    }
                    _ => {}
                },
                _ => {}
            }
        }

        out.iter_mut().for_each(|m| {
            m.ext.sort();
            m.ext.dedup();
            m.signatures.sort_by(|a, b| a.test.cmp(&b.test));
            m.signatures.dedup();
        });
        out.dedup();

        out
    }
}

/// Directory holding the magic definition files; `main` appends each
/// `FILES_TO_PARSE` entry to this prefix to form the path it reads.
const BASE_DIR: &str = "submodules/file/magic/Magdir/";

fn main() {
    let signatures = static_signatures()
        .into_iter()
        .chain(FILES_TO_PARSE.iter().flat_map(|file| {
            println!("cargo:rerun-if-changed={}", file);
            eprintln!("Using file: {}", file);
            let path = format!("{}{}", BASE_DIR, file);
            let content = std::fs::read(&path)
                .map(|v| String::from_utf8_lossy(&v).to_string())
                .unwrap();
            let lines = content
                .lines()
                .filter(|line| !line.is_empty())
                .map(|line| {
                    parse_magic_line().parse(line).unwrap_or_else(|e| {
                        eprintln!("Failed to parse line: {:?}", line);
                        eprintln!("Error: {:?}", e);
                        MagicFileLine::Unknown
                    })
                })
                .collect::<Vec<_>>();
            MIMEAssociation::build_vec(lines)
                .into_iter()
                .map(|mut m| {
                    if m.mime
                        .as_ref()
                        .map(|m| UNSAFE_WHITELISTED.iter().any(|u| m.contains(u)))
                        .unwrap_or(false)
                    {
                        m.safe = false;
                        return m;
                    }
                    if m.ext
                        .iter()
                        .any(|ext| UNSAFE_WHITELISTED.iter().any(|u| ext.contains(u)))
                    {
                        m.safe = false;
                        return m;
                    }
                    if m.mime
                        .as_ref()
                        .map(|m| SAFE_WHITELISTED.iter().any(|w| m.contains(w)))
                        .unwrap_or(false)
                    {
                        m.safe = true;
                    }
                    if m.ext
                        .iter()
                        .any(|ext| SAFE_WHITELISTED.iter().any(|w| ext.contains(w)))
                    {
                        m.safe = true;
                    }

                    if m.safe {
                        m.ext
                            .retain(|ext| SAFE_EXTENSIONS.iter().any(|s| ext.contains(s)));
                    }

                    m
                })
                .filter(|m| {
                    if let Some(incoming) = &m.mime {
                        let mime = incoming.to_lowercase();
                        if BLACKLISTED.iter().any(|b| mime.contains(b)) {
                            return false;
                        }
                        if SAFE_WHITELISTED.iter().any(|w| mime.contains(w))
                            || UNSAFE_WHITELISTED.iter().any(|u| mime.contains(u))
                        {
                            return true;
                        }
                    }
                    if m.ext
                        .iter()
                        .any(|ext| BLACKLISTED.iter().any(|b| ext.contains(b)))
                    {
                        return false;
                    }
                    if let Some(incoming) = &m.mime {
                        let mime = incoming.to_lowercase();
                        if SAFE_WHITELISTED.iter().all(|w| mime.contains(w))
                            || UNSAFE_WHITELISTED.iter().any(|u| mime.contains(u))
                        {
                            return true;
                        }
                    }
                    if m.ext.iter().any(|ext| {
                        SAFE_WHITELISTED.iter().any(|w| ext.contains(w))
                            || UNSAFE_WHITELISTED.iter().any(|u| ext.contains(u))
                    }) {
                        return true;
                    }
                    false
                })
        }))
        .collect::<Vec<_>>();

    let max_size = signatures
        .iter()
        .map(|s| s.signatures.iter().map(|s| s.test.len()).max().unwrap())
        .max()
        .unwrap();

    if max_size > 128 {
        panic!("Max signature size is too large: {}", max_size);
    }

    std::fs::write(env::var("OUT_DIR").unwrap() + "/magic.rs", {
        let signatures = signatures.iter().map(|s| s.codegen());

        quote::quote! {
            /// Maximum size of a signature
            pub const SNIFF_SIZE: usize = #max_size;
            #[allow(clippy::all)]
            const MAGICS: &[MIMEAssociation] = &[#(#signatures),*];
        }
        .into_token_stream()
        .to_string()
    })
    .unwrap();
}