commit ebf807dcce94a716495b5d936761adff76a8f1db Author: mos Date: Sat Jul 27 16:24:57 2024 +0200 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..ab1a041 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,273 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "anstream" +version = "0.6.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + +[[package]] +name = "anstyle-parse" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "bip" +version = "0.1.0" +dependencies = [ + "byteorder", + "serde", +] + +[[package]] +name = "bipc" +version = "0.1.0" +dependencies = [ + "bip", + "clap", + "serde", +] + +[[package]] +name = "byteorder" 
+version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "clap" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + +[[package]] +name = "colorchoice" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "serde" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.203" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "901fa70d88b9d6c98022e23b4136f9f3e54e4662c3bc1bd1d84a42a9a0f0c1e9" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml 
b/Cargo.toml new file mode 100644 index 0000000..a56c4b6 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,3 @@ +[workspace] +members = ["bip", "bipc"] +resolver = "2" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..dc71fc6 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2024 mos + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..1fa70b3 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# bip + +bip is a library which provides a framework for parsing plain bip text and also provides +facilities for serialization and deserialization via serde. + +bip is a data format that is converted from a descriptive text representation to a binary +representation with memory and run-time efficiency in mind. This repository consists of two +parts (bip and bipc).
/// Result alias for the binary writer: every fallible call reports a bp [`Error`].
type Result<T> = std::result::Result<T, Error>;

/// Errors raised while encoding to, or decoding from, the binary representation.
#[derive(Debug)]
pub enum Error {
    /// The opcode byte does not name a known [`Op`].
    DecodeError,
    /// The underlying writer failed.
    BadWrite(std::io::Error),
    /// An identifier was used as a record type that was never declared.
    NotDefined,
    /// The left-hand side of an assignment is not an identifier.
    ExpectedID,
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::DecodeError => write!(f, "bad op"),
            Self::BadWrite(e) => write!(f, "{}", e),
            Self::NotDefined => write!(f, "undefined data type"),
            Self::ExpectedID => write!(f, "expected identifier"),
        }
    }
}

/// One-byte opcode that prefixes every value in the binary stream.
///
/// Layout of the encoded byte: bits 0-2 select the kind, bit 3 is the width
/// flag of a constant (0 = 4-byte payload, 1 = 8-byte payload) and bit 4 is
/// the second `Const` flag.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Op {
    /// Integer constant: `(width flag, flag stored in bit 4)`.
    Const(u8, bool),
    Data,
    List,
    Id,
    Str,
}

impl Op {
    const KIND_MASK: u8 = 0x07;
    const WIDE_BIT: u8 = 1 << 3;
    const FLAG_BIT: u8 = 1 << 4;

    /// Encodes `op` into its one-byte wire form.
    ///
    /// The width of a constant is a single-bit flag; any non-zero width
    /// selects the wide form. It is normalised here so that an out-of-range
    /// width can no longer spill into the flag bit (previously
    /// `Const(2, false)` encoded as `0x10`, i.e. with bit 4 set).
    pub fn encode(op: Self) -> u8 {
        match op {
            Self::Const(width, flag) => {
                let wide = if width != 0 { Self::WIDE_BIT } else { 0 };
                let flag = if flag { Self::FLAG_BIT } else { 0 };
                wide | flag
            }
            Self::Data => 0x01,
            Self::List => 0x02,
            Self::Id => 0x03,
            Self::Str => 0x04,
        }
    }

    /// Decodes a wire byte back into an [`Op`].
    ///
    /// Only the bits described on [`Op`] are inspected; bits 5-7 are ignored.
    ///
    /// # Errors
    ///
    /// Returns [`Error::DecodeError`] when the kind bits name no known opcode.
    pub fn decode(op: u8) -> Result<Self> {
        match op & Self::KIND_MASK {
            0x00 => Ok(Self::Const(
                u8::from(op & Self::WIDE_BIT != 0),
                op & Self::FLAG_BIT != 0,
            )),
            0x01 => Ok(Self::Data),
            0x02 => Ok(Self::List),
            0x03 => Ok(Self::Id),
            0x04 => Ok(Self::Str),
            _ => Err(Error::DecodeError),
        }
    }
}
Writer { + fn default() -> Self { + Self::new() + } +} + +impl Writer { + pub fn new() -> Self { + Self { + data: HashMap::new(), + buf: vec![0; HEADER_LEN], + } + } + + fn write(&mut self, c: u8) { + self.buf.push(c); + } + + fn write_const(&mut self, c: u64) -> Result<()> { + let width = width!(c) as u8; + + self.write_op(Op::Const(width, false)); + self.buf + .write_uint::(c, (4 * (width + 1)) as usize) + .map_err(Error::BadWrite) + } + + fn write_sign_const(&mut self, c: u64) -> Result<()> { + let c = c as i64; + let width = width_sign!(c) as u8; + + self.write_op(Op::Const(width, true)); + self.buf + .write_int::(c, (4 * (width + 1)) as usize) + .map_err(Error::BadWrite) + } + + fn write_id(&mut self, id: &str) { + self.buf.extend(id.to_owned().into_bytes()); + self.write(0x00); + } + + fn write_op(&mut self, op: Op) { + self.write(Op::encode(op)); + } + + fn parse_data_type(&mut self, node: &Node) -> Vec { + let mut bind_table = vec![]; + + for i in &node.entries { + if let NodeOp::Id(bind) = &i.op { + bind_table.push(bind.clone()); + } + } + + bind_table + } + + fn parse_data(&mut self, data: &Node) -> Result<()> { + match data.op { + NodeOp::Data => self.write_op(Op::Data), + NodeOp::List => self.write_op(Op::List), + _ => unreachable!(), + } + + self.write_const(data.entries.len() as u64)?; + for node in &data.entries { + self.parse(node)?; + } + + Ok(()) + } + + fn parse_id(&mut self, node: &Node) -> Result<()> { + if let NodeOp::Id(id) = &node.op { + let data_rec = match self.data.get(id) { + Some(rec) => rec, + None => return Err(Error::NotDefined), + }; + + let data = Node::new( + node.op.clone(), + data_rec + .iter() + .zip(node.entries.iter().collect::>()) + .map(|(k, v)| { + Node::new( + NodeOp::Assign, + vec![Node::new_op(NodeOp::Id(k.to_string())), v.clone()], + ) + }) + .collect(), + ); + + self.parse_data(&data)?; + Ok(()) + } else { + unreachable!(); + } + } + + fn parse_str(&mut self, s: &str) { + self.write_op(Op::Str); + self.write_id(s); + 
} + + fn parse_assign(&mut self, node: &Node) -> Result<()> { + if let NodeOp::Id(lhs) = &node.entries[0].op { + let rhs = &node.entries[1]; + match &rhs.op { + NodeOp::Id(s) => { + if s == "data" { + let lhs = lhs.clone(); + let data = self.parse_data_type(rhs); + self.data.insert(lhs, data); + } else { + self.parse_id(rhs)?; + } + } + _ => self.parse(rhs)?, + } + + Ok(()) + } else { + Err(Error::ExpectedID) + } + } + + fn parse(&mut self, node: &Node) -> Result<()> { + match &node.op { + NodeOp::Data | NodeOp::List => self.parse_data(node)?, + NodeOp::Const(n, false) => self.write_const(*n)?, + NodeOp::Const(n, true) => self.write_sign_const(*n)?, + NodeOp::Assign => self.parse_assign(node)?, + NodeOp::Id(_) => self.parse_id(node)?, + NodeOp::Str(s) => self.parse_str(s), + }; + + Ok(()) + } + + pub fn parse_tree(&mut self, node_tree: &[Node]) -> Result<&mut Self> { + self.write(BP_VERSION); + + // environment entries + self.write_op(Op::Data); + self.write_const(node_tree.len() as u64)?; + + for node in node_tree { + self.parse(node)?; + } + + let width = width!(self.buf.len() as u64) as u8; + self.buf[0] |= width << 7; + let len = &self.buf.len().to_le_bytes(); + let copy_len = (4 * (width + 1)) as usize; + + if copy_len == 4 { + self.buf.drain(1..HEADER_LEN - (copy_len - 1)); + } + self.buf[1..copy_len + 1].copy_from_slice(&len[..copy_len]); + Ok(self) + } + + pub fn write_to(&self, writer: &mut W) -> std::result::Result<(), std::io::Error> { + writer.write_all(&self.buf) + } +} diff --git a/bip/src/de.rs b/bip/src/de.rs new file mode 100644 index 0000000..a4108df --- /dev/null +++ b/bip/src/de.rs @@ -0,0 +1,402 @@ +use byteorder::{ReadBytesExt, LE}; +use std::io::BufRead; +use std::io::Cursor; +use std::io::Read; + +use crate::bp; +use crate::error::{Error, Result}; +use serde::de::{self, DeserializeSeed, MapAccess, SeqAccess, Visitor}; +use serde::Deserialize; + +pub struct Deserializer { + buf: R, +} + +impl Deserializer { + pub fn from_reader(buf: R) -> 
Self { + Self { buf } + } +} + +fn read(reader: &mut R) -> Result { + reader.read_u8().map_err(|_| Error::Eof) +} + +fn read_uint(reader: &mut R, len: usize) -> Result { + reader.read_uint::(len).map_err(|_| Error::Eof) +} + +fn read_bp(reader: &mut R) -> Result> { + let header = read(reader)?; + let width = header & (1 << 7); + let _version = header & 0x7f; + + let len = read_uint(reader, (4 * (width + 1)) as usize)?; + let mut buf = Vec::with_capacity(len as usize); + + reader + .take(len) + .read_to_end(&mut buf) + .map_err(Error::BadRead)?; + Ok(buf) +} + +pub fn from_reader<'a, R: BufRead, T>(mut reader: R) -> Result +where + T: Deserialize<'a>, +{ + let buf = read_bp(&mut reader)?; + let mut deserializer = Deserializer::from_reader(Cursor::new(buf)); + let t = T::deserialize(&mut deserializer)?; + Ok(t) +} + +impl Deserializer { + fn read(&mut self) -> Result { + read(&mut self.buf) + } + + fn read_uint(&mut self, len: usize) -> Result { + read_uint(&mut self.buf, len) + } + + fn parse_id(&mut self) -> Result { + let mut buf: Vec = vec![]; + + if self.parse_op()? != bp::Op::Str { + return Err(Error::ExpectedStr); + } + + if self.buf.read_until(0x00, &mut buf).is_ok() { + Ok(std::str::from_utf8(&buf[..buf.len() - 1]) + .map_err(Error::BadUtf8Read)? + .to_string()) + } else { + Err(Error::Eof) + } + } + + fn parse_const(&mut self) -> Result { + let op = self.parse_op()?; + + if let bp::Op::Const(width, _float) = op { + Ok(self.read_uint((4 * (width + 1)) as usize)?) + } else { + Err(Error::ExpectedConst) + } + } + + fn parse_data(&mut self) -> Result { + if self.parse_op()? != bp::Op::Data { + Err(Error::ExpectedData) + } else { + self.parse_const() + } + } + + fn parse_list(&mut self) -> Result { + if self.parse_op()? 
!= bp::Op::List { + Err(Error::ExpectedList) + } else { + self.parse_const() + } + } + + fn parse_op(&mut self) -> Result { + bp::Op::decode(self.read()?).map_err(|_| Error::BadOp) + } +} + +impl<'de, 'a, R: BufRead> de::Deserializer<'de> for &'a mut Deserializer { + type Error = Error; + + fn deserialize_i64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i64(self.parse_const()? as i64) + } + + fn deserialize_u8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u8(self.parse_const()? as u8) + } + + fn deserialize_u16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u16(self.parse_const()? as u16) + } + + fn deserialize_u32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let c = self.parse_const()?; + + if c > u32::MAX as u64 { + Err(Error::BadWidth) + } else { + visitor.visit_u32(c as u32) + } + } + + fn deserialize_u64(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_u64(self.parse_const()?) + } + + fn deserialize_i8(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i8(self.parse_const()? as i8) + } + + fn deserialize_i16(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i16(self.parse_const()? as i16) + } + + fn deserialize_i32(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_i32(self.parse_const()? 
as i32) + } + + fn deserialize_f32(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_f64(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_map(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let entries = self.parse_data()?; + let value = visitor.visit_seq(DataEntries::new(self, entries))?; + + Ok(value) + } + + fn deserialize_struct( + self, + _name: &'static str, + _fields: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_map(visitor) + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_bool(self.parse_const()? != 0) + } + + fn deserialize_str(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_string(self.parse_id()?) 
+ } + + fn deserialize_char(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_bytes(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_byte_buf(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_option(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_unit(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_identifier(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_tuple(self, len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + let entries = self.parse_list()?; + + if entries as usize != len { + Err(Error::BadEntryCount) + } else { + visitor.visit_seq(DataEntries::new(self, entries)) + } + } + + fn deserialize_unit_struct(self, _name: &'static str, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_newtype_struct(self, _name: &'static str, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_any(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_ignored_any(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } + + fn deserialize_seq(self, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(Error::NotImplemented) + } +} + 
+struct DataEntries<'a, R: BufRead> { + de: &'a mut Deserializer, + entries: u64, +} + +impl<'a, R: BufRead> DataEntries<'a, R> { + fn new(de: &'a mut Deserializer, entries: u64) -> Self { + DataEntries { de, entries } + } +} + +impl<'de, 'a, R: BufRead> SeqAccess<'de> for DataEntries<'a, R> { + type Error = Error; + + fn next_element_seed(&mut self, seed: T) -> Result> + where + T: DeserializeSeed<'de>, + { + if self.entries > 0 { + seed.deserialize(&mut *self.de).map(Some) + } else { + Ok(None) + } + } + + fn size_hint(&self) -> Option { + Some(self.entries as usize) + } +} + +impl<'de, 'a, R: BufRead> MapAccess<'de> for DataEntries<'a, R> { + type Error = Error; + + fn next_key_seed(&mut self, seed: K) -> Result> + where + K: DeserializeSeed<'de>, + { + if self.entries > 0 { + self.entries -= 1; + seed.deserialize(&mut *self.de).map(Some) + } else { + Ok(None) + } + } + + fn next_value_seed(&mut self, seed: V) -> Result + where + V: DeserializeSeed<'de>, + { + seed.deserialize(&mut *self.de) + } + + fn size_hint(&self) -> Option { + Some(self.entries as usize) + } +} diff --git a/bip/src/error.rs b/bip/src/error.rs new file mode 100644 index 0000000..2170448 --- /dev/null +++ b/bip/src/error.rs @@ -0,0 +1,55 @@ +use serde::{de, ser}; +use std::fmt::{self, Display}; + +pub type Result = std::result::Result; + +#[derive(Debug)] +pub enum Error { + Message(String), + Eof, + BadOp, + BadEntryCount, + BadWidth, + BadRead(std::io::Error), + BadUtf8Read(std::str::Utf8Error), + ExpectedData, + ExpectedList, + ExpectedConst, + ExpectedId, + ExpectedStr, + NotImplemented, +} + +impl ser::Error for Error { + fn custom(msg: T) -> Self { + Error::Message(msg.to_string()) + } +} + +impl de::Error for Error { + fn custom(msg: T) -> Self { + Error::Message(msg.to_string()) + } +} + +impl Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::Message(msg) => write!(f, "{}", msg), + Error::Eof => write!(f, "unexpected EOF"), + 
type ParseResult<T> = Result<T, ParseError>;

/// Errors produced while turning symbols into a node tree.
#[derive(Debug)]
pub enum ParseError {
    Eof,
    ExpectedParens,
    ExpectedSym,
    ExpectedTerm,
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Eof => write!(f, "unexpected EOF"),
            Self::ExpectedParens => write!(f, "expected parenthesis"),
            Self::ExpectedSym => write!(f, "expected symbol"),
            Self::ExpectedTerm => write!(f, "expected semicolon"),
        }
    }
}

/// A lexical token of the text format.
#[derive(Debug, PartialEq)]
pub enum Symbol {
    /// `=`
    Eq,
    /// `;`
    Term,
    /// Alphanumeric identifier.
    Id(Option<String>),
    /// Non-negative integer literal.
    Int(u64),
    /// Negative integer literal, stored as the two's-complement bits of an `i64`.
    SInt(u64),
    /// Double-quoted string literal (no escape sequences).
    Str(String),
    /// One of `{`, `}`, `(`, `)`.
    Parens(char),
}

#[derive(Clone, Debug, PartialEq)]
enum NodeOp {
    /// Integer constant; the flag is `true` for literals lexed as `SInt`.
    Const(u64, bool),
    Data,
    List,
    Assign,
    Id(String),
    Str(String),
}

/// A node of the parsed tree: an operation plus its child entries.
#[derive(Clone, Debug)]
pub struct Node {
    op: NodeOp,
    entries: Vec<Node>,
}

/// Recursive-descent parser over a slice of [`Symbol`]s.
pub struct Parser<'a> {
    symbols: &'a [Symbol],
    pos: usize,
}

impl Node {
    fn new(op: NodeOp, entries: Vec<Node>) -> Node {
        Node { op, entries }
    }

    fn new_op(op: NodeOp) -> Node {
        Node::new(op, Vec::new())
    }
}

impl Symbol {
    /// Consumes characters from `it` for as long as `predicate` accepts them.
    fn take<I: Iterator<Item = char>>(
        it: &mut Peekable<I>,
        predicate: fn(c: char) -> bool,
    ) -> Vec<char> {
        let mut taken = Vec::new();

        while let Some(c) = it.next_if(|c| predicate(*c)) {
            taken.push(c);
        }

        taken
    }

    /// Splits `s` into symbols.
    ///
    /// Whitespace (space, tab, CR, LF) separates tokens. A run of `+`/`-`
    /// signs before a number is folded into a single sign.
    ///
    /// # Errors
    ///
    /// Returns a message for a malformed or out-of-range integer, an
    /// unterminated string literal, or a character that starts no token.
    pub fn parse(s: &str) -> Result<Vec<Symbol>, String> {
        let mut syms = Vec::new();
        let mut it = s.chars().peekable();

        while let Some(&c) = it.peek() {
            match c {
                '-' | '+' | '0'..='9' => {
                    let mut negative = false;
                    while let Some(sign) = it.next_if(|c| matches!(*c, '-' | '+')) {
                        negative ^= sign == '-';
                    }

                    let digits: String = Symbol::take(&mut it, |c| c.is_numeric())
                        .into_iter()
                        .collect();
                    let magnitude = digits
                        .parse::<u64>()
                        .map_err(|_| "invalid integer".to_string())?;

                    let sym = if negative {
                        // A negative literal must fit in an i64. Negating via
                        // `as i64 * -1` overflowed for 2^63 and produced a
                        // wrong, positive-looking value for larger magnitudes.
                        if magnitude > i64::MIN.unsigned_abs() {
                            return Err("invalid integer".to_string());
                        }
                        Self::SInt(magnitude.wrapping_neg())
                    } else {
                        Self::Int(magnitude)
                    };
                    syms.push(sym);
                }
                '{' | '}' | '(' | ')' => {
                    it.next();
                    syms.push(Self::Parens(c));
                }
                ';' => {
                    it.next();
                    syms.push(Self::Term);
                }
                '=' => {
                    it.next();
                    syms.push(Self::Eq);
                }
                '"' => {
                    it.next();

                    let text: String = Symbol::take(&mut it, |c| c != '"')
                        .into_iter()
                        .collect();
                    // The closing quote is mandatory; running off the end of
                    // the input used to be accepted silently.
                    if it.next() != Some('"') {
                        return Err("unterminated string".to_string());
                    }
                    syms.push(Self::Str(text));
                }
                ' ' | '\t' | '\r' | '\n' => {
                    it.next();
                }
                _ if c.is_alphanumeric() => {
                    let name: String = Symbol::take(&mut it, |c| c.is_alphanumeric())
                        .into_iter()
                        .collect();
                    syms.push(Self::Id(Some(name)));
                }
                _ => return Err(format!("invalid symbol {}", c)),
            }
        }

        Ok(syms)
    }
}

impl<'a> Parser<'a> {
    /// Returns the current symbol, if any, without consuming it.
    fn read(&self) -> Option<&Symbol> {
        self.symbols.get(self.pos)
    }

    /// Like [`Parser::read`], but running out of input is an error.
    fn peek(&self) -> Result<&Symbol, ParseError> {
        self.read().ok_or(ParseError::Eof)
    }

    /// Checks that the current symbol equals `sym`, otherwise fails with `err`.
    fn expect(&self, sym: Symbol, err: ParseError) -> Result<(), ParseError> {
        if *self.peek()? != sym {
            Err(err)
        } else {
            Ok(())
        }
    }

    fn next(&mut self) {
        self.pos += 1;
    }

    /// Parses one term.
    ///
    /// A literal or identifier is returned with the cursor still on it (the
    /// caller advances). A bracketed block `{ … }` / `( … )` is consumed up
    /// to and including its closing bracket; each entry inside is a full
    /// expression.
    fn parse_term(&mut self) -> Result<Node, ParseError> {
        let (mut node, close) = match self.peek()? {
            Symbol::Int(n) => return Ok(Node::new_op(NodeOp::Const(*n, false))),
            Symbol::SInt(n) => return Ok(Node::new_op(NodeOp::Const(*n, true))),
            Symbol::Str(s) => return Ok(Node::new_op(NodeOp::Str(s.clone()))),
            Symbol::Id(name) => {
                // `Symbol` is public, so `Id(None)` can reach us; report it
                // instead of panicking.
                let id = name.clone().ok_or(ParseError::ExpectedSym)?;
                return Ok(Node::new_op(NodeOp::Id(id)));
            }
            Symbol::Parens('{') => (Node::new_op(NodeOp::Data), Symbol::Parens('}')),
            Symbol::Parens('(') => (Node::new_op(NodeOp::List), Symbol::Parens(')')),
            // A closing bracket where a term should start is a real error; it
            // used to be reported as `Eof`, which `parse` mistook for success.
            Symbol::Parens(_) => return Err(ParseError::ExpectedParens),
            Symbol::Eq | Symbol::Term => return Err(ParseError::ExpectedSym),
        };

        self.next();
        while *self.peek()? != close {
            node.entries.push(self.parse_expr()?);
        }
        self.expect(close, ParseError::ExpectedParens)?;
        self.next();
        Ok(node)
    }

    /// Parses one expression:
    ///
    /// * a bracketed block (no terminator),
    /// * `term = expr`,
    /// * `term;`,
    /// * `term arg arg … ;` where each argument is a term.
    fn parse_expr(&mut self) -> ParseResult<Node> {
        let mut lhs = self.parse_term()?;

        // Bracketed blocks were already consumed in full by `parse_term`.
        if matches!(lhs.op, NodeOp::Data | NodeOp::List) {
            return Ok(lhs);
        }

        self.next();
        match *self.peek()? {
            Symbol::Eq => {
                self.next();
                let rhs = self.parse_expr()?;
                Ok(Node::new(NodeOp::Assign, vec![lhs, rhs]))
            }
            Symbol::Term => {
                self.next();
                Ok(lhs)
            }
            Symbol::Id(_)
            | Symbol::Int(_)
            | Symbol::SInt(_)
            | Symbol::Str(_)
            | Symbol::Parens('{' | '(') => {
                while *self.peek()? != Symbol::Term {
                    let arg = self.parse_term()?;
                    // `parse_term` leaves the cursor on a simple term but
                    // past a bracketed one; advancing unconditionally skipped
                    // the symbol that followed a bracketed argument.
                    let bracketed = matches!(arg.op, NodeOp::Data | NodeOp::List);
                    lhs.entries.push(arg);
                    if !bracketed {
                        self.next();
                    }
                }
                self.expect(Symbol::Term, ParseError::ExpectedTerm)?;
                self.next();
                Ok(lhs)
            }
            Symbol::Parens(_) => Err(ParseError::ExpectedTerm),
        }
    }

    /// Parses every expression in the input.
    ///
    /// # Errors
    ///
    /// Input that ends in the middle of an expression yields
    /// [`ParseError::Eof`]. Previously every `Eof` was taken as a normal end
    /// of input, so unterminated statements and blocks were dropped silently.
    pub fn parse(&mut self) -> Result<Vec<Node>, ParseError> {
        let mut node_tree = Vec::new();

        while self.pos < self.symbols.len() {
            node_tree.push(self.parse_expr()?);
        }

        Ok(node_tree)
    }

    pub fn new(symbols: &'a [Symbol]) -> Self {
        Self { symbols, pos: 0 }
    }
}
l.map_or(Err(Error::BadFile), |l| { + bip::Symbol::parse(&l).map_err(Error::BadRead) + }) + }) + .filter_map(|r| r.map_err(|e| errors.push(e)).ok()) + .flatten() + .collect(); + + let err_count = errors.iter().fold(0, |count, err| { + eprintln!("error: {err}"); + count + 1 + }); + + if err_count > 0 { + eprintln!("{err_count} errors"); + Err(Error::ReadError) + } else { + bip::Parser::new(&syms).parse().map_err(Error::ParseError) + } +} + +pub fn run(path: &str, outfile: &str) -> Result<()> { + let f = match std::fs::File::open(path) { + Ok(f) => f, + Err(_) => return Err(Error::BadPath), + }; + + let node_tree = read_bip(BufReader::new(f))?; + let mut out = match std::fs::File::create(outfile) { + Ok(f) => f, + Err(_) => return Err(Error::BadOutPath), + }; + + bp::Writer::new() + .parse_tree(&node_tree) + .map_err(Error::BpError)? + .write_to(&mut out) + .map_err(Error::BadWrite) +} diff --git a/bipc/src/main.rs b/bipc/src/main.rs new file mode 100644 index 0000000..6489fc1 --- /dev/null +++ b/bipc/src/main.rs @@ -0,0 +1,20 @@ +use clap::Parser; + +#[derive(Parser)] +struct Args { + /// Path for converted output + #[arg(short, long, default_value = "out")] + outfile: String, + + /// Path to bip file + path: String, +} + +fn main() { + let args = Args::parse(); + + if let Err(e) = bipc::run(&args.path, &args.outfile) { + eprintln!("{}: {e}", args.path); + std::process::exit(1); + } +}