diff options
| author | Jonas Maier <> | 2026-03-07 11:35:38 +0100 |
|---|---|---|
| committer | Jonas Maier <> | 2026-03-07 11:35:38 +0100 |
| commit | c36bf58bd0d3d8d2b89211c0bfccab68dad53d66 (patch) | |
| tree | 4c7bd138a280aed1120f85e4110beb51a8356b24 /src/parse/mod.rs | |
| parent | b3ea0f7580a41f0c7769ba610a6219a5fc7c9eb6 (diff) | |
| download | pish-c36bf58bd0d3d8d2b89211c0bfccab68dad53d66.tar.gz | |
finish parsing stuff, add first parsing test
Diffstat (limited to 'src/parse/mod.rs')
| -rw-r--r-- | src/parse/mod.rs | 777 |
1 files changed, 777 insertions, 0 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs new file mode 100644 index 0000000..4f38f9b --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,777 @@ +use crate::BString; + +#[cfg(test)] +mod test; + +pub trait Stage : PartialEq { + type Str: std::fmt::Debug + Clone + PartialEq; +} + +pub trait CmdDisplay { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>; +} + +#[derive(Debug, Clone, PartialEq)] +pub struct PreExpansion; +#[derive(Debug, Clone, PartialEq)] +pub struct PostExpansion; + +impl Stage for PreExpansion { + type Str = ExpString; +} + +impl Stage for PostExpansion { + type Str = BString; +} + +type Res<T, E> = std::result::Result<T, E>; + +pub trait Expander { + type Error; + fn expand_var(&mut self, v: BString) -> Res<BString, Self::Error>; + fn expand_cmd(&mut self, c: Ast<PostExpansion>) -> Res<BString, Self::Error>; +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Ast<T: Stage> { + FunDecl(FunDecl<T>), + VarAssign(VarAssign<T>), + Pipes(Pipes<T>), +} + +pub fn decl(name: ExpString, body: Ast<PreExpansion>) -> Ast<PreExpansion> { + Ast::FunDecl(FunDecl { + name: name, + body: FunBody { + body: Box::new(body), + }, + }) +} + +pub fn assign(var: ExpString, val: ExpString) -> Ast<PreExpansion> { + Ast::VarAssign(VarAssign { var, val }) +} + +pub fn pipes<const N: usize>(cmds: [Command<PreExpansion>; N]) -> Ast<PreExpansion> { + Ast::Pipes(Pipes { + cmds: cmds.to_vec(), + }) +} + +pub fn estr(x: &[u8]) -> ExpString { + ExpString { + parts: vec![StringPart::Boring(x.to_vec())], + } +} + +pub fn str<const N: usize>(parts: [StringPart; N]) -> ExpString { + ExpString { + parts: parts.to_vec(), + } +} + +pub fn plain(x: &[u8]) -> StringPart { + StringPart::Boring(x.to_vec()) +} + +pub fn var(x: &[u8]) -> StringPart { + StringPart::Var(VarName { name: x.to_vec() }) +} + +pub fn cmdp(x: Ast<PreExpansion>) -> StringPart { + StringPart::Cmd(x) +} + +pub fn cmd<const N: usize>(x: [ExpString; N]) -> Command<PreExpansion> { + Command { + cmd: x[0].clone(), + args: x[1..].to_vec(), + } +} + +impl CmdDisplay for Ast<PreExpansion> { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + match self { + Ast::FunDecl(fun_decl) => { + write!(w, "decl(")?; + fun_decl.name.cdisplay(w)?; + write!(w, ", ")?; + fun_decl.body.body.cdisplay(w)?; + write!(w, ")")?; + } + Ast::VarAssign(var_assign) => { + write!(w, "assign(")?; + var_assign.var.cdisplay(w)?; + write!(w, ", ")?; + var_assign.val.cdisplay(w)?; + write!(w, ")")?; + } + Ast::Pipes(pipes) => { + write!(w, "pipes([")?; + for cmd in pipes.cmds.iter() { + cmd.cdisplay(w)?; + write!(w, ",")?; + } + write!(w, "])")?; + } + } + Ok(()) + } +} + +impl CmdDisplay for ExpString { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + if self.parts.len() == 1 && self.parts[0].is_boring() { + write!( + w, + "estr(b\"{}\")", + self.parts[0].clone().unwrap_boring().escape_ascii() + ) + } else { + write!(w, "str([")?; + let mut first = true; + for part in self.parts.iter() { + if !first { + write!(w, ",")?; + } + first = false; + part.cdisplay(w)?; + } + write!(w, "])") + } + } +} + +impl CmdDisplay for StringPart { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + match self { + StringPart::Boring(items) => { + write!(w, "plain(")?; + items.as_slice().cdisplay(w)?; + write!(w, ")") + } + StringPart::Var(var_name) => { + write!(w, "var(")?; + var_name.name.as_slice().cdisplay(w)?; + write!(w, ")") + }, + StringPart::Cmd(ast) => { + write!(w, "cmdp(")?; + ast.cdisplay(w)?; + write!(w, ")") + } + } + } +} + +impl CmdDisplay for Command<PreExpansion> { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + write!(w, "cmd([")?; + self.cmd.cdisplay(w)?; + for arg in self.args.iter() { + write!(w, ", ")?; + arg.cdisplay(w)?; + } + write!(w, "])") + } +} + +impl CmdDisplay for &[u8] { + fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { + write!(w, "b\"")?; + write!(w, "{}", self.escape_ascii())?; + write!(w, "\"") + } +} + +impl Ast<PreExpansion> { + pub fn expand<E: Expander>(self, e: &mut E) -> Res<Ast<PostExpansion>, E::Error> { + match self { + Ast::VarAssign(va) => Ok(Ast::VarAssign(va.expand(e)?)), + Ast::Pipes(pipes) => Ok(Ast::Pipes(pipes.expand(e)?)), + Ast::FunDecl(fd) => Ok(Ast::FunDecl(fd.expand(e)?)), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct FunBody { + pub body: Box<Ast<PreExpansion>>, +} + +impl Parse for FunBody { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + b.spaces(); + + if b.is_empty() { + return Err(ParseError::Eof); + } + + if b.peek() != b'{' { + return Err(ParseError::Expected('{')); + } + + b.adv(); + let body = Box::new(Ast::parse(b)?); + if b.is_empty() { + if b.is_completion() { + Ok(Self { body }) + } else { + Err(ParseError::Eof) + } + } else if b.peek() == b'}' { + Ok(Self { body }) + } else { + Err(ParseError::Expected('}')) + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct FunDecl<S: Stage> { + pub name: S::Str, + pub body: FunBody, +} + +impl Parse for FunDecl<PreExpansion> { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + if !b.buf.starts_with(b"fun ") && !b.buf.starts_with(b"fun\t") { + return Err(ParseError::NotAFunDecl); + } + b.advance(4); + b.spaces(); + let name = ExpString::parse(b)?; + let body = FunBody::parse(b)?; + Ok(Self { name, body }) + } +} + +impl FunDecl<PreExpansion> { + fn expand<E: Expander>(self, e: &mut E) -> Res<FunDecl<PostExpansion>, E::Error> { + Ok(FunDecl { + name: self.name.expand(e)?, + body: self.body, + }) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VarAssign<S: Stage> { + pub var: S::Str, + pub val: S::Str, +} + +impl Parse for VarAssign<PreExpansion> { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + if !b.buf.starts_with(b"set ") && !b.buf.starts_with(b"set\t") { + return Err(ParseError::NotAVarAssign); + } + b.advance(4); + b.spaces(); + let var = ExpString::parse(b)?; + b.spaces(); + + if b.is_empty() { + return Err(ParseError::Eof); + } + let eq = b.adv(); + if eq != b'=' { + return Err(ParseError::Expected('=')); + } + let val = ExpString::parse(b)?; + + Ok(Self { var, val }) + } +} + +impl VarAssign<PreExpansion> { + fn expand<E: Expander>(self, e: &mut E) -> Res<VarAssign<PostExpansion>, E::Error> { + Ok(VarAssign { + var: self.var.expand(e)?, + val: self.val.expand(e)?, + }) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Pipes<T: Stage> { + pub cmds: Vec<Command<T>>, +} + +impl Pipes<PreExpansion> { + fn expand<E: Expander>(self, e: &mut E) -> Res<Pipes<PostExpansion>, E::Error> { + let mut cmds = Vec::with_capacity(self.cmds.len()); + for cmd in self.cmds.into_iter() { + cmds.push(cmd.expand(e)?); + } + Ok(Pipes { cmds }) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum StringPart { + Boring(BString), + Var(VarName), + Cmd(Ast<PreExpansion>), +} + +impl StringPart { + pub fn is_boring(&self) -> bool { + matches!(self, StringPart::Boring(..)) + } + pub fn unwrap_boring(self) -> BString { + match self { + StringPart::Boring(items) => items, + _ => panic!("unwrap on non-boring value"), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +/// `"hi ${var} $(cmd) "` gets mapped to `[Boring("hi "), Var("var"), String(" "), Cmd(...), Boring(" ")]` +pub struct ExpString { + parts: Vec<StringPart>, +} + +impl ExpString { + fn expand<E: Expander>(self, e: &mut E) -> Res<BString, E::Error> { + let mut out = BString::new(); + for part in self.parts.into_iter() { + let mut x = match part { + StringPart::Boring(items) => items, + StringPart::Var(v) => e.expand_var(v.name)?, + StringPart::Cmd(ast) => { + let exp = ast.expand(e)?; + e.expand_cmd(exp)? + } + }; + out.append(&mut x); + } + Ok(out) + } +} + +fn is_symbol(x: u8) -> bool { + match x { + b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"' => true, + _ => false, + } +} + +fn is_var_begin(x: u8) -> bool { + x.is_ascii_alphabetic() +} +fn is_var_name(x: u8) -> bool { + x.is_ascii_alphanumeric() || x == b'_' +} + +#[derive(Debug, Clone, PartialEq)] +pub struct VarName { + name: BString, +} + +impl Parse for VarName { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + if b.is_empty() { + return Err(ParseError::Eof); + } + + if !is_var_begin(b.peek()) { + return Err(ParseError::ExpectedAlphabetic); + } + + let mut name = BString::new(); + while b.has() { + let x = b.peek(); + if is_var_name(x) { + b.adv(); + name.push(x) + } else { + break; + } + } + + Ok(Self { name }) + } +} + +impl Parse for ExpString { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + b.spaces(); + if b.is_empty() { + return Err(ParseError::NotAString); + } + + let mut delim = b.peek(); + if delim == b'\'' || delim == b'"' { + b.adv(); + } else if is_symbol(delim) && delim != b'$' { + return Err(ParseError::NotAString); + } else { + delim = b' '; + } + + let mut parts = Vec::new(); + let p = &mut parts; + let mut escaping = false; + + let add_char = |p: &mut Vec<StringPart>, x: u8| match p.last_mut() { + Some(StringPart::Boring(v)) => v.push(x), + _ => p.push(StringPart::Boring(vec![x])), + }; + + while b.has() { + let x = b.peek(); + + if escaping { + add_char(p, x); + escaping = false; + b.adv(); + continue; + } + + if x == delim || (b.peek_space() && delim == b' ') { + if delim != b' ' { + b.adv(); + } + return Ok(Self { parts }); + } + + if delim == b' ' && is_symbol(x) && x != b'$' { + return Ok(Self { parts }); + } + + b.adv(); + + if delim == b'\'' { + // no fancy stuff here + add_char(p, x); + continue; + } + + if x == b'\\' { + escaping = true; + continue; + } + + if x == b'$' { + if !b.has() { + add_char(p, x); + continue; + } + + let x = b.peek(); + + if x == b'?' || x == b'!' { + b.adv(); + p.push(StringPart::Var(VarName { name: vec![x] })) + } else if is_var_begin(x) { + let v = VarName::parse(b)?; + p.push(StringPart::Var(v)); + } else if x == b'{' { + b.adv(); + let v = VarName::parse(b)?; + + if !b.has() { + return Err(ParseError::Eof); + } else if b.peek() == b':' { + todo!(": in var expansion") + } + + if !b.has() { + return Err(ParseError::Eof); + } else if b.peek() != b'}' { + return Err(ParseError::Incomplete); + } + + b.adv(); + p.push(StringPart::Var(v)); + } else if x == b'(' { + b.adv(); + let cmd = Ast::parse(b)?; + b.spaces(); + if b.is_empty() { + return Err(ParseError::Eof); + } else if b.peek() == b')' { + p.push(StringPart::Cmd(cmd)); + } else { + return Err(ParseError::Expected(')')); + } + } else { + // doesn't seem to be a variable or expansion, just add $ back into the string + add_char(p, b'$'); + continue; + } + + continue; + } + + add_char(p, x); + } + + if b.is_completion() || delim == b' ' { + Ok(Self { parts }) + } else { + Err(ParseError::Eof) + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Command<T: Stage> { + pub cmd: T::Str, + pub args: Vec<T::Str>, +} + +impl Command<PreExpansion> { + fn expand<E: Expander>(self, e: &mut E) -> Res<Command<PostExpansion>, E::Error> { + let cmd = self.cmd.expand(e)?; + let mut args = Vec::with_capacity(self.args.len()); + for arg in self.args.into_iter() { + args.push(arg.expand(e)?); + } + Ok(Command { cmd, args }) + } +} + +#[allow(unused)] +#[derive(Debug)] +pub enum ParseError { + /// "clean" EOF, i.e. not in the middle of something + Eof, + + /// "unclean" EOF, i.e. EOF after beginning a quoted string + Incomplete, + + ExpectedAlphabetic, + + Unknown(u8), + + Expected(char), + + NotAString, + + NotAFunDecl, + + NotAVarAssign, +} + +type Result<T> = std::result::Result<T, ParseError>; + +pub fn do_parse(x: &[u8]) -> Res<Ast<PreExpansion>, (ParseError, &[u8])> { + let mut c = Cursor::new(x, ParseMode::Command); + match Ast::parse(&mut c) { + Ok(ast) => Ok(ast), + Err(e) => Err((e, c.buf)), + } +} + +pub enum CompletionKind { + Command, + Argument, + None, +} + +pub struct CompletionContext { + pub kind: CompletionKind, + pub partial: BString, +} + +impl CompletionContext { + pub fn none() -> Self { + Self { + kind: CompletionKind::None, + partial: BString::new(), + } + } +} + +fn expstr_cc(s: &ExpString, kind: CompletionKind) -> CompletionContext { + if s.parts.len() > 1 || !s.parts[0].is_boring() { + CompletionContext::none() + } else { + CompletionContext { + kind, + partial: s.parts[0].clone().unwrap_boring().clone(), + } + } +} + +pub fn completion_context<'a>(x: &'a [u8]) -> CompletionContext { + let mut cursor = Cursor::new(x, ParseMode::Completion); + let ast = Ast::parse(&mut cursor); + match ast { + Ok(Ast::Pipes(pipes)) if cursor.spaced == false => { + if let Some(cmd) = pipes.cmds.last() { + if cmd.args.is_empty() { + expstr_cc(&cmd.cmd, CompletionKind::Command) + } else { + expstr_cc(&cmd.args[cmd.args.len() - 1], CompletionKind::Argument) + } + } else { + CompletionContext::none() + } + } + _ => CompletionContext::none(), + } +} + +trait Parse: Sized { + fn parse(b: &mut Cursor<'_>) -> Result<Self>; +} + +enum ParseMode { + Command, + Completion, +} + +struct Cursor<'a> { + buf: &'a [u8], + mode: ParseMode, + + /// if the last byte that was consumed was whitespace or part of a word + spaced: bool, + + backtrace: bool, +} + +impl<'a> Cursor<'a> { + fn new(buf: &'a [u8], mode: ParseMode) -> Self { + Self { + buf, + mode, + spaced: false, + backtrace: false, + } + } + + // non empty + fn has(&self) -> bool { + !self.buf.is_empty() + } + + fn is_empty(&self) -> bool { + self.buf.is_empty() + } + + fn bt(&self, word: &str) { + if self.backtrace { + let bt = std::backtrace::Backtrace::capture(); + let bt = format!("{bt}"); + println!("{word} {}\r", self.buf[0] as char); + for l in bt.lines().skip(4).take(2) { + println!("{l}\r"); + } + println!("\r"); + } + } + + fn peek(&self) -> u8 { + self.bt("peek"); + self.buf[0] + } + + fn adv(&mut self) -> u8 { + self.bt("adv"); + let out = self.buf[0]; + self.buf = &self.buf[1..]; + self.spaced = false; + out + } + + fn advance(&mut self, amt: usize) -> &[u8] { + self.bt(&format!("adv({amt})")); + let out = &self.buf[..amt]; + self.buf = &self.buf[amt..]; + self.spaced = false; + out + } + + fn peek_space(&self) -> bool { + if self.buf.is_empty() { + return false; + } + matches!(self.buf[0], b' ' | b'\t' | b'\n' | b'\r') + } + + fn spaces(&mut self) { + while self.peek_space() { + self.adv(); + self.spaced = true; + } + } + + fn is_completion(&self) -> bool { + match self.mode { + ParseMode::Completion => true, + _ => false, + } + } + + fn parse<T: Parse>(&mut self) -> Result<T> { + T::parse(self) + } +} + +impl Parse for Ast<PreExpansion> { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + b.spaces(); + + let orig_len = b.buf.len(); + let x = VarAssign::parse(b); + if let Ok(va) = x { + return Ok(Self::VarAssign(va)); + } else if b.buf.len() != orig_len { + x?; + } + + let orig_len = b.buf.len(); + let x = FunDecl::parse(b); + if let Ok(fd) = x { + return Ok(Self::FunDecl(fd)); + } else if b.buf.len() != orig_len { + x?; + } + + Ok(Self::Pipes(b.parse()?)) + } +} + +impl Parse for Command<PreExpansion> { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + let path: ExpString = b.parse()?; + let mut args = Vec::new(); + loop { + match ExpString::parse(b) { + Ok(arg) => args.push(arg), + Err(ParseError::NotAString) => break, + Err(e) => Err(e)?, + } + } + let x = Ok(Self { cmd: path, args }); + x + } +} + +impl Parse for Pipes<PreExpansion> { + fn parse(b: &mut Cursor<'_>) -> Result<Self> { + let mut cmds: Vec<Command<PreExpansion>> = vec![b.parse()?]; + + loop { + b.spaces(); + if b.is_empty() { + return Ok(Pipes { cmds }); + } + + let c = b.peek(); + if c == b'|' { + b.adv(); + cmds.push(b.parse()?); + } else if is_symbol(c) { + return Ok(Pipes { cmds }); + } else { + Err(ParseError::Unknown(c))?; + } + } + } +} |
