use pish_derive::Variants; use crate::{BString, PushAll, bstr, variants::Variants}; #[cfg(test)] mod test; mod span; pub mod regex; pub trait Stage: PartialEq { type Str: std::fmt::Debug + Clone + PartialEq; } pub trait CmdDisplay { fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>; } #[derive(Debug, Clone, PartialEq)] pub struct PreExpansion; #[derive(Debug, Clone, PartialEq)] pub struct PostExpansion; impl Stage for PreExpansion { type Str = ExpString; } impl Stage for PostExpansion { type Str = BString; } type Res = std::result::Result; #[allow(clippy::type_complexity)] pub trait Expander { type Error; fn expand_var(&mut self, v: BString, default: Option) -> Res; fn expand_cmd(&mut self, c: Ast) -> Res; type AliasAge; fn expand_alias( &mut self, cmd: &bstr, age: Option, ) -> Res)>, Self::Error> { let _ = cmd; let _ = age; Ok(None) } } #[derive(Debug, Clone, PartialEq)] pub struct Block { pub commands: Vec>, pub finished_parsing: bool, } #[derive(Debug, Clone, PartialEq)] pub struct Script { pub stmts: Vec>, } impl Parse for Script { fn parse(b: &mut Cursor<'_>) -> Result { let mut stmts = Vec::new(); loop { b.spaces(); if b.is_empty() { break; } match Ast::parse(b) { Ok(s) => stmts.push(s), Err(ParseError::Eof) => break, Err(e) => Err(e)?, } } Ok(Script { stmts }) } } #[derive(Debug, Clone, PartialEq)] pub enum Ast { FunDecl(FunDecl), VarAssign(VarAssign), Pipes(Pipes), If(If), While(While), Case(Case), } #[derive(Debug, Clone, PartialEq)] enum IfParseProgress { Condition, TrueBlock, FalseBlock, Done, } impl IfParseProgress { pub fn is_done(&self) -> bool { matches!(self, Self::Done) } } #[derive(Debug, Clone, PartialEq)] pub struct If { pub condition: Pipes, pub true_block: Block, pub false_block: Block, parse_progress: IfParseProgress, } #[derive(Debug, Clone, PartialEq)] pub struct While { pub condition: Pipes, pub block: Block, } impl If { fn expand(self, e: &mut E) -> Res, E::Error> { Ok(If { condition: self.condition.expand(e)?, 
true_block: self.true_block, false_block: self.false_block, parse_progress: self.parse_progress, }) } } pub fn decl(name: ExpString, body: Block) -> Ast { Ast::FunDecl(FunDecl { name, body }) } pub fn assign(var: ExpString, val: ExpString) -> Ast { Ast::VarAssign(VarAssign { var, val }) } pub fn pipes(cmds: [Command; N]) -> Ast { Ast::Pipes(Pipes { cmds: cmds.to_vec(), }) } pub fn cond( condition: Ast, true_block: Block, false_block: Block, ) -> Ast { let Ast::Pipes(condition) = condition else { panic!() }; Ast::If(If { condition, true_block, false_block, parse_progress: IfParseProgress::Done, }) } pub fn whil(condition: Ast, block: Block) -> Ast { let Ast::Pipes(condition) = condition else { panic!() }; Ast::While(While { condition, block }) } pub fn estr(x: &[u8]) -> ExpString { ExpString { parts: vec![StringPart::Boring(x.to_vec())], delim: StringDelimiter::None, } } pub fn str(parts: [StringPart; N]) -> ExpString { ExpString { parts: parts.to_vec(), delim: StringDelimiter::None, } } pub fn plain(x: &[u8]) -> StringPart { StringPart::Boring(x.to_vec()) } pub fn var(x: &[u8]) -> StringPart { StringPart::Var(Var { name: VarName { name: x.to_vec() }, default: None, already_complete: true, }) } pub fn var_default(x: &[u8], default: ExpString) -> StringPart { StringPart::Var(Var { name: VarName { name: x.to_vec() }, default: Some(default), already_complete: true, }) } pub fn cmdp(x: Ast) -> StringPart { StringPart::Cmd(CmdInterp { cmd: x, already_complete: true, }) } pub fn cmd(x: [ExpString; N]) -> Command { Command { cmd: x[0].clone(), args: x[1..].to_vec(), } } pub fn block(x: [Ast; N]) -> Block { Block { commands: x.to_vec(), finished_parsing: true, } } impl CmdDisplay for Block { fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { write!(w, "block([")?; let mut prev = false; for cmd in self.commands.iter() { if prev { write!(w, ",")?; } prev = true; cmd.cdisplay(w)?; } write!(w, "])") } } impl CmdDisplay for Ast { fn cdisplay(&self, w: &mut 
dyn std::io::Write) -> std::io::Result<()> { match self { Ast::FunDecl(fun_decl) => { write!(w, "decl(")?; fun_decl.name.cdisplay(w)?; write!(w, ", ")?; fun_decl.body.cdisplay(w)?; write!(w, ")")?; } Ast::VarAssign(var_assign) => { write!(w, "assign(")?; var_assign.var.cdisplay(w)?; write!(w, ", ")?; var_assign.val.cdisplay(w)?; write!(w, ")")?; } Ast::Pipes(pipes) => { write!(w, "pipes([")?; for cmd in pipes.cmds.iter() { cmd.cdisplay(w)?; write!(w, ",")?; } write!(w, "])")?; } Ast::If(i) => { write!(w, "cond(")?; Ast::Pipes(i.condition.clone()).cdisplay(w)?; write!(w, ", ")?; i.true_block.cdisplay(w)?; write!(w, ", ")?; i.false_block.cdisplay(w)?; write!(w, ")")?; } Ast::While(l) => { write!(w, "whil(")?; Ast::Pipes(l.condition.clone()).cdisplay(w)?; write!(w, ", ")?; l.block.cdisplay(w)?; write!(w, ")")?; } Ast::Case(c) => { write!(w, "case(")?; c.discriminant.cdisplay(w)?; write!(w, ", [")?; let mut first = true; for case in c.branches.iter() { if !first { write!(w, ", ")?; } first = false; case.cdisplay(w)?; } write!(w, "])")?; } } Ok(()) } } impl CmdDisplay for ExpString { fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { if self.parts.len() == 1 && self.parts[0].is_boring() { write!( w, "estr(b\"{}\")", self.parts[0].clone().unwrap_boring().escape_ascii() ) } else { write!(w, "str([")?; let mut first = true; for part in self.parts.iter() { if !first { write!(w, ",")?; } first = false; part.cdisplay(w)?; } write!(w, "])") } } } impl CmdDisplay for StringPart { fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { match self { StringPart::Boring(items) => { write!(w, "plain(")?; items.as_slice().cdisplay(w)?; write!(w, ")") } StringPart::Var(var) => { if let Some(default) = &var.default { write!(w, "var_default(")?; var.name.name.as_slice().cdisplay(w)?; write!(w, ",")?; default.cdisplay(w)?; write!(w, ")") } else { write!(w, "var(")?; var.name.name.as_slice().cdisplay(w)?; write!(w, ")") } } StringPart::Cmd(ast) => { 
write!(w, "cmdp(")?; ast.cmd.cdisplay(w)?; write!(w, ")") } } } } impl CmdDisplay for Command { fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { write!(w, "cmd([")?; self.cmd.cdisplay(w)?; for arg in self.args.iter() { write!(w, ", ")?; arg.cdisplay(w)?; } write!(w, "])") } } impl CmdDisplay for &[u8] { fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> { write!(w, "\"{}\"", self.escape_ascii()) } } impl Ast { pub fn expand(self, e: &mut E) -> Res, E::Error> { match self { Ast::VarAssign(va) => Ok(Ast::VarAssign(va.expand(e)?)), Ast::Pipes(pipes) => Ok(Ast::Pipes(pipes.expand(e)?)), Ast::FunDecl(fd) => Ok(Ast::FunDecl(fd.expand(e)?)), Ast::If(i) => Ok(Ast::If(i.expand(e)?)), Ast::While(w) => Ok(Ast::While(w)), Ast::Case(c) => Ok(Ast::Case(c.expand(e)?)), } } } #[derive(Debug, Clone, PartialEq)] pub struct FunBody { pub body: Box>, } impl Parse for FunBody { fn parse(b: &mut Cursor<'_>) -> Result { b.spaces(); if b.is_empty() { return Err(ParseError::Eof); } if b.peek() != b'{' { return Err(ParseError::Expected('{')); } b.adv(); let body = Box::new(Ast::parse(b)?); b.spaces(); if b.is_empty() { if b.is_completion() { Ok(Self { body }) } else { Err(ParseError::Eof) } } else if b.peek() == b'}' { Ok(Self { body }) } else { Err(ParseError::Expected('}')) } } } #[derive(Debug, Clone, PartialEq)] pub struct FunDecl { pub name: S::Str, pub body: Block, } impl Parse for Block { fn parse(b: &mut Cursor<'_>) -> Result { let mut commands = Vec::new(); b.expect_keyword(Keyword::OpenBrace)?; loop { while { b.spaces(); b.has() && b.peek() == b';' } { b.adv(); } if b.has() && b"})".contains(&b.peek()) || b.is_completion() && b.is_empty() { break; } let cmd = Ast::parse(b)?; commands.push(cmd); } let finished_parsing = match b.expect_keyword(Keyword::CloseBrace) { Ok(_) => true, Err(_) if b.is_completion() => false, Err(e) => Err(e)?, }; Ok(Self { commands, finished_parsing, }) } } impl Parse for FunDecl { fn parse(b: &mut Cursor<'_>) -> 
Result { if b.consume_keyword(Keyword::Fun).is_err() { return Err(ParseError::NotAFunDecl); } b.spaces(); let name = ExpString::parse(b)?; let body = Block::parse(b)?; Ok(Self { name, body }) } } impl FunDecl { fn expand(self, e: &mut E) -> Res, E::Error> { Ok(FunDecl { name: self.name.expand(e)?, body: self.body, }) } } #[derive(Debug, Clone, PartialEq)] pub struct VarAssign { pub var: S::Str, pub val: S::Str, } impl Parse for VarAssign { fn parse(b: &mut Cursor<'_>) -> Result { if b.consume_keyword(Keyword::Set).is_err() { return Err(ParseError::NotAVarAssign); } b.spaces(); let var = ExpString::parse(b)?; b.spaces(); if b.is_empty() { return Err(ParseError::Eof); } let eq = b.adv(); if eq != b'=' { return Err(ParseError::Expected('=')); } let val = ExpString::parse(b)?; Ok(Self { var, val }) } } impl VarAssign { fn expand(self, e: &mut E) -> Res, E::Error> { Ok(VarAssign { var: self.var.expand(e)?, val: self.val.expand(e)?, }) } } #[derive(Debug, Clone, PartialEq)] pub struct Pipes { pub cmds: Vec>, } impl Pipes { pub fn expand(self, e: &mut E) -> Res, E::Error> { let mut cmds = Vec::with_capacity(self.cmds.len()); for cmd in self.cmds.into_iter() { cmds.push(cmd.expand(e)?); } Ok(Pipes { cmds }) } } #[derive(Debug, Clone, PartialEq)] pub enum StringPart { Boring(BString), Var(Var), Cmd(CmdInterp), } #[derive(Debug, Clone, PartialEq)] pub struct CmdInterp { pub cmd: Ast, pub already_complete: bool, } #[derive(Debug, Clone, PartialEq)] pub struct Var { name: VarName, default: Option, /// if pressing tab right after the parsed variable should not try to complete the variable /// /// i.e. 
`${HOM}` -> true, `$HOM` -> false, `${HOM` -> false already_complete: bool, } impl Var { pub fn new(name: VarName) -> Self { Self { name, default: None, already_complete: false, } } } impl StringPart { pub fn is_boring(&self) -> bool { matches!(self, StringPart::Boring(..)) } pub fn is_command(&self) -> bool { matches!(self, StringPart::Cmd(..)) } pub fn unwrap_boring(self) -> BString { match self { StringPart::Boring(items) => items, _ => panic!("unwrap on non-boring value"), } } } #[derive(Debug, Clone, PartialEq)] /// `"hi ${var} $(cmd) "` gets mapped to `[Boring("hi "), Var("var"), String(" "), Cmd(...), Boring(" ")]` pub struct ExpString { parts: Vec, delim: StringDelimiter, } impl ExpString { pub fn expand(self, e: &mut E) -> Res { let mut out = BString::new(); for part in self.parts.into_iter() { let mut x = match part { StringPart::Boring(items) => items, StringPart::Var(v) => { let default = match v.default { Some(default) => Some(default.expand(e)?), None => None, }; e.expand_var(v.name.name, default)? } StringPart::Cmd(ast) => { let exp = ast.cmd.expand(e)?; e.expand_cmd(exp)? } }; out.append(&mut x); } Ok(out) } pub fn has_commands(&self) -> bool { self.parts.iter().any(|part| part.is_command()) } /// vars that are directly mentioned in this string interpolation, i.e. 
does not look into commands pub fn vars(&self) -> Vec { self.parts .iter() .filter_map(|part| match part { StringPart::Var(var) => Some(var.name.name.clone()), _ => None, }) .collect() } } fn is_symbol(x: u8) -> bool { matches!( x, b';' | b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"' ) } fn is_var_begin(x: u8) -> bool { x.is_ascii_alphanumeric() } fn is_var_name(x: u8) -> bool { x.is_ascii_alphanumeric() || x == b'_' } #[derive(Debug, Clone, PartialEq)] pub struct VarName { name: BString, } impl Parse for VarName { fn parse(b: &mut Cursor<'_>) -> Result { if b.is_empty() { return Err(ParseError::Eof); } let mut name = BString::new(); if b.peek().is_ascii_digit() { while b.has() && b.peek().is_ascii_digit() { name.push(b.adv()); } return Ok(Self { name }); } if !is_var_begin(b.peek()) { return Err(ParseError::ExpectedAlphabetic); } while b.has() { let x = b.peek(); if is_var_name(x) { b.adv(); name.push(x) } else { break; } } Ok(Self { name }) } } #[derive(Clone, Debug, PartialEq)] pub enum StringDelimiter { /// no delimiter, i.e. 
when parsing a simple command like `echo foo` None, /// double quotes, allows interpolation, `echo "foo $var"` Interp, /// single quotes, does not allow interpolation `echo 'foo $vardoesnotexpand'` Strict, /// triple quotes with custom prefix/suffix ``` /// echo DELIM""" /// basically /// a /// heredoc /// with /// $variables /// """DELIM /// ``` InterpCustom(BString), /// triple single quotes with custom prefix/suffix ``` /// echo FOO''' /// basically /// a /// heredoc /// without /// variables /// '''FOO /// ``` StrictCustom(BString), } /// gets the largest ident this slice starts with, might be empty fn peek_ident(b: &[u8]) -> &[u8] { if b.is_empty() || !b[0].is_ascii_alphabetic() { return &[]; } let mut out = &b[..1]; for i in 1..b.len() { if b[i].is_ascii_alphanumeric() { out = &b[..=i]; } else { break; } } out } impl StringDelimiter { fn try_begin(b: &mut Cursor<'_>) -> Option { if !b.has() { return None; } let ident = peek_ident(b.buf); if b.buf[ident.len()..].starts_with(b"\"\"\"") { b.advance(ident.len() + 3); if b.has() && b.peek() == b'\n' { b.adv(); } return Some(Self::InterpCustom(ident.to_vec())); } if b.buf[ident.len()..].starts_with(b"'''") { b.advance(ident.len() + 3); if b.has() && b.peek() == b'\n' { b.adv(); } return Some(Self::StrictCustom(ident.to_vec())); } // at this point we know it's not a custom identifier with triple quotes let x = b.peek(); if !x.is_ascii_whitespace() && (!is_symbol(x) || x == b'$') { return Some(Self::None); } if x == b'"' { b.adv(); return Some(Self::Interp); } if x == b'\'' { b.adv(); return Some(Self::Strict); } None } /// if the current string ends right at the cursor, consumes the string closing tokens and returns true /// otherwise, consumes no tokens and returns false fn try_end(&self, b: &mut Cursor<'_>) -> bool { if !b.has() { return matches!(self, Self::None); } let x = b.peek(); let buf = &mut b.buf; match self { StringDelimiter::None if x.is_ascii_whitespace() || is_symbol(x) && x != b'$' => true, 
StringDelimiter::Interp if x == b'"' => { b.adv(); true } StringDelimiter::Strict if x == b'\'' => { b.adv(); true } StringDelimiter::InterpCustom(delim) if buf.len() >= 3 && &buf[..3] == b"\"\"\"" && buf[3..].starts_with(delim) => { b.advance(3 + delim.len()); true } StringDelimiter::StrictCustom(delim) if buf.len() >= 3 && &buf[..3] == b"'''" && buf[3..].starts_with(delim) => { b.advance(3 + delim.len()); true } _ => false, } } fn is_strict(&self) -> bool { matches!(self, Self::Strict | Self::StrictCustom(_)) } fn is_none(&self) -> bool { matches!(self, Self::None) } /// assuming that `s` will be placed in the middle of some specifically delimited string pub fn escape(&self, mut s: &bstr, out: &mut BString) { while !s.is_empty() { let first = s[0]; match self { StringDelimiter::None => { if matches!( first, b' ' | b'$' | b'\\' | b'\'' | b'"' | b'|' | b'{' | b'}' | b';' ) { out.push(b'\\'); } } StringDelimiter::Interp | StringDelimiter::InterpCustom(_) => { if matches!(first, b'$' | b'\\' | b'"' | b'|') { out.push(b'\\'); } } StringDelimiter::Strict => { if first == b'\'' { out.push_all(b"'\\'"); } } StringDelimiter::StrictCustom(delim) => { if s.starts_with(b"'''") && s[3..].starts_with(delim) { out.push_all(b"'''"); out.push_all(delim); out.push_all(b"\\'\\'\\'"); out.push_all(delim); out.push_all(b"''"); out.push_all(delim); out.push_all(b"'''"); s = &s[3 + delim.len()..]; continue; } } } out.push(first); s = &s[1..]; } } pub fn write_opening_delimiter(&self, out: &mut BString) { match self { StringDelimiter::None => (), StringDelimiter::Interp => out.push(b'"'), StringDelimiter::Strict => out.push(b'\''), StringDelimiter::InterpCustom(delim) => { out.push_all(delim); out.push_all(b"\"\"\""); } StringDelimiter::StrictCustom(delim) => { out.push_all(delim); out.push_all(b"'''"); } } } pub fn write_closing_delimiter(&self, out: &mut BString) { match self { StringDelimiter::None => (), StringDelimiter::Interp => out.push(b'"'), StringDelimiter::Strict => 
out.push(b'\''), StringDelimiter::InterpCustom(delim) => { out.push_all(b"\"\"\""); out.push_all(delim); } StringDelimiter::StrictCustom(delim) => { out.push_all(b"'''"); out.push_all(delim); } } } } fn parse_escape_code(b: &mut Cursor<'_>) -> Result> { if !b.has() { return Err(ParseError::Eof); } let x = b.adv(); let y = match x { b'\n' => return Ok(None), b'n' => b'\n', b'r' => b'\r', b't' => b'\t', b'e' => 0x1b, // escape b'x' => { // parse two hex digits if b.buf.len() < 2 { Err(ParseError::Eof)?; } let x1 = b.adv(); let x2 = b.adv(); if !x1.is_ascii_hexdigit() || !x2.is_ascii_hexdigit() { Err(ParseError::NotHexDigit)?; } let x1 = (x1 as char).to_digit(16).unwrap_or(0); let x2 = (x2 as char).to_digit(16).unwrap_or(0); ((x1 << 4) | x2) as u8 } _ => x, }; Ok(Some(y)) } impl Parse for ExpString { fn parse(b: &mut Cursor<'_>) -> Result { b.spaces(); if b.is_empty() { return Err(ParseError::NotAString); } let mut parts = Vec::new(); let p = &mut parts; let add_char = |p: &mut Vec, x: u8| match p.last_mut() { Some(StringPart::Boring(v)) => v.push(x), _ => p.push(StringPart::Boring(vec![x])), }; let mut already_parsed = false; let mut last_delim = StringDelimiter::None; 'outer: loop { let begin = b.loc(); let Some(delim) = StringDelimiter::try_begin(b) else { break; }; last_delim = delim.clone(); already_parsed = true; while !delim.try_end(b) { if !b.has() { if b.is_completion() { b.highlight_from(begin, OtherHighlights::String); break 'outer; } else { return Err(ParseError::Eof); } } let begin = b.loc(); let x = b.adv(); if x == b'\\' && !delim.is_strict() { if let Some(x) = parse_escape_code(b)? { add_char(p, x); b.highlight_from(begin, OtherHighlights::Escapes); } } else if x == b'$' && !delim.is_strict() { if !b.has() { add_char(p, b'$'); continue; } let x = b.peek(); if is_var_begin(x) { let v = VarName::parse(b)?; p.push(StringPart::Var(Var::new(v))); b.highlight_from(begin, OtherHighlights::Variable); continue; } b.adv(); match x { b'?' | b'!' 
=> { b.highlight_from(begin, OtherHighlights::Variable); p.push(StringPart::Var(Var::new(VarName { name: vec![x] }))) } b'{' => { let v = VarName::parse(b)?; let mut default = None; if !b.has() { if !b.is_completion() { return Err(ParseError::Eof); } } else if b.peek() == b':' { b.adv(); if !b.has() { return Err(ParseError::Eof); } if b.peek() == b'-' { b.adv(); default = Some(ExpString::parse(b)?); } else { todo!(": in var expansion") } } if !b.has() { if !b.is_completion() { return Err(ParseError::Eof); } } else if b.peek() != b'}' { return Err(ParseError::Expected('}')); } let already_complete = b.has(); if already_complete { b.adv(); } b.highlight_from(begin, OtherHighlights::Variable); p.push(StringPart::Var(Var { name: v, default, already_complete, })); } b'(' => { let cmd = Ast::parse(b)?; b.spaces(); if b.is_empty() && !b.is_completion() { return Err(ParseError::Expected(')')); } if b.has() && b.peek() == b')' { b.adv(); p.push(StringPart::Cmd(CmdInterp { cmd, already_complete: true, })); } else if b.is_completion() { p.push(StringPart::Cmd(CmdInterp { cmd, already_complete: false, })) } else { return Err(ParseError::Expected(')')); } } x => { add_char(p, b'$'); add_char(p, x); } } } else if delim.is_none() && x == b'~' && p.is_empty() && (!b.has() || b.peek().is_ascii_whitespace() || b.peek() == b'/') { p.push(StringPart::Var(Var { name: VarName { name: b"HOME".to_vec(), }, default: None, already_complete: true, })); b.highlight_from(begin, OtherHighlights::Variable); } else { add_char(p, x); } } if !delim.is_none() { b.highlight_from(begin, OtherHighlights::String); } } if already_parsed { Ok(Self { parts, delim: last_delim, }) } else { Err(ParseError::NotAString) } } } impl Parse for Vec { fn parse(b: &mut Cursor<'_>) -> Result { let mut strings = Vec::new(); loop { match ExpString::parse(b) { Ok(s) => strings.push(s), Err(ParseError::Eof) => break, Err(ParseError::NotAString) => break, Err(e) => Err(e)?, } } Ok(strings) } } #[derive(Debug, Clone, 
PartialEq)] pub struct Command { pub cmd: T::Str, pub args: Vec, } impl Command { fn full_alias_expansion(&mut self, e: &mut E) -> Res<(), E::Error> { self.args.reverse(); let mut age = None; while self.cmd.parts.len() == 1 && let StringPart::Boring(s) = &self.cmd.parts[0] { if let Some((new_age, exp)) = e.expand_alias(s, age.take())? { age = Some(new_age); self.cmd = exp.first().unwrap().clone(); for e in exp.into_iter().skip(1).rev() { self.args.push(e); } } else { break; } } self.args.reverse(); Ok(()) } fn expand(mut self, e: &mut E) -> Res, E::Error> { self.full_alias_expansion(e)?; let cmd = self.cmd.expand(e)?; let mut args = Vec::with_capacity(self.args.len()); for arg in self.args.into_iter() { args.push(arg.expand(e)?); } Ok(Command { cmd, args }) } } #[allow(unused)] #[derive(Debug, PartialEq)] pub enum ParseError { /// "clean" EOF, i.e. not in the middle of something Eof, /// "unclean" EOF, i.e. EOF after beginning a quoted string Incomplete, ExpectedAlphabetic, Unknown(u8), Expected(char), NotAString, NotAFunDecl, NotAVarAssign, NotHexDigit, NotABlock, NotAnIf, NotAWhile, ExpectedKeyword(Keyword), } type Result = std::result::Result; pub fn do_parse(x: &[u8]) -> Res, (ParseError, &[u8])> { let mut c = Cursor::new(x, ParseMode::Command); match Ast::parse(&mut c) { Ok(ast) => Ok(ast), Err(e) => Err((e, c.buf)), } } #[derive(Debug, PartialEq, Clone)] pub enum CompletionKind { Command, PathCommand, Argument, Variable, None, } pub struct CompletionContext { pub kind: CompletionKind, pub partial: BString, pub delim: StringDelimiter, } impl CompletionContext { pub fn none() -> Self { Self { kind: CompletionKind::None, partial: BString::new(), delim: StringDelimiter::None, } } } impl Block { fn completion(&self, e: &mut E) -> CompletionContext { if let Some(cmd) = self.commands.last() { cmd.completion(e) } else { CompletionContext::none() } } fn empty() -> Self { Self { commands: Vec::with_capacity(0), finished_parsing: true, } } } impl Ast { fn 
completion(&self, e: &mut E) -> CompletionContext { match self { Ast::FunDecl(fd) => fd.body.completion(e), Ast::VarAssign(va) => va.val.completion(e, CompletionKind::Argument), Ast::Pipes(p) => p.completion(e), Ast::If(i) => i.completion(e), Ast::While(_) => todo!(), Ast::Case(_) => todo!(), } } } impl If { fn completion(&self, e: &mut E) -> CompletionContext { match self.parse_progress { IfParseProgress::Condition => self.condition.completion(e), IfParseProgress::TrueBlock => self.true_block.completion(e), IfParseProgress::FalseBlock => self.false_block.completion(e), IfParseProgress::Done => CompletionContext::none(), } } } impl ExpString { fn completion(&self, e: &mut E, mut kind: CompletionKind) -> CompletionContext { if let Some(StringPart::Var(var)) = self.parts.last() && !var.already_complete { CompletionContext { kind: CompletionKind::Variable, partial: var.name.name.clone(), delim: self.delim.clone(), } } else if let Some(StringPart::Cmd(cmd)) = self.parts.last() && !cmd.already_complete { cmd.cmd.completion(e) } else if let Ok(s) = self.clone().expand(e) { if s.contains(&b'/') && kind == CompletionKind::Command { kind = CompletionKind::PathCommand; } CompletionContext { kind, partial: s, delim: self.delim.clone(), } } else { CompletionContext::none() } } } impl Pipes { fn completion(&self, e: &mut E) -> CompletionContext { let Some(cmd) = self.cmds.last() else { return CompletionContext::none(); }; if let Some(arg) = cmd.args.last() { arg.completion(e, CompletionKind::Argument) } else { cmd.cmd.completion(e, CompletionKind::Command) } } } pub fn completion_context(x: &[u8], e: &mut E) -> CompletionContext { let mut cursor = Cursor::new(x, ParseMode::Completion); let ast = Ast::parse(&mut cursor); let Ok(ast) = ast else { return CompletionContext::none(); }; if cursor.spaced { return CompletionContext::none(); } ast.completion(e) } pub trait Parse: Sized { fn parse(b: &mut Cursor<'_>) -> Result; fn parse_from_bytes(x: &[u8]) -> Result { let mut c = 
Cursor::new(x, ParseMode::Command); let parsed = Self::parse(&mut c)?; if c.has() { return Err(ParseError::Unknown(c.buf[0])); } Ok(parsed) } } #[derive(Copy, Clone)] pub enum ParseMode { Command, Completion, } #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum HighlightKind { None, Keyword(Keyword), Other(OtherHighlights), } impl From for HighlightKind { fn from(value: Keyword) -> Self { Self::Keyword(value) } } impl From for HighlightKind { fn from(value: OtherHighlights) -> Self { Self::Other(value) } } #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Variants)] pub enum OtherHighlights { String, Variable, Regex, RegexSymbol, Escapes, SyntaxError, } impl OtherHighlights { pub fn identifier(&self) -> &bstr { match self { OtherHighlights::String => b"string", OtherHighlights::Variable => b"var", OtherHighlights::Escapes => b"escape", OtherHighlights::Regex => b"regex", OtherHighlights::RegexSymbol => b"regexsym", OtherHighlights::SyntaxError => b"syntax_error", } } } impl HighlightKind { /// all highlight kind variants *except* None pub fn variants() -> impl Iterator { let a = Keyword::VARIANTS .iter() .cloned() .map(HighlightKind::Keyword); let b = OtherHighlights::VARIANTS .iter() .cloned() .map(HighlightKind::Other); a.chain(b) } /// an unique identifier such that we can refer to that in the builtin `pish_theme` pub fn identifier(&self) -> &bstr { match self { HighlightKind::None => b"default", HighlightKind::Keyword(keyword) => keyword.identifier(), HighlightKind::Other(other) => other.identifier(), } } pub fn from_identifier(ident: &bstr) -> Vec { match ident { b"keywords" => { return Keyword::VARIANTS .iter() .cloned() .map(HighlightKind::Keyword) .collect(); } b"braces" => { return vec![ HighlightKind::Keyword(Keyword::OpenBrace), HighlightKind::Keyword(Keyword::CloseBrace), ]; } b"all" | b"everything" => return Self::variants().collect(), _ => (), } Self::variants() .filter(|x| x.identifier() == ident) .collect() 
} pub fn all_identifiers() -> Vec { let kw = Keyword::VARIANTS.iter().map(Keyword::identifier); let ot = OtherHighlights::VARIANTS .iter() .map(OtherHighlights::identifier); let groups = [&b"keywords"[..], b"braces", b"all", b"everything"]; kw.chain(ot) .chain(groups) .map(|ident| ident.to_vec()) .collect() } } #[test] fn no_two_highlight_kinds_share_an_identifier() { use std::collections::HashSet; let unique_identifiers: HashSet = HighlightKind::variants() .map(|x| x.identifier().to_vec()) .collect(); assert_eq!(unique_identifiers.len(), HighlightKind::variants().count()); } pub struct Highlight { pub span: span::Span, pub kind: HighlightKind, } pub struct Cursor<'a> { buf: &'a [u8], mode: ParseMode, /// if the last byte that was consumed was whitespace or part of a word spaced: bool, pub backtrace: bool, pub highlights: Vec, file: span::FileId, buf_start: u64, buf_len: u32, } #[derive(Default)] struct SpaceStats { space: u32, tab: u32, lf: u32, cr: u32, } impl<'a> Cursor<'a> { pub fn new(buf: &'a [u8], mode: ParseMode) -> Self { assert!( buf.len() < u32::MAX as usize, "cannot support larger parse buffers for now - what are you even doing." 
); Self { buf, mode, spaced: false, backtrace: false, highlights: Vec::new(), file: span::FileId::new(), buf_start: buf.as_ptr() as u64, buf_len: buf.len() as u32, } } pub fn remaining(&self) -> &[u8] { self.buf } // non empty fn has(&self) -> bool { !self.buf.is_empty() } fn is_empty(&self) -> bool { self.buf.is_empty() } fn bt(&self, word: &str) { if self.backtrace { let bt = std::backtrace::Backtrace::capture(); let bt = format!("{bt}"); if self.buf.is_empty() { println!("{word} \r"); } else { println!("{word} {}\r", self.buf[0] as char); } for l in bt.lines().skip(4).take(8) { println!("{l}\r"); } println!("\r"); } } fn peek(&self) -> u8 { self.bt("peek"); self.buf[0] } fn adv(&mut self) -> u8 { self.bt("adv"); let out = self.buf[0]; self.buf = &self.buf[1..]; self.spaced = false; out } fn advance(&mut self, amt: usize) -> &[u8] { self.bt(&format!("adv({amt})")); let out = &self.buf[..amt]; self.buf = &self.buf[amt..]; self.spaced = false; out } fn peek_space(&self) -> bool { if self.buf.is_empty() { return false; } matches!(self.buf[0], b' ' | b'\t' | b'\n' | b'\r') } fn peek_comment(&self) -> bool { self.has() && self.peek() == b'#' } fn consume_comment(&mut self) { assert_eq!(self.adv(), b'#'); while self.has() && self.peek() != b'\n' { self.adv(); } } fn spaces(&mut self) { while { if self.peek_comment() { self.consume_comment(); true } else { self.peek_space() } } { self.adv(); self.spaced = true; } } fn loc_u32(&self) -> u32 { let now_loc = self.buf.as_ptr() as u64; assert!(now_loc >= self.buf_start, "not the original buffer"); let relative_loc = (now_loc - self.buf_start) as u32; assert!(relative_loc <= self.buf_len, "not the original buffer"); relative_loc } fn loc(&self) -> span::SpanFrom { self.file.from(self.loc_u32()) } fn highlight_from(&mut self, from: span::SpanFrom, kind: impl Into) { self.highlight(from.to(self.loc_u32()), kind) } fn highlight(&mut self, span: span::Span, kind: impl Into) { self.highlights.push(Highlight { span, kind: 
kind.into(), }); }

    /// Skips leading whitespace and reports what was skipped.
    ///
    /// While the buffer is non-empty and its first byte is a space, tab, LF or
    /// CR, the matching counter in [`SpaceStats`] is incremented and
    /// `self.adv()` is called once. Returns the accumulated counters.
    fn spaces_stats(&mut self) -> SpaceStats {
        let mut stats = SpaceStats::default();
        while self.has() && b" \t\n\r".contains(&self.buf[0]) {
            match self.buf[0] {
                b' ' => stats.space += 1,
                b'\t' => stats.tab += 1,
                b'\n' => stats.lf += 1,
                b'\r' => stats.cr += 1,
                // the loop condition only admits the four bytes handled above
                _ => unreachable!(),
            }
            self.adv();
        }
        stats
    }

    /// returns true if the next thing in the buffer is whitespace (including at least one newline)
    ///
    /// does not modify the buffer
    fn whitespace_newline(&mut self) -> bool {
        // Remember the current buffer so the scan below can be undone.
        let x = self.buf;
        let s = self.spaces_stats();
        self.buf = x;
        s.lf > 0
    }

    /// Returns true when `self.mode` is `ParseMode::Completion`.
    fn is_completion(&self) -> bool {
        matches!(self.mode, ParseMode::Completion)
    }

    /// Convenience wrapper that forwards to `T::parse(self)`.
    // NOTE(review): `T` is not declared in this signature as shown (and `Result`
    // carries no type argument); generic parameter lists appear to be missing
    // from this listing -- confirm against the canonical source.
    fn parse(&mut self) -> Result {
        T::parse(self)
    }

    /// Consumes `kw`, tolerating junk in front of it when in completion mode.
    ///
    /// Outside completion mode this is exactly [`Self::consume_keyword`].
    /// In completion mode the cursor is advanced one step at a time until the
    /// keyword can be consumed; whatever was skipped on the way is highlighted
    /// as `OtherHighlights::SyntaxError`.
    ///
    /// # Errors
    /// Returns `ParseError::ExpectedKeyword(kw)` if the keyword is missing
    /// (strict mode) or if the input runs out before it is found (completion
    /// mode).
    fn expect_keyword(&mut self, kw: Keyword) -> Result<()> {
        if !self.is_completion() {
            return self.consume_keyword(kw);
        }
        // very lax parsing that consumes everything in its way until the keyword arrives
        self.spaces();
        let begin = self.loc();
        loop {
            // Position before this attempt; used as the end of the skipped range.
            let end = self.loc_u32();
            if self.consume_keyword(kw).is_ok() {
                // Only highlight when something was actually skipped.
                if end > begin.start {
                    self.highlight(begin.to(end), OtherHighlights::SyntaxError);
                }
                return Ok(());
            } else if self.has() {
                self.adv();
            } else {
                break;
            }
        }
        // Input exhausted without finding the keyword: highlight from `begin` on.
        self.highlight_from(begin, OtherHighlights::SyntaxError);
        Err(ParseError::ExpectedKeyword(kw))
    }

    /// Tries to consume the keyword `kw` at the current position.
    ///
    /// `self.spaces()` is called before matching, and that call is not undone
    /// when the keyword does not match. On success the keyword's span is
    /// highlighted with `kw`.
    ///
    /// For keywords where [`Keyword::requires_space`] is true, the keyword must
    /// be followed by an ASCII whitespace byte (which is consumed together with
    /// the keyword), or -- in completion mode only -- be the entire remaining
    /// buffer. Other keywords only need to be a prefix of the buffer.
    ///
    /// # Errors
    /// Returns `ParseError::ExpectedKeyword(kw)` when the keyword does not match.
    fn consume_keyword(&mut self, kw: Keyword) -> Result<()> {
        let bytes = kw.as_bytes();
        if self.backtrace {
            self.bt(&format!("keyword {kw:?}"));
        }
        self.spaces();
        // Span of the keyword itself, taken before anything is consumed.
        let span = self.loc().with_len(bytes.len() as u32);
        let result = if self.buf.starts_with(bytes) {
            if kw.requires_space() {
                if self.buf.len() > bytes.len() && self.buf[bytes.len()].is_ascii_whitespace() {
                    // Drop the keyword plus the single whitespace byte after it.
                    self.buf = &self.buf[bytes.len() + 1..];
                    self.spaces();
                    Ok(())
                } else if self.is_completion() && self.buf.len() == bytes.len() {
                    // Completion mode: the keyword is the last thing typed so far.
                    self.buf = &self.buf[bytes.len()..];
                    Ok(())
                } else {
                    Err(ParseError::ExpectedKeyword(kw))
                }
            } else {
                self.buf = &self.buf[bytes.len()..];
                self.spaces();
                Ok(())
            }
        } else {
            Err(ParseError::ExpectedKeyword(kw))
        };
        if result.is_ok() {
            self.highlight(span, kw);
        }
        result
    }
}

/// Keywords and brace tokens recognised by the parser.
///
/// The source spelling of each variant is given by [`Keyword::as_bytes`].
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Variants)]
pub enum Keyword {
    /// `if`
    If,
    /// `while`
    While,
    /// `else`
    Else,
    /// `elif`
    Elif,
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `case`
    Case,
    /// `fun`
    Fun,
    /// `set`
    Set,
}

impl Keyword {
    /// Returns the byte spelling of the keyword as it appears in source text.
    fn as_bytes(&self) -> &bstr {
        match self {
            Keyword::If => b"if",
            Keyword::While => b"while",
            Keyword::Elif => b"elif",
            Keyword::Else => b"else",
            Keyword::OpenBrace => b"{",
            Keyword::CloseBrace => b"}",
            Keyword::Case => b"case",
            Keyword::Fun => b"fun",
            Keyword::Set => b"set",
        }
    }

    /// Returns true for keywords that `Cursor::consume_keyword` only accepts
    /// when followed by whitespace; `else` and the two braces are exempt.
    fn requires_space(&self) -> bool {
        match self {
            Keyword::If => true,
            Keyword::While => true,
            Keyword::Elif => true,
            Keyword::Else => false,
            Keyword::OpenBrace => false,
            Keyword::CloseBrace => false,
            Keyword::Case => true,
            Keyword::Fun => true,
            Keyword::Set => true,
        }
    }

    /// Public accessor for the keyword's spelling; same value as `as_bytes`.
    pub fn identifier(&self) -> &bstr {
        self.as_bytes()
    }
}

impl If {
    /// Parses a conditional introduced by `first_keyword`.
    ///
    /// Callers in this file pass `Keyword::If` for a fresh statement and
    /// `Keyword::Elif` for a chained branch. The sequence is: keyword,
    /// condition (`Pipes`), true block, then optionally `else` + block or a
    /// nested `elif`. `parse_progress` records how far parsing got; in
    /// completion mode the function returns early with a partial `If` when the
    /// input ends after the condition or after the true block.
    fn parse_internal(b: &mut Cursor<'_>, first_keyword: Keyword) -> Result {
        b.consume_keyword(first_keyword)?;
        let mut res = If {
            condition: Pipes::parse(b)?,
            true_block: Block::empty(),
            false_block: Block::empty(),
            parse_progress: IfParseProgress::Condition,
        };
        b.spaces();
        // Completion mode: input ended right after the condition.
        if b.is_completion() && b.is_empty() {
            return Ok(res);
        }
        res.true_block = Block::parse(b)?;
        if res.true_block.finished_parsing {
            res.parse_progress = IfParseProgress::Done;
        } else {
            res.parse_progress = IfParseProgress::TrueBlock;
        }
        b.spaces();
        // Completion mode: input ended right after the true block.
        if b.is_completion() && b.is_empty() {
            return Ok(res);
        }
        res.false_block = if b.consume_keyword(Keyword::Else).is_ok() {
            Block::parse(b)?
        // NOTE(review): any error from the `elif` attempt is discarded by this
        // `if let Ok`, and nothing here rewinds the cursor afterwards -- confirm
        // that swallowing failures past the `elif` keyword is intended.
        } else if let Ok(elif) = Self::parse_internal(b, Keyword::Elif) {
            // An `elif` is represented as an else-block holding one nested `If`.
            Block {
                finished_parsing: elif.parse_progress.is_done(),
                commands: vec![Ast::If(elif)],
            }
        } else {
            Block::empty()
        };
        // The final progress state is derived from the false block alone.
        if res.false_block.finished_parsing {
            res.parse_progress = IfParseProgress::Done;
        } else {
            res.parse_progress = IfParseProgress::FalseBlock;
        }
        Ok(res)
    }
}

impl Parse for If {
    /// Parses an `if` statement via `parse_internal` with `Keyword::If`.
    fn parse(b: &mut Cursor<'_>) -> Result {
        Self::parse_internal(b, Keyword::If)
    }
}

impl Parse for While {
    /// Parses `while`, then a `Pipes` condition, then the body `Block`.
    fn parse(b: &mut Cursor<'_>) -> Result {
        b.consume_keyword(Keyword::While)?;
        let condition = Pipes::parse(b)?;
        let block = Block::parse(b)?;
        Ok(Self { condition, block })
    }
}

impl Ast {
    /// Tries each statement form in a fixed order: `If`, `While`, `VarAssign`,
    /// `FunDecl`, `Case`, and finally `Pipes`.
    ///
    /// A failed attempt that left `b.buf.len()` unchanged falls through to the
    /// next form. A failed attempt that changed the buffer length has its
    /// error returned to the caller instead.
    fn parse_inner(b: &mut Cursor<'_>) -> Result {
        // Skip whitespace up front so the length comparisons below are not
        // affected by the `spaces()` call inside `consume_keyword`.
        b.spaces();

        let orig_len = b.buf.len();
        let x = If::parse(b);
        if let Ok(cond) = x {
            return Ok(Self::If(cond));
        } else if b.buf.len() != orig_len {
            x?;
        }

        let orig_len = b.buf.len();
        let x = While::parse(b);
        if let Ok(x) = x {
            return Ok(Self::While(x));
        } else if b.buf.len() != orig_len {
            x?;
        }

        let orig_len = b.buf.len();
        let x = VarAssign::parse(b);
        if let Ok(va) = x {
            return Ok(Self::VarAssign(va));
        } else if b.buf.len() != orig_len {
            x?;
        }

        let orig_len = b.buf.len();
        let x = FunDecl::parse(b);
        if let Ok(fd) = x {
            return Ok(Self::FunDecl(fd));
        } else if b.buf.len() != orig_len {
            x?;
        }

        let orig_len = b.buf.len();
        let x = Case::parse(b);
        if let Ok(c) = x {
            return Ok(Self::Case(c));
        } else if b.buf.len() != orig_len {
            x?;
        }

        // Nothing else matched: treat the input as a command pipeline.
        Ok(Self::Pipes(b.parse()?))
    }
}

impl Parse for Ast {
    /// Parses one statement and, on success, calls `highlight_from` for the
    /// range starting at the statement's first position with
    /// `HighlightKind::None`.
    fn parse(b: &mut Cursor<'_>) -> Result {
        let begin = b.loc();
        let result = Ast::parse_inner(b);
        if result.is_ok() {
            b.highlight_from(begin, HighlightKind::None);
        }
        result
    }
}

impl Parse for Command {
    /// Parses a command: one `ExpString` for the command itself followed by
    /// zero or more `ExpString` arguments.
    ///
    /// Argument parsing stops when the upcoming whitespace contains a newline
    /// or when `ExpString::parse` reports `ParseError::NotAString`; any other
    /// error is propagated.
    fn parse(b: &mut Cursor<'_>) -> Result {
        let path: ExpString = b.parse()?;
        let mut args = Vec::new();
        while !b.whitespace_newline() {
            match ExpString::parse(b) {
                Ok(arg) => args.push(arg),
                Err(ParseError::NotAString) => break,
                Err(e) => Err(e)?,
            }
        }
        Ok(Self { cmd: path, args })
    }
}

impl Parse for Pipes {
    /// Parses one or more commands separated by `|`.
    ///
    /// After each command the following whitespace is skipped and the next
    /// byte decides what happens:
    /// - end of input: the pipeline is complete;
    /// - `|`: another command is parsed and appended;
    /// - `;`: every consecutive `;` (each followed by `spaces()`) is consumed
    ///   and the pipeline is complete;
    /// - a newline was among the skipped whitespace, or `is_symbol(c)` holds:
    ///   the pipeline is complete and the byte is left in the buffer;
    /// - anything else: `ParseError::Unknown(c)`.
    fn parse(b: &mut Cursor<'_>) -> Result {
        let mut cmds: Vec> = vec![b.parse()?];
        loop {
            let space_stats = b.spaces_stats();
            if b.is_empty() {
                return Ok(Pipes { cmds });
            }
            let c = b.peek();
            if c == b'|' {
                b.adv();
                cmds.push(b.parse()?);
            } else if c == b';' {
                while b.has() && b.peek() == b';' {
                    b.adv();
                    b.spaces();
                }
                return Ok(Pipes { cmds });
            } else if space_stats.lf > 0 || is_symbol(c) {
                return Ok(Pipes { cmds });
            } else {
                Err(ParseError::Unknown(c))?;
            }
        }
    }
}

/// One arm of a `case` statement: a pattern and the block attached to it.
#[derive(Debug, Clone, PartialEq)]
pub struct CaseBranch {
    pub pattern: regex::Pattern,
    pub block: Block,
}

/// A `case` statement: a discriminant string and its branches.
// NOTE(review): `T` is used below but not declared on the struct as shown, and
// `Vec` has no element type; generic parameters appear to be missing from this
// listing -- confirm against the canonical source.
#[derive(Debug, Clone, PartialEq)]
pub struct Case {
    pub discriminant: T::Str,
    pub branches: Vec,
}

impl CmdDisplay for CaseBranch {
    /// Writes `case_branch("<pattern:?>", <block>)` to `w`, using the
    /// pattern's `Debug` output and the block's `cdisplay`.
    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
        write!(w, "case_branch(\"{:?}\", ", self.pattern)?;
        self.block.cdisplay(w)?;
        write!(w, ")")
    }
}

impl Case {
    /// Expands the discriminant through `e`; the branches are moved into the
    /// result unchanged.
    fn expand(self, e: &mut E) -> Res, E::Error> {
        Ok(Case {
            discriminant: self.discriminant.expand(e)?,
            branches: self.branches,
        })
    }
}

impl Parse for CaseBranch {
    /// Skips whitespace, then parses a `regex::Pattern` followed by a `Block`.
    fn parse(b: &mut Cursor<'_>) -> Result {
        b.spaces();
        let pattern = regex::Pattern::parse(b)?;
        let block = Block::parse(b)?;
        Ok(Self { pattern, block })
    }
}

impl Parse for Case {
    /// Parses `case <discriminant> { <branch>* }`.
    ///
    /// Branches are read until a closing brace can be consumed; an error from
    /// any branch is propagated.
    fn parse(b: &mut Cursor<'_>) -> Result {
        b.consume_keyword(Keyword::Case)?;
        let discriminant = ExpString::parse(b)?;
        b.consume_keyword(Keyword::OpenBrace)?;
        let mut branches = Vec::new();
        loop {
            b.spaces();
            if b.consume_keyword(Keyword::CloseBrace).is_ok() {
                break;
            }
            branches.push(CaseBranch::parse(b)?);
        }
        Ok(Self {
            discriminant,
            branches,
        })
    }
}