about | summary | refs | log | tree | commit | diff | stats
path: root/src/parse
diff options
context:
space:
mode:
author Jonas Maier <> 2026-03-07 11:35:38 +0100
committer Jonas Maier <> 2026-03-07 11:35:38 +0100
commit c36bf58bd0d3d8d2b89211c0bfccab68dad53d66 (patch)
tree 4c7bd138a280aed1120f85e4110beb51a8356b24 /src/parse
parent b3ea0f7580a41f0c7769ba610a6219a5fc7c9eb6 (diff)
download pish-c36bf58bd0d3d8d2b89211c0bfccab68dad53d66.tar.gz
finish parsing stuff, add first parsing test
Diffstat (limited to 'src/parse')
-rw-r--r-- src/parse/mod.rs 777
-rw-r--r-- src/parse/test.rs 16
2 files changed, 793 insertions, 0 deletions
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
new file mode 100644
index 0000000..4f38f9b
--- /dev/null
+++ b/src/parse/mod.rs
@@ -0,0 +1,777 @@
+use crate::BString;
+
+#[cfg(test)]
+mod test;
+
+/// Marker for a phase of the AST; it selects the string type that AST nodes
+/// carry in that phase.
+pub trait Stage : PartialEq {
+ /// String representation used by AST nodes of this stage.
+ type Str: std::fmt::Debug + Clone + PartialEq;
+}
+
+/// Renders a value as text built from this module's constructor helpers
+/// (the implementations in this file emit `decl(..)`, `assign(..)`,
+/// `pipes([..])`, `cmd([..])`, `estr(..)`, `str([..])`, ...).
+pub trait CmdDisplay {
+ /// Writes the rendering of `self` to `w`, propagating any I/O error.
+ fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>;
+}
+
+/// Stage marker whose string type is [`ExpString`] (strings that may still
+/// contain variable and command substitutions).
+#[derive(Debug, Clone, PartialEq)]
+pub struct PreExpansion;
+/// Stage marker whose string type is plain [`BString`].
+#[derive(Debug, Clone, PartialEq)]
+pub struct PostExpansion;
+
+// Before expansion, strings are kept as a list of parts to be expanded later.
+impl Stage for PreExpansion {
+ type Str = ExpString;
+}
+
+// After expansion, every string is a plain byte string.
+impl Stage for PostExpansion {
+ type Str = BString;
+}
+
+/// Two-parameter alias for the standard `Result`; needed because this module
+/// also defines a one-parameter `Result<T>` alias fixed to `ParseError`.
+type Res<T, E> = std::result::Result<T, E>;
+
+/// Callbacks used while turning a pre-expansion AST into a post-expansion one.
+pub trait Expander {
+ /// Error type returned when an expansion fails.
+ type Error;
+ /// Produces the bytes that replace a variable reference; `v` is the variable name.
+ fn expand_var(&mut self, v: BString) -> Res<BString, Self::Error>;
+ /// Produces the bytes that replace a command substitution; `c` is the
+ /// already-expanded AST of the substituted command.
+ fn expand_cmd(&mut self, c: Ast<PostExpansion>) -> Res<BString, Self::Error>;
+}
+
+/// One parsed input: a function declaration, a variable assignment, or a
+/// pipeline of commands. `T` selects the string type of the nodes.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Ast<T: Stage> {
+ /// `fun <name> { <body> }`
+ FunDecl(FunDecl<T>),
+ /// `set <var> = <val>`
+ VarAssign(VarAssign<T>),
+ /// One or more commands separated by `|`.
+ Pipes(Pipes<T>),
+}
+
+/// Builds an [`Ast::FunDecl`] named `name` whose body is `body`, boxed inside
+/// a [`FunBody`].
+pub fn decl(name: ExpString, body: Ast<PreExpansion>) -> Ast<PreExpansion> {
+    let body = FunBody {
+        body: Box::new(body),
+    };
+    Ast::FunDecl(FunDecl { name, body })
+}
+
+/// Builds an [`Ast::VarAssign`] that assigns `val` to `var`.
+pub fn assign(var: ExpString, val: ExpString) -> Ast<PreExpansion> {
+    let assignment = VarAssign { var, val };
+    Ast::VarAssign(assignment)
+}
+
+/// Builds an [`Ast::Pipes`] from the given commands, keeping their order.
+pub fn pipes<const N: usize>(cmds: [Command<PreExpansion>; N]) -> Ast<PreExpansion> {
+    // The array is owned, so its elements can be moved into the vector.
+    let cmds = Vec::from(cmds);
+    Ast::Pipes(Pipes { cmds })
+}
+
+/// Builds an [`ExpString`] consisting of a single literal part holding a copy of `x`.
+pub fn estr(x: &[u8]) -> ExpString {
+    let literal = StringPart::Boring(x.to_owned());
+    ExpString {
+        parts: vec![literal],
+    }
+}
+
+/// Builds an [`ExpString`] from the given parts, keeping their order.
+pub fn str<const N: usize>(parts: [StringPart; N]) -> ExpString {
+    // The array is owned, so its elements can be moved into the vector.
+    let parts = Vec::from(parts);
+    ExpString { parts }
+}
+
+/// Builds a literal [`StringPart::Boring`] holding a copy of `x`.
+pub fn plain(x: &[u8]) -> StringPart {
+    let bytes = x.to_owned();
+    StringPart::Boring(bytes)
+}
+
+/// Builds a [`StringPart::Var`] referring to the variable named `x`.
+pub fn var(x: &[u8]) -> StringPart {
+    let name = x.to_owned();
+    StringPart::Var(VarName { name })
+}
+
+/// Wraps the AST `x` as a command-substitution part ([`StringPart::Cmd`]).
+pub fn cmdp(x: Ast<PreExpansion>) -> StringPart {
+ StringPart::Cmd(x)
+}
+
+/// Builds a [`Command`] from a list of words: the first word is the command,
+/// the remaining words are its arguments.
+///
+/// # Panics
+///
+/// Panics if `x` is empty, because a command needs at least its name.
+pub fn cmd<const N: usize>(x: [ExpString; N]) -> Command<PreExpansion> {
+    // The array is owned, so the words are moved rather than cloned.
+    let mut words = Vec::from(x).into_iter();
+    let cmd = words
+        .next()
+        .expect("cmd() requires at least the command name");
+    Command {
+        cmd,
+        args: words.collect(),
+    }
+}
+
+impl CmdDisplay for Ast<PreExpansion> {
+    /// Writes the node as `decl(name, body)`, `assign(var, val)` or
+    /// `pipes([cmd,cmd,])`; note that every command in a pipeline is followed
+    /// by a comma, including the last one.
+    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
+        match self {
+            Ast::FunDecl(FunDecl { name, body }) => {
+                w.write_all(b"decl(")?;
+                name.cdisplay(w)?;
+                w.write_all(b", ")?;
+                body.body.cdisplay(w)?;
+                w.write_all(b")")
+            }
+            Ast::VarAssign(VarAssign { var, val }) => {
+                w.write_all(b"assign(")?;
+                var.cdisplay(w)?;
+                w.write_all(b", ")?;
+                val.cdisplay(w)?;
+                w.write_all(b")")
+            }
+            Ast::Pipes(Pipes { cmds }) => {
+                w.write_all(b"pipes([")?;
+                for stage in cmds {
+                    stage.cdisplay(w)?;
+                    w.write_all(b",")?;
+                }
+                w.write_all(b"])")
+            }
+        }
+    }
+}
+
+impl CmdDisplay for ExpString {
+    /// Writes a string made of exactly one literal part as `estr(b"...")`;
+    /// anything else is written as `str([part,part])` with the parts
+    /// separated by commas.
+    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
+        if let [StringPart::Boring(bytes)] = self.parts.as_slice() {
+            return write!(w, "estr(b\"{}\")", bytes.escape_ascii());
+        }
+
+        write!(w, "str([")?;
+        for (idx, part) in self.parts.iter().enumerate() {
+            // separator goes between parts, never before the first one
+            if idx > 0 {
+                write!(w, ",")?;
+            }
+            part.cdisplay(w)?;
+        }
+        write!(w, "])")
+    }
+}
+
+impl CmdDisplay for StringPart {
+    /// Writes the part as `plain(b"...")`, `var(b"...")` or `cmdp(<ast>)`.
+    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
+        // Each arm writes its opening call and payload; the closing
+        // parenthesis is shared by all three.
+        match self {
+            StringPart::Boring(bytes) => {
+                w.write_all(b"plain(")?;
+                bytes.as_slice().cdisplay(w)?;
+            }
+            StringPart::Var(var_name) => {
+                w.write_all(b"var(")?;
+                var_name.name.as_slice().cdisplay(w)?;
+            }
+            StringPart::Cmd(ast) => {
+                w.write_all(b"cmdp(")?;
+                ast.cdisplay(w)?;
+            }
+        }
+        w.write_all(b")")
+    }
+}
+
+impl CmdDisplay for Command<PreExpansion> {
+    /// Writes the command as `cmd([name, arg, arg])`, each argument preceded
+    /// by `", "`.
+    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
+        w.write_all(b"cmd([")?;
+        self.cmd.cdisplay(w)?;
+        for argument in &self.args {
+            w.write_all(b", ")?;
+            argument.cdisplay(w)?;
+        }
+        w.write_all(b"])")
+    }
+}
+
+impl CmdDisplay for &[u8] {
+    /// Writes the bytes as a byte-string literal, `b"..."`, with the content
+    /// escaped by `escape_ascii`.
+    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
+        write!(w, "b\"{}\"", self.escape_ascii())
+    }
+}
+
+impl Ast<PreExpansion> {
+    /// Expands the node with `e`, returning the same kind of node in the
+    /// post-expansion stage, or the first error reported by the expander.
+    pub fn expand<E: Expander>(self, e: &mut E) -> Res<Ast<PostExpansion>, E::Error> {
+        let expanded = match self {
+            Ast::FunDecl(fd) => Ast::FunDecl(fd.expand(e)?),
+            Ast::VarAssign(va) => Ast::VarAssign(va.expand(e)?),
+            Ast::Pipes(pipes) => Ast::Pipes(pipes.expand(e)?),
+        };
+        Ok(expanded)
+    }
+}
+
+/// Body of a function declaration. It is always stored pre-expansion:
+/// expanding a `FunDecl` only expands the name and keeps the body as is.
+#[derive(Debug, Clone, PartialEq)]
+pub struct FunBody {
+ /// The parsed body; boxed because `Ast` contains `FunDecl`, which contains `FunBody`.
+ pub body: Box<Ast<PreExpansion>>,
+}
+
+impl Parse for FunBody {
+    /// Parses `{ <ast> }`, skipping whitespace before `{` and before `}`.
+    ///
+    /// On success the cursor is left just past the closing `}`.
+    ///
+    /// # Errors
+    ///
+    /// * `ParseError::Eof` if the input ends before `{`, or before `}` when
+    ///   not parsing for completion.
+    /// * `ParseError::Expected('{')` / `ParseError::Expected('}')` if another
+    ///   byte is found where the brace is required.
+    /// * Any error produced while parsing the body itself.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        b.spaces();
+
+        if b.is_empty() {
+            return Err(ParseError::Eof);
+        }
+        if b.peek() != b'{' {
+            return Err(ParseError::Expected('{'));
+        }
+        b.adv();
+
+        let body = Box::new(Ast::parse(b)?);
+
+        // A body that is an assignment or a nested declaration stops right
+        // after its last token, so whitespace may still precede the brace.
+        b.spaces();
+
+        if b.is_empty() {
+            // While completing, a body that is not closed yet is accepted.
+            return if b.is_completion() {
+                Ok(Self { body })
+            } else {
+                Err(ParseError::Eof)
+            };
+        }
+        if b.peek() != b'}' {
+            return Err(ParseError::Expected('}'));
+        }
+
+        // Consume the closing brace so that an enclosing parser does not see
+        // it as the next token.
+        b.adv();
+        Ok(Self { body })
+    }
+}
+
+/// A function declaration: `fun <name> { <body> }`.
+#[derive(Debug, Clone, PartialEq)]
+pub struct FunDecl<S: Stage> {
+ /// Name of the function, in the string type of stage `S`.
+ pub name: S::Str,
+ /// Body of the function (always pre-expansion, see `FunBody`).
+ pub body: FunBody,
+}
+
+impl Parse for FunDecl<PreExpansion> {
+    /// Parses `fun` followed by a space or tab, then a name and a braced body.
+    ///
+    /// Returns `ParseError::NotAFunDecl` without consuming anything when the
+    /// input does not start with that keyword; later errors come from the
+    /// name or body parser.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        let has_keyword = b.buf.starts_with(b"fun ") || b.buf.starts_with(b"fun\t");
+        if !has_keyword {
+            return Err(ParseError::NotAFunDecl);
+        }
+
+        // skip the keyword together with the whitespace byte that follows it
+        b.advance(4);
+        b.spaces();
+
+        let name = b.parse::<ExpString>()?;
+        let body = b.parse::<FunBody>()?;
+        Ok(Self { name, body })
+    }
+}
+
+impl FunDecl<PreExpansion> {
+    /// Expands the function name; the body is carried over unexpanded.
+    fn expand<E: Expander>(self, e: &mut E) -> Res<FunDecl<PostExpansion>, E::Error> {
+        let FunDecl { name, body } = self;
+        let name = name.expand(e)?;
+        Ok(FunDecl { name, body })
+    }
+}
+
+/// A variable assignment: `set <var> = <val>`.
+#[derive(Debug, Clone, PartialEq)]
+pub struct VarAssign<S: Stage> {
+ /// Name of the variable being assigned.
+ pub var: S::Str,
+ /// Value assigned to the variable.
+ pub val: S::Str,
+}
+
+impl Parse for VarAssign<PreExpansion> {
+    /// Parses `set` followed by a space or tab, a variable name, `=`, and a value.
+    ///
+    /// Returns `ParseError::NotAVarAssign` without consuming anything when
+    /// the input does not start with that keyword, `ParseError::Eof` when the
+    /// input ends before `=`, and `ParseError::Expected('=')` when another
+    /// byte is found in its place.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        let has_keyword = b.buf.starts_with(b"set ") || b.buf.starts_with(b"set\t");
+        if !has_keyword {
+            return Err(ParseError::NotAVarAssign);
+        }
+
+        // skip the keyword together with the whitespace byte that follows it
+        b.advance(4);
+        b.spaces();
+
+        let var: ExpString = b.parse()?;
+        b.spaces();
+
+        if !b.has() {
+            return Err(ParseError::Eof);
+        }
+        if b.adv() != b'=' {
+            return Err(ParseError::Expected('='));
+        }
+
+        let val: ExpString = b.parse()?;
+        Ok(Self { var, val })
+    }
+}
+
+impl VarAssign<PreExpansion> {
+    /// Expands the variable name first, then the value.
+    fn expand<E: Expander>(self, e: &mut E) -> Res<VarAssign<PostExpansion>, E::Error> {
+        let var = self.var.expand(e)?;
+        let val = self.val.expand(e)?;
+        Ok(VarAssign { var, val })
+    }
+}
+
+/// A pipeline: commands that were separated by `|` in the input.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Pipes<T: Stage> {
+ /// The commands in the order they were written.
+ pub cmds: Vec<Command<T>>,
+}
+
+impl Pipes<PreExpansion> {
+    /// Expands every command in order, stopping at the first error.
+    fn expand<E: Expander>(self, e: &mut E) -> Res<Pipes<PostExpansion>, E::Error> {
+        let cmds = self
+            .cmds
+            .into_iter()
+            .map(|stage| stage.expand(&mut *e))
+            .collect::<Res<Vec<_>, E::Error>>()?;
+        Ok(Pipes { cmds })
+    }
+}
+
+/// One piece of an [`ExpString`].
+#[derive(Debug, Clone, PartialEq)]
+pub enum StringPart {
+ /// Literal bytes that are copied through unchanged on expansion.
+ Boring(BString),
+ /// A variable reference, replaced by `Expander::expand_var` on expansion.
+ Var(VarName),
+ /// A command substitution, replaced by `Expander::expand_cmd` on expansion.
+ Cmd(Ast<PreExpansion>),
+}
+
+impl StringPart {
+    /// Returns `true` when the part is literal text ([`StringPart::Boring`]).
+    pub fn is_boring(&self) -> bool {
+        match self {
+            StringPart::Boring(..) => true,
+            StringPart::Var(..) | StringPart::Cmd(..) => false,
+        }
+    }
+
+    /// Returns the literal bytes of a [`StringPart::Boring`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if the part is a variable or command substitution.
+    pub fn unwrap_boring(self) -> BString {
+        if let StringPart::Boring(bytes) = self {
+            bytes
+        } else {
+            panic!("unwrap on non-boring value")
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+/// A string that may contain expansions, stored as a sequence of parts.
+///
+/// `"hi ${var} $(cmd) "` is meant to map to
+/// `[Boring("hi "), Var("var"), Boring(" "), Cmd(...), Boring(" ")]`.
+pub struct ExpString {
+ /// The parts in source order.
+ parts: Vec<StringPart>,
+}
+
+impl ExpString {
+    /// Concatenates the expansion of every part, in order.
+    ///
+    /// Literal parts are copied as they are, variables go through
+    /// `Expander::expand_var`, and command substitutions are expanded
+    /// recursively before being handed to `Expander::expand_cmd`. The first
+    /// error aborts the expansion.
+    fn expand<E: Expander>(self, e: &mut E) -> Res<BString, E::Error> {
+        let mut out = BString::new();
+        for part in self.parts {
+            let piece = match part {
+                StringPart::Boring(bytes) => bytes,
+                StringPart::Var(VarName { name }) => e.expand_var(name)?,
+                StringPart::Cmd(ast) => {
+                    let expanded = ast.expand(e)?;
+                    e.expand_cmd(expanded)?
+                }
+            };
+            out.extend(piece);
+        }
+        Ok(out)
+    }
+}
+
+/// Returns `true` for the bytes the parser treats as syntax:
+/// `|`, `{`, `}`, `$`, `(`, `)`, `'` and `"`.
+fn is_symbol(x: u8) -> bool {
+    matches!(
+        x,
+        b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"'
+    )
+}
+
+/// Returns `true` if `x` may start a variable name, i.e. is an ASCII letter.
+fn is_var_begin(x: u8) -> bool {
+    matches!(x, b'a'..=b'z' | b'A'..=b'Z')
+}
+/// Returns `true` if `x` may appear inside a variable name: an ASCII letter,
+/// an ASCII digit, or `_`.
+fn is_var_name(x: u8) -> bool {
+    matches!(x, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
+}
+
+/// Name of a variable referenced inside a string.
+#[derive(Debug, Clone, PartialEq)]
+pub struct VarName {
+ /// The raw bytes of the name, without `$` or braces.
+ name: BString,
+}
+
+impl Parse for VarName {
+    /// Parses the longest run of name bytes (ASCII letters, digits, `_`)
+    /// starting at the cursor; the first byte must be an ASCII letter.
+    ///
+    /// Returns `ParseError::Eof` on empty input and
+    /// `ParseError::ExpectedAlphabetic` if the first byte is not a letter.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        if !b.has() {
+            return Err(ParseError::Eof);
+        }
+        if !is_var_begin(b.peek()) {
+            return Err(ParseError::ExpectedAlphabetic);
+        }
+
+        let mut name = BString::new();
+        while b.has() && is_var_name(b.peek()) {
+            name.push(b.adv());
+        }
+
+        Ok(Self { name })
+    }
+}
+
+impl Parse for ExpString {
+    /// Parses one word or one quoted string into its parts.
+    ///
+    /// Leading whitespace is skipped. The string is delimited by `'`, by `"`,
+    /// or, when unquoted, by whitespace or any symbol other than `$`.
+    ///
+    /// * Inside `'...'` every byte is taken literally.
+    /// * Elsewhere `\` makes the next byte literal, and `$` introduces
+    ///   `$?`, `$!`, `$name`, `${name}` or `$(command)`. A `$` followed by
+    ///   anything else is kept as a literal `$`.
+    ///
+    /// A closing quote is consumed; the byte that ends an unquoted word is
+    /// left in the input.
+    ///
+    /// # Errors
+    ///
+    /// * `ParseError::NotAString` if the input is empty or starts with a
+    ///   symbol other than a quote or `$`.
+    /// * `ParseError::Eof` if a quoted string, `${` or `$(` is not closed
+    ///   before the input ends (an unclosed quote is accepted in completion
+    ///   mode).
+    /// * `ParseError::Incomplete` if `${name` is followed by a byte other
+    ///   than `}` or `:`.
+    /// * `ParseError::Expected(')')` if a command substitution is followed
+    ///   by something other than `)`.
+    ///
+    /// # Panics
+    ///
+    /// `${name:...}` is not implemented yet and hits `todo!`.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        b.spaces();
+        if b.is_empty() {
+            return Err(ParseError::NotAString);
+        }
+
+        // `delim` is the byte that ends the string; a space stands for
+        // "unquoted word".
+        let mut delim = b.peek();
+        if delim == b'\'' || delim == b'"' {
+            b.adv();
+        } else if is_symbol(delim) && delim != b'$' {
+            return Err(ParseError::NotAString);
+        } else {
+            delim = b' ';
+        }
+
+        let mut parts = Vec::new();
+        let p = &mut parts;
+        let mut escaping = false;
+
+        // Appends a literal byte, extending the trailing literal part if
+        // there is one.
+        let add_char = |p: &mut Vec<StringPart>, x: u8| match p.last_mut() {
+            Some(StringPart::Boring(v)) => v.push(x),
+            _ => p.push(StringPart::Boring(vec![x])),
+        };
+
+        while b.has() {
+            let x = b.peek();
+
+            if escaping {
+                // the previous byte was a backslash: take this one literally
+                add_char(p, x);
+                escaping = false;
+                b.adv();
+                continue;
+            }
+
+            if x == delim || (b.peek_space() && delim == b' ') {
+                // a closing quote belongs to the string; whitespace after a
+                // word does not
+                if delim != b' ' {
+                    b.adv();
+                }
+                return Ok(Self { parts });
+            }
+
+            if delim == b' ' && is_symbol(x) && x != b'$' {
+                // a symbol ends an unquoted word and is left for the caller
+                return Ok(Self { parts });
+            }
+
+            b.adv();
+
+            if delim == b'\'' {
+                // no fancy stuff here
+                add_char(p, x);
+                continue;
+            }
+
+            if x == b'\\' {
+                escaping = true;
+                continue;
+            }
+
+            if x == b'$' {
+                if !b.has() {
+                    // trailing `$` is literal
+                    add_char(p, x);
+                    continue;
+                }
+
+                let x = b.peek();
+
+                if x == b'?' || x == b'!' {
+                    b.adv();
+                    p.push(StringPart::Var(VarName { name: vec![x] }))
+                } else if is_var_begin(x) {
+                    let v = VarName::parse(b)?;
+                    p.push(StringPart::Var(v));
+                } else if x == b'{' {
+                    b.adv();
+                    let v = VarName::parse(b)?;
+
+                    if !b.has() {
+                        return Err(ParseError::Eof);
+                    } else if b.peek() == b':' {
+                        todo!(": in var expansion")
+                    } else if b.peek() != b'}' {
+                        return Err(ParseError::Incomplete);
+                    }
+
+                    b.adv();
+                    p.push(StringPart::Var(v));
+                } else if x == b'(' {
+                    b.adv();
+                    let cmd = Ast::parse(b)?;
+                    b.spaces();
+                    if b.is_empty() {
+                        return Err(ParseError::Eof);
+                    } else if b.peek() != b')' {
+                        return Err(ParseError::Expected(')'));
+                    }
+                    // Consume the closing parenthesis; otherwise it would be
+                    // read again as literal text (inside quotes) or end the
+                    // word early (unquoted).
+                    b.adv();
+                    p.push(StringPart::Cmd(cmd));
+                } else {
+                    // doesn't seem to be a variable or expansion, just add $ back into the string
+                    add_char(p, b'$');
+                    continue;
+                }
+
+                continue;
+            }
+
+            add_char(p, x);
+        }
+
+        // Input ended: fine for an unquoted word, and tolerated for an open
+        // quote only while completing.
+        if b.is_completion() || delim == b' ' {
+            Ok(Self { parts })
+        } else {
+            Err(ParseError::Eof)
+        }
+    }
+}
+
+/// A single command of a pipeline: the command word and its arguments.
+#[derive(Debug, Clone, PartialEq)]
+pub struct Command<T: Stage> {
+ /// The first word of the command.
+ pub cmd: T::Str,
+ /// The remaining words, in order.
+ pub args: Vec<T::Str>,
+}
+
+impl Command<PreExpansion> {
+    /// Expands the command word first and then each argument in order,
+    /// stopping at the first error.
+    fn expand<E: Expander>(self, e: &mut E) -> Res<Command<PostExpansion>, E::Error> {
+        let Command { cmd, args } = self;
+        let cmd = cmd.expand(e)?;
+        let args = args
+            .into_iter()
+            .map(|word| word.expand(&mut *e))
+            .collect::<Res<Vec<_>, E::Error>>()?;
+        Ok(Command { cmd, args })
+    }
+}
+
+/// Reasons the parsers in this module can fail.
+#[allow(unused)]
+#[derive(Debug)]
+pub enum ParseError {
+ /// The input ended where more was required.
+ ///
+ /// NOTE(review): originally described as a "clean" EOF, but the parsers in
+ /// this file also return it for an unterminated quoted string and for a
+ /// missing `}` or `)`.
+ Eof,
+
+ /// NOTE(review): originally described as an "unclean" EOF (EOF after
+ /// beginning a quoted string), but the only use visible in this file is a
+ /// `${name` followed by a byte other than `}` or `:`.
+ Incomplete,
+
+ /// A variable name did not start with an ASCII letter.
+ ExpectedAlphabetic,
+
+ /// An unexpected byte was found after a command in a pipeline.
+ Unknown(u8),
+
+ /// The given delimiter was required but a different byte was found.
+ Expected(char),
+
+ /// No string starts at the cursor: the input is empty or begins with a
+ /// symbol other than a quote or `$`.
+ NotAString,
+
+ /// The input does not start with `fun` followed by a space or tab.
+ NotAFunDecl,
+
+ /// The input does not start with `set` followed by a space or tab.
+ NotAVarAssign,
+}
+
+/// Result of a parser in this module; the error is always a [`ParseError`].
+type Result<T> = std::result::Result<T, ParseError>;
+
+/// Parses `x` as a single [`Ast`] in command mode.
+///
+/// On failure the error is returned together with the input that had not
+/// been consumed when parsing stopped.
+pub fn do_parse(x: &[u8]) -> Res<Ast<PreExpansion>, (ParseError, &[u8])> {
+    let mut c = Cursor::new(x, ParseMode::Command);
+    let parsed = Ast::parse(&mut c);
+    let rest = c.buf;
+    parsed.map_err(|e| (e, rest))
+}
+
+/// What kind of word is being completed.
+pub enum CompletionKind {
+ /// The word is the command itself (the command has no arguments yet).
+ Command,
+ /// The word is the last argument of a command.
+ Argument,
+ /// Nothing can be completed at this position.
+ None,
+}
+
+/// Result of analysing an input line for completion.
+pub struct CompletionContext {
+ /// Kind of word under completion.
+ pub kind: CompletionKind,
+ /// The text of that word typed so far; empty when `kind` is `None`.
+ pub partial: BString,
+}
+
+impl CompletionContext {
+    /// Context meaning "nothing to complete": kind `None`, empty partial text.
+    pub fn none() -> Self {
+        let partial = BString::new();
+        Self {
+            partial,
+            kind: CompletionKind::None,
+        }
+    }
+}
+
+/// Builds the completion context for the word `s`.
+///
+/// * A word made of exactly one literal part completes as `kind`, with that
+///   literal as the partial text.
+/// * A word with no parts at all (for example a quote that was just opened)
+///   completes as `kind` with an empty partial text.
+/// * A word containing a variable or command substitution, or more than one
+///   part, cannot be completed and yields `CompletionContext::none()`.
+fn expstr_cc(s: &ExpString, kind: CompletionKind) -> CompletionContext {
+    match s.parts.as_slice() {
+        // Matching on the slice avoids indexing `parts[0]`, which would
+        // panic for a string without parts.
+        [] => CompletionContext {
+            kind,
+            partial: BString::new(),
+        },
+        [StringPart::Boring(text)] => CompletionContext {
+            kind,
+            partial: text.clone(),
+        },
+        _ => CompletionContext::none(),
+    }
+}
+
+/// Works out what should be completed at the end of the input line `x`.
+///
+/// The line is parsed in completion mode. A completion is offered only when
+/// it parses as a pipeline and the cursor did not stop right after
+/// whitespace; the word completed is the last argument of the last command,
+/// or the command word itself when there are no arguments.
+pub fn completion_context<'a>(x: &'a [u8]) -> CompletionContext {
+    let mut cursor = Cursor::new(x, ParseMode::Completion);
+    let ast = Ast::parse(&mut cursor);
+
+    let pipes = match ast {
+        Ok(Ast::Pipes(pipes)) if !cursor.spaced => pipes,
+        _ => return CompletionContext::none(),
+    };
+
+    match pipes.cmds.last() {
+        None => CompletionContext::none(),
+        Some(cmd) => match cmd.args.last() {
+            Some(last_arg) => expstr_cc(last_arg, CompletionKind::Argument),
+            None => expstr_cc(&cmd.cmd, CompletionKind::Command),
+        },
+    }
+}
+
+/// Implemented by every syntax node that can be parsed from a [`Cursor`].
+trait Parse: Sized {
+ /// Parses one value starting at the cursor position, advancing the cursor
+ /// past whatever was consumed.
+ fn parse(b: &mut Cursor<'_>) -> Result<Self>;
+}
+
+/// Selects how strictly unfinished input is treated.
+enum ParseMode {
+ /// Regular parsing (used by `do_parse`).
+ Command,
+ /// Parsing for tab completion (used by `completion_context`); an open
+ /// quote or function body at the end of the input is accepted.
+ Completion,
+}
+
+/// Read position over the input being parsed.
+struct Cursor<'a> {
+ /// The input that has not been consumed yet.
+ buf: &'a [u8],
+ /// Mode the parse was started in.
+ mode: ParseMode,
+
+ /// `true` if the most recently consumed byte was whitespace skipped by
+ /// `spaces`, `false` if it was consumed by `adv`/`advance` (or nothing
+ /// has been consumed yet).
+ spaced: bool,
+
+ /// Debug switch: when `true`, `peek`, `adv` and `advance` print a short
+ /// backtrace.
+ backtrace: bool,
+}
+
+impl<'a> Cursor<'a> {
+    /// Creates a cursor at the start of `buf`, with tracing disabled.
+    fn new(buf: &'a [u8], mode: ParseMode) -> Self {
+        Self {
+            backtrace: false,
+            spaced: false,
+            mode,
+            buf,
+        }
+    }
+
+    /// Returns `true` while unconsumed input remains.
+    fn has(&self) -> bool {
+        !self.is_empty()
+    }
+
+    /// Returns `true` once all input has been consumed.
+    fn is_empty(&self) -> bool {
+        self.buf.is_empty()
+    }
+
+    /// Debug aid: when tracing is enabled, prints `word`, the next byte and
+    /// two lines of the current backtrace. Does nothing otherwise.
+    fn bt(&self, word: &str) {
+        if !self.backtrace {
+            return;
+        }
+        let trace = std::backtrace::Backtrace::capture().to_string();
+        println!("{word} {}\r", self.buf[0] as char);
+        for line in trace.lines().skip(4).take(2) {
+            println!("{line}\r");
+        }
+        println!("\r");
+    }
+
+    /// Returns the next byte without consuming it.
+    ///
+    /// Panics if the input is exhausted.
+    fn peek(&self) -> u8 {
+        self.bt("peek");
+        self.buf[0]
+    }
+
+    /// Consumes and returns the next byte, clearing `spaced`.
+    ///
+    /// Panics if the input is exhausted.
+    fn adv(&mut self) -> u8 {
+        self.bt("adv");
+        let byte = self.buf[0];
+        self.buf = &self.buf[1..];
+        self.spaced = false;
+        byte
+    }
+
+    /// Consumes and returns the next `amt` bytes, clearing `spaced`.
+    ///
+    /// Panics if fewer than `amt` bytes remain.
+    fn advance(&mut self, amt: usize) -> &[u8] {
+        self.bt(&format!("adv({amt})"));
+        let (taken, rest) = self.buf.split_at(amt);
+        self.buf = rest;
+        self.spaced = false;
+        taken
+    }
+
+    /// Returns `true` if the next byte is a space, tab, newline or carriage
+    /// return; `false` at the end of the input.
+    fn peek_space(&self) -> bool {
+        self.buf
+            .first()
+            .map_or(false, |&c| matches!(c, b' ' | b'\t' | b'\n' | b'\r'))
+    }
+
+    /// Consumes any run of whitespace, setting `spaced` if at least one byte
+    /// was skipped.
+    fn spaces(&mut self) {
+        while self.peek_space() {
+            self.adv();
+            // `adv` clears the flag, so it is set again after every byte
+            self.spaced = true;
+        }
+    }
+
+    /// Returns `true` when parsing in completion mode.
+    fn is_completion(&self) -> bool {
+        matches!(self.mode, ParseMode::Completion)
+    }
+
+    /// Parses a `T` at the current position; shorthand for `T::parse(self)`.
+    fn parse<T: Parse>(&mut self) -> Result<T> {
+        T::parse(self)
+    }
+}
+
+impl Parse for Ast<PreExpansion> {
+    /// Parses one statement, trying an assignment, then a function
+    /// declaration, then a pipeline.
+    ///
+    /// A failed attempt counts as "not this kind of statement" only if it
+    /// consumed no input; once input was consumed, its error is returned.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        b.spaces();
+
+        let before = b.buf.len();
+        match VarAssign::parse(b) {
+            Ok(va) => return Ok(Self::VarAssign(va)),
+            Err(err) if b.buf.len() != before => return Err(err),
+            Err(_) => {}
+        }
+
+        let before = b.buf.len();
+        match FunDecl::parse(b) {
+            Ok(fd) => return Ok(Self::FunDecl(fd)),
+            Err(err) if b.buf.len() != before => return Err(err),
+            Err(_) => {}
+        }
+
+        let pipeline: Pipes<PreExpansion> = b.parse()?;
+        Ok(Self::Pipes(pipeline))
+    }
+}
+
+impl Parse for Command<PreExpansion> {
+    /// Parses the command word followed by as many argument words as can be
+    /// read.
+    ///
+    /// Arguments end at the first position where no string starts
+    /// (`ParseError::NotAString`); any other error is returned to the caller.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        let cmd: ExpString = b.parse()?;
+        let mut args = Vec::new();
+        loop {
+            match b.parse::<ExpString>() {
+                Ok(word) => args.push(word),
+                Err(ParseError::NotAString) => return Ok(Self { cmd, args }),
+                Err(other) => return Err(other),
+            }
+        }
+    }
+}
+
+impl Parse for Pipes<PreExpansion> {
+    /// Parses one command followed by any number of `| command` continuations.
+    ///
+    /// Parsing stops at the end of the input or at a symbol other than `|`,
+    /// which is left for the caller. Any other byte at that position yields
+    /// `ParseError::Unknown`.
+    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
+        let first: Command<PreExpansion> = b.parse()?;
+        let mut cmds = vec![first];
+
+        loop {
+            b.spaces();
+            if b.is_empty() {
+                break;
+            }
+
+            match b.peek() {
+                b'|' => {
+                    b.adv();
+                    cmds.push(b.parse()?);
+                }
+                c if is_symbol(c) => break,
+                c => return Err(ParseError::Unknown(c)),
+            }
+        }
+
+        Ok(Pipes { cmds })
+    }
+}
diff --git a/src/parse/test.rs b/src/parse/test.rs
new file mode 100644
index 0000000..6cd7793
--- /dev/null
+++ b/src/parse/test.rs
@@ -0,0 +1,16 @@
+use super::*;
+
+/// Test helper: parses `x` with `do_parse` and panics if parsing fails.
+fn parse(x: &[u8]) -> Ast<PreExpansion> {
+ do_parse(x).unwrap()
+}
+
+/// A `$(...)` substitution inside double quotes is expected to parse into a
+/// single `Cmd` part holding the inner pipeline.
+#[test]
+fn command_interp() {
+ assert_eq!(
+ parse(br#"echo "$(echo hi)""#),
+ pipes([cmd([
+ estr(b"echo"),
+ str([cmdp(pipes([cmd([estr(b"echo"), estr(b"hi")]),])),])
+ ]),])
+ );
+}