aboutsummaryrefslogtreecommitdiffstats
path: root/src/parse.rs
diff options
context:
space:
mode:
authorJonas Maier <>2026-03-07 11:35:38 +0100
committerJonas Maier <>2026-03-07 11:35:38 +0100
commitc36bf58bd0d3d8d2b89211c0bfccab68dad53d66 (patch)
tree4c7bd138a280aed1120f85e4110beb51a8356b24 /src/parse.rs
parentb3ea0f7580a41f0c7769ba610a6219a5fc7c9eb6 (diff)
downloadpish-c36bf58bd0d3d8d2b89211c0bfccab68dad53d66.tar.gz
finish parsing stuff, add first parsing test
Diffstat (limited to 'src/parse.rs')
-rw-r--r--src/parse.rs769
1 files changed, 0 insertions, 769 deletions
diff --git a/src/parse.rs b/src/parse.rs
deleted file mode 100644
index 61d268d..0000000
--- a/src/parse.rs
+++ /dev/null
@@ -1,769 +0,0 @@
-use crate::BString;
-
-/// Marks a phase of the AST's life and selects the string type its nodes carry.
-///
-/// Implemented in this file by `PreExpansion` and `PostExpansion`.
-pub trait Stage {
- /// String representation used by AST nodes in this stage.
- type Str: std::fmt::Debug + Clone;
-}
-
-/// Writes a textual rendering of a value to a byte sink.
-///
-/// The implementations in this file emit text that mirrors the constructor helpers
-/// (`decl(...)`, `assign(...)`, `pipes([...])`, ...).
-pub trait CmdDisplay {
- /// Writes the rendering of `self` to `w`, propagating any I/O error.
- fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()>;
-}
-
-/// Stage marker for an AST whose strings are still `ExpString`s (not yet expanded).
-#[derive(Debug, Clone)]
-pub struct PreExpansion;
-/// Stage marker for an AST whose strings have been expanded to plain `BString`s.
-#[derive(Debug, Clone)]
-pub struct PostExpansion;
-
-// Before expansion, strings may still contain variable and command parts.
-impl Stage for PreExpansion {
- type Str = ExpString;
-}
-
-// After expansion, every string is a plain byte string.
-impl Stage for PostExpansion {
- type Str = BString;
-}
-
-/// Two-parameter result alias; the one-parameter `Result` alias in this file is
-/// fixed to `ParseError`.
-type Res<T, E> = std::result::Result<T, E>;
-
-/// Callbacks used while turning a pre-expansion AST into a post-expansion one.
-pub trait Expander {
- /// Error type returned by either callback; expansion stops at the first error.
- type Error;
- /// Returns the bytes to substitute for the variable named `v`.
- fn expand_var(&mut self, v: BString) -> Res<BString, Self::Error>;
- /// Returns the bytes to substitute for the (already expanded) command `c`.
- fn expand_cmd(&mut self, c: Ast<PostExpansion>) -> Res<BString, Self::Error>;
-}
-
-/// A parsed statement, generic over the expansion stage `T`.
-#[derive(Debug, Clone)]
-pub enum Ast<T: Stage> {
- /// Function declaration, introduced by the `fun` keyword.
- FunDecl(FunDecl<T>),
- /// Variable assignment, introduced by the `set` keyword.
- VarAssign(VarAssign<T>),
- /// One or more commands separated by `|`.
- Pipes(Pipes<T>),
-}
-
-/// Builds a function-declaration node named `name` whose body is `body`.
-pub fn decl(name: ExpString, body: Ast<PreExpansion>) -> Ast<PreExpansion> {
-    let body = FunBody {
-        body: Box::new(body),
-    };
-    Ast::FunDecl(FunDecl { name, body })
-}
-
-/// Builds a variable-assignment node that assigns `val` to `var`.
-pub fn assign(var: ExpString, val: ExpString) -> Ast<PreExpansion> {
-    let node = VarAssign { var, val };
-    Ast::VarAssign(node)
-}
-
-/// Builds a pipeline node from `cmds`, in order.
-///
-/// The array is moved into the vector, so no command is cloned.
-pub fn pipes<const N: usize>(cmds: [Command<PreExpansion>; N]) -> Ast<PreExpansion> {
-    Ast::Pipes(Pipes {
-        cmds: Vec::from(cmds),
-    })
-}
-
-/// Builds an `ExpString` consisting of the single literal part `x`.
-pub fn estr(x: &[u8]) -> ExpString {
-    let literal = StringPart::Boring(x.to_vec());
-    ExpString {
-        parts: vec![literal],
-    }
-}
-
-/// Builds an `ExpString` from `parts`, in order.
-///
-/// The array is moved into the vector, so no part is cloned.
-pub fn str<const N: usize>(parts: [StringPart; N]) -> ExpString {
-    ExpString {
-        parts: Vec::from(parts),
-    }
-}
-
-/// Builds a literal string part holding a copy of `x`.
-pub fn plain(x: &[u8]) -> StringPart {
-    let bytes = x.to_vec();
-    StringPart::Boring(bytes)
-}
-
-/// Builds a variable-reference string part for the variable named `x`.
-pub fn var(x: &[u8]) -> StringPart {
-    let name = x.to_vec();
-    StringPart::Var(VarName { name })
-}
-
-/// Builds a command-substitution string part that wraps the statement `x`.
-pub fn cmdp(x: Ast<PreExpansion>) -> StringPart {
- StringPart::Cmd(x)
-}
-
-/// Builds a command from `x`: the first element is the command word, the rest
-/// are its arguments.
-///
-/// The strings are moved out of the array rather than cloned.
-///
-/// # Panics
-///
-/// Panics if `x` is empty, because a command needs at least its command word.
-pub fn cmd<const N: usize>(x: [ExpString; N]) -> Command<PreExpansion> {
-    let mut words = Vec::from(x).into_iter();
-    let cmd = words
-        .next()
-        .expect("cmd() requires at least the command word");
-    Command {
-        cmd,
-        args: words.collect(),
-    }
-}
-
-impl CmdDisplay for Ast<PreExpansion> {
-    /// Renders the statement as `decl(..)`, `assign(..)` or `pipes([..])`.
-    ///
-    /// Every command inside `pipes([..])` is followed by a comma, including the last.
-    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
-        match self {
-            Ast::FunDecl(FunDecl { name, body }) => {
-                write!(w, "decl(")?;
-                name.cdisplay(w)?;
-                write!(w, ", ")?;
-                body.body.cdisplay(w)?;
-                write!(w, ")")
-            }
-            Ast::VarAssign(VarAssign { var, val }) => {
-                write!(w, "assign(")?;
-                var.cdisplay(w)?;
-                write!(w, ", ")?;
-                val.cdisplay(w)?;
-                write!(w, ")")
-            }
-            Ast::Pipes(Pipes { cmds }) => {
-                write!(w, "pipes([")?;
-                for c in cmds {
-                    c.cdisplay(w)?;
-                    write!(w, ",")?;
-                }
-                write!(w, "])")
-            }
-        }
-    }
-}
-
-impl CmdDisplay for ExpString {
-    /// Renders the string as `estr(b"..")` when it is a single literal part, and
-    /// as `str([part, part, ..])` otherwise.
-    ///
-    /// The literal is written through the `&[u8]` renderer so it appears as a
-    /// `b".."` byte-string literal like every other byte string in the output,
-    /// and multi-part strings separate their parts with `", "`.
-    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
-        match self.parts.as_slice() {
-            // Borrow the bytes instead of cloning the part just to print it.
-            [StringPart::Boring(bytes)] => {
-                write!(w, "estr(")?;
-                bytes.as_slice().cdisplay(w)?;
-                write!(w, ")")
-            }
-            parts => {
-                write!(w, "str([")?;
-                for (i, part) in parts.iter().enumerate() {
-                    if i > 0 {
-                        write!(w, ", ")?;
-                    }
-                    part.cdisplay(w)?;
-                }
-                write!(w, "])")
-            }
-        }
-    }
-}
-
-impl CmdDisplay for StringPart {
-    /// Renders the part as `bstr(..)`, `var(..)` or `cmdp(..)`.
-    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
-        // Each variant writes its opener and payload; the closing paren is shared.
-        match self {
-            StringPart::Boring(bytes) => {
-                write!(w, "bstr(")?;
-                bytes.as_slice().cdisplay(w)?;
-            }
-            StringPart::Var(VarName { name }) => {
-                write!(w, "var(")?;
-                name.as_slice().cdisplay(w)?;
-            }
-            StringPart::Cmd(inner) => {
-                write!(w, "cmdp(")?;
-                inner.cdisplay(w)?;
-            }
-        }
-        write!(w, ")")
-    }
-}
-
-impl CmdDisplay for Command<PreExpansion> {
-    /// Renders the command as `cmd([word, arg, ..])`.
-    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
-        write!(w, "cmd([")?;
-        self.cmd.cdisplay(w)?;
-        self.args.iter().try_for_each(|arg| {
-            write!(w, ", ")?;
-            arg.cdisplay(&mut *w)
-        })?;
-        write!(w, "])")
-    }
-}
-
-impl CmdDisplay for &[u8] {
-    /// Renders the bytes as a `b".."` literal with non-printable bytes escaped.
-    fn cdisplay(&self, w: &mut dyn std::io::Write) -> std::io::Result<()> {
-        write!(w, "b\"{}\"", self.escape_ascii())
-    }
-}
-
-impl Ast<PreExpansion> {
-    /// Expands every string in the statement using `e`.
-    ///
-    /// Returns the first error reported by the expander, if any.
-    pub fn expand<E: Expander>(self, e: &mut E) -> Res<Ast<PostExpansion>, E::Error> {
-        let expanded = match self {
-            Ast::FunDecl(decl) => Ast::FunDecl(decl.expand(e)?),
-            Ast::VarAssign(assignment) => Ast::VarAssign(assignment.expand(e)?),
-            Ast::Pipes(pipeline) => Ast::Pipes(pipeline.expand(e)?),
-        };
-        Ok(expanded)
-    }
-}
-
-/// Body of a function declaration.
-///
-/// The body is always stored pre-expansion, whatever the stage of the enclosing
-/// `FunDecl`.
-#[derive(Debug, Clone)]
-pub struct FunBody {
- /// The single statement between the braces.
- pub body: Box<Ast<PreExpansion>>,
-}
-
-impl Parse for FunBody {
-    /// Parses `{ <statement> }`, skipping whitespace before each brace.
-    ///
-    /// The closing `}` is consumed, so the cursor ends up just past the body.
-    ///
-    /// # Errors
-    ///
-    /// * `ParseError::Eof` if the input ends before `{`, or before `}` outside
-    ///   completion mode (in completion mode an unterminated body is accepted).
-    /// * `ParseError::Expected` if `{` or `}` is not where it should be.
-    /// * Any error from parsing the enclosed statement.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        b.spaces();
-
-        if b.is_empty() {
-            return Err(ParseError::Eof);
-        }
-        if b.peek() != b'{' {
-            return Err(ParseError::Expected('{'));
-        }
-        b.adv();
-
-        let body = Box::new(Ast::parse(b)?);
-
-        // Not every statement parser skips trailing whitespace (e.g. `set`),
-        // so do it here before looking for the closing brace.
-        b.spaces();
-        if b.is_empty() {
-            return if b.is_completion() {
-                Ok(Self { body })
-            } else {
-                Err(ParseError::Eof)
-            };
-        }
-        if b.peek() != b'}' {
-            return Err(ParseError::Expected('}'));
-        }
-        // Consume the brace so it is not left behind for the caller.
-        b.adv();
-        Ok(Self { body })
-    }
-}
-
-/// A function declaration: a name plus a body.
-#[derive(Debug, Clone)]
-pub struct FunDecl<S: Stage> {
- /// Function name, in the string representation of stage `S`.
- pub name: S::Str,
- /// Function body; stays pre-expansion in every stage.
- pub body: FunBody,
-}
-
-impl Parse for FunDecl<PreExpansion> {
-    /// Parses `fun <name> { <statement> }`.
-    ///
-    /// Returns `ParseError::NotAFunDecl` without consuming anything when the
-    /// input does not start with `fun` followed by a space or a tab.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        let has_keyword = b.buf.starts_with(b"fun ") || b.buf.starts_with(b"fun\t");
-        if !has_keyword {
-            return Err(ParseError::NotAFunDecl);
-        }
-
-        // Skip the keyword and its separator, then any further whitespace.
-        b.advance(4);
-        b.spaces();
-
-        Ok(Self {
-            name: ExpString::parse(b)?,
-            body: FunBody::parse(b)?,
-        })
-    }
-}
-
-impl FunDecl<PreExpansion> {
-    /// Expands the function name; the body is carried over untouched.
-    fn expand<E: Expander>(self, e: &mut E) -> Res<FunDecl<PostExpansion>, E::Error> {
-        let FunDecl { name, body } = self;
-        let name = name.expand(e)?;
-        Ok(FunDecl { name, body })
-    }
-}
-
-/// A variable assignment: the variable's name and the value assigned to it.
-#[derive(Debug, Clone)]
-pub struct VarAssign<S: Stage> {
- /// Name of the variable being assigned.
- pub var: S::Str,
- /// Value being assigned.
- pub val: S::Str,
-}
-
-impl Parse for VarAssign<PreExpansion> {
-    /// Parses `set <var> = <val>`.
-    ///
-    /// Returns `ParseError::NotAVarAssign` without consuming anything when the
-    /// input does not start with `set` followed by a space or a tab.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        let has_keyword = b.buf.starts_with(b"set ") || b.buf.starts_with(b"set\t");
-        if !has_keyword {
-            return Err(ParseError::NotAVarAssign);
-        }
-
-        // Skip the keyword and its separator, then any further whitespace.
-        b.advance(4);
-        b.spaces();
-
-        let var = ExpString::parse(b)?;
-        b.spaces();
-
-        if b.is_empty() {
-            return Err(ParseError::Eof);
-        }
-        // The byte after the name is consumed whether or not it is `=`.
-        if b.adv() != b'=' {
-            return Err(ParseError::Expected('='));
-        }
-
-        Ok(Self {
-            var,
-            val: ExpString::parse(b)?,
-        })
-    }
-}
-
-impl VarAssign<PreExpansion> {
-    /// Expands the variable name first, then the value.
-    fn expand<E: Expander>(self, e: &mut E) -> Res<VarAssign<PostExpansion>, E::Error> {
-        let VarAssign { var, val } = self;
-        let var = var.expand(e)?;
-        let val = val.expand(e)?;
-        Ok(VarAssign { var, val })
-    }
-}
-
-/// A pipeline: commands separated by `|` in the source, stored left to right.
-#[derive(Debug, Clone)]
-pub struct Pipes<T: Stage> {
- /// The commands of the pipeline, in source order.
- pub cmds: Vec<Command<T>>,
-}
-
-impl Pipes<PreExpansion> {
-    /// Expands each command in order, stopping at the first error.
-    fn expand<E: Expander>(self, e: &mut E) -> Res<Pipes<PostExpansion>, E::Error> {
-        let cmds = self
-            .cmds
-            .into_iter()
-            .map(|command| command.expand(&mut *e))
-            .collect::<Res<Vec<_>, E::Error>>()?;
-        Ok(Pipes { cmds })
-    }
-}
-
-/// One piece of an `ExpString`.
-#[derive(Debug, Clone)]
-pub enum StringPart {
- /// Literal bytes that need no expansion.
- Boring(BString),
- /// A variable reference, replaced through `Expander::expand_var`.
- Var(VarName),
- /// A command substitution, replaced through `Expander::expand_cmd`.
- Cmd(Ast<PreExpansion>),
-}
-
-impl StringPart {
-    /// Returns `true` when the part is a literal (`Boring`) part.
-    pub fn is_boring(&self) -> bool {
-        match self {
-            StringPart::Boring(..) => true,
-            StringPart::Var(..) | StringPart::Cmd(..) => false,
-        }
-    }
-
-    /// Returns the literal bytes of a `Boring` part.
-    ///
-    /// # Panics
-    ///
-    /// Panics when the part is not `Boring`.
-    pub fn unwrap_boring(self) -> BString {
-        if let StringPart::Boring(bytes) = self {
-            bytes
-        } else {
-            panic!("unwrap on non-boring value")
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-/// A string that may still contain variable references and command substitutions.
-///
-/// `"hi ${var} $(cmd) "` gets mapped to
-/// `[Boring("hi "), Var("var"), Boring(" "), Cmd(...), Boring(" ")]`.
-pub struct ExpString {
- /// The pieces of the string, in source order.
- parts: Vec<StringPart>,
-}
-
-impl ExpString {
-    /// Expands every part in order and concatenates the results.
-    ///
-    /// Command substitutions are expanded first and then handed to
-    /// `Expander::expand_cmd`.
-    fn expand<E: Expander>(self, e: &mut E) -> Res<BString, E::Error> {
-        let mut out = BString::new();
-        for part in self.parts {
-            let piece = match part {
-                StringPart::Boring(bytes) => bytes,
-                StringPart::Var(v) => e.expand_var(v.name)?,
-                StringPart::Cmd(ast) => {
-                    let expanded = ast.expand(e)?;
-                    e.expand_cmd(expanded)?
-                }
-            };
-            out.extend_from_slice(&piece);
-        }
-        Ok(out)
-    }
-}
-
-/// Returns `true` for bytes with syntactic meaning: `| { } $ ( ) ' "`.
-fn is_symbol(x: u8) -> bool {
-    matches!(
-        x,
-        b'|' | b'{' | b'}' | b'$' | b'(' | b')' | b'\'' | b'"'
-    )
-}
-
-/// Returns `true` when `x` may start a variable name (an ASCII letter).
-fn is_var_begin(x: u8) -> bool {
-    matches!(x, b'a'..=b'z' | b'A'..=b'Z')
-}
-/// Returns `true` when `x` may appear inside a variable name (ASCII letter,
-/// ASCII digit or `_`).
-fn is_var_name(x: u8) -> bool {
-    matches!(x, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_')
-}
-
-/// Name of a variable referenced inside an `ExpString`.
-#[derive(Debug, Clone)]
-pub struct VarName {
- /// The raw bytes of the name, without `$` or braces.
- name: BString,
-}
-
-impl Parse for VarName {
-    /// Parses a variable name: an ASCII letter followed by any number of ASCII
-    /// letters, digits or underscores.
-    ///
-    /// # Errors
-    ///
-    /// * `ParseError::Eof` on empty input.
-    /// * `ParseError::ExpectedAlphabetic` when the first byte is not a letter.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        if b.is_empty() {
-            return Err(ParseError::Eof);
-        }
-        if !is_var_begin(b.peek()) {
-            return Err(ParseError::ExpectedAlphabetic);
-        }
-
-        let mut name = BString::new();
-        while b.has() && is_var_name(b.peek()) {
-            name.push(b.adv());
-        }
-
-        Ok(Self { name })
-    }
-}
-
-impl Parse for ExpString {
-    /// Parses one (possibly quoted) string, splitting it into literal, variable
-    /// and command-substitution parts.
-    ///
-    /// * `'...'` takes every byte literally up to the closing quote.
-    /// * `"..."` and unquoted words support `\` escapes, `$name`, `$?`, `$!`,
-    ///   `${name}` and `$( statement )`.
-    /// * An unquoted word ends, without consuming the terminator, at whitespace
-    ///   or at any symbol other than `$`.
-    ///
-    /// The `)` that closes a command substitution is consumed, so it neither
-    /// leaks into a quoted string as literal text nor stays behind in the input.
-    ///
-    /// # Errors
-    ///
-    /// * `ParseError::NotAString` when the input is empty or starts with a symbol
-    ///   that cannot begin a string.
-    /// * `ParseError::Eof` when a quoted string, `${` or `$(` is cut off by the end
-    ///   of input (an unterminated quote is accepted in completion mode).
-    /// * `ParseError::Unknown(b':')` for `${name:...}`, which is not supported yet.
-    /// * `ParseError::Incomplete` when `${name` is followed by anything but `}`.
-    /// * `ParseError::Expected(')')` when a command substitution is not closed.
-    /// * Any error from parsing a variable name or a nested statement.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        b.spaces();
-        if b.is_empty() {
-            return Err(ParseError::NotAString);
-        }
-
-        // `delim` is the closing quote, or b' ' for an unquoted word.
-        let mut delim = b.peek();
-        if delim == b'\'' || delim == b'"' {
-            b.adv();
-        } else if is_symbol(delim) && delim != b'$' {
-            return Err(ParseError::NotAString);
-        } else {
-            delim = b' ';
-        }
-
-        let mut parts: Vec<StringPart> = Vec::new();
-        let mut escaping = false;
-
-        // Appends a literal byte, extending the trailing literal part if there is one.
-        let add_char = |p: &mut Vec<StringPart>, x: u8| match p.last_mut() {
-            Some(StringPart::Boring(v)) => v.push(x),
-            _ => p.push(StringPart::Boring(vec![x])),
-        };
-
-        while b.has() {
-            let x = b.peek();
-
-            if escaping {
-                // The byte after a backslash is always taken literally.
-                add_char(&mut parts, x);
-                escaping = false;
-                b.adv();
-                continue;
-            }
-
-            if x == delim || (b.peek_space() && delim == b' ') {
-                // A closing quote is consumed; whitespace after a word is not.
-                if delim != b' ' {
-                    b.adv();
-                }
-                return Ok(Self { parts });
-            }
-
-            if delim == b' ' && is_symbol(x) && x != b'$' {
-                return Ok(Self { parts });
-            }
-
-            b.adv();
-
-            if delim == b'\'' {
-                // Single quotes: no escapes and no expansions.
-                add_char(&mut parts, x);
-                continue;
-            }
-
-            if x == b'\\' {
-                escaping = true;
-                continue;
-            }
-
-            if x != b'$' {
-                add_char(&mut parts, x);
-                continue;
-            }
-
-            // A `$` at the very end of the input is a literal dollar sign.
-            if !b.has() {
-                add_char(&mut parts, x);
-                continue;
-            }
-
-            let next = b.peek();
-            if next == b'?' || next == b'!' {
-                b.adv();
-                parts.push(StringPart::Var(VarName { name: vec![next] }));
-            } else if is_var_begin(next) {
-                let v = VarName::parse(b)?;
-                parts.push(StringPart::Var(v));
-            } else if next == b'{' {
-                b.adv();
-                let v = VarName::parse(b)?;
-
-                if !b.has() {
-                    return Err(ParseError::Eof);
-                }
-                match b.peek() {
-                    b'}' => {}
-                    // TODO: `:` modifiers in `${name:...}` are not implemented.
-                    // Report them as an error rather than panicking on user input.
-                    b':' => return Err(ParseError::Unknown(b':')),
-                    _ => return Err(ParseError::Incomplete),
-                }
-
-                b.adv();
-                parts.push(StringPart::Var(v));
-            } else if next == b'(' {
-                b.adv();
-                let cmd = Ast::parse(b)?;
-                b.spaces();
-                if b.is_empty() {
-                    return Err(ParseError::Eof);
-                }
-                if b.peek() != b')' {
-                    return Err(ParseError::Expected(')'));
-                }
-                // Consume the closing paren of the substitution.
-                b.adv();
-                parts.push(StringPart::Cmd(cmd));
-            } else {
-                // Not a variable or substitution: keep the `$` as a literal.
-                add_char(&mut parts, b'$');
-            }
-        }
-
-        // Running out of input ends an unquoted word normally; an open quote is
-        // only tolerated while completing.
-        if b.is_completion() || delim == b' ' {
-            Ok(Self { parts })
-        } else {
-            Err(ParseError::Eof)
-        }
-    }
-}
-
-/// A single command: the command word and its arguments.
-#[derive(Debug, Clone)]
-pub struct Command<T: Stage> {
- /// The command word (first string of the command).
- pub cmd: T::Str,
- /// The remaining strings, in source order.
- pub args: Vec<T::Str>,
-}
-
-impl Command<PreExpansion> {
-    /// Expands the command word, then each argument in order, stopping at the
-    /// first error.
-    fn expand<E: Expander>(self, e: &mut E) -> Res<Command<PostExpansion>, E::Error> {
-        let Command { cmd, args } = self;
-        let cmd = cmd.expand(e)?;
-        let args = args
-            .into_iter()
-            .map(|arg| arg.expand(&mut *e))
-            .collect::<Res<Vec<_>, E::Error>>()?;
-        Ok(Command { cmd, args })
-    }
-}
-
-/// Reasons the parser can reject its input.
-#[allow(unused)]
-#[derive(Debug)]
-pub enum ParseError {
- /// "clean" EOF, i.e. not in the middle of something
- // NOTE(review): the parsers in this file also return `Eof` for an unterminated
- // quote, `${` or `$(` -- confirm whether those should be `Incomplete`.
- Eof,
-
- /// "unclean" EOF, i.e. EOF after beginning a quoted string
- // NOTE(review): currently only returned when `${name` is followed by a byte
- // other than `}`.
- Incomplete,
-
- /// A variable name did not start with an ASCII letter.
- ExpectedAlphabetic,
-
- /// A byte the parser does not know how to handle at this position.
- Unknown(u8),
-
- /// The given character was required at this position but not found.
- Expected(char),
-
- /// The input is empty or starts with a symbol that cannot begin a string.
- NotAString,
-
- /// The input does not start with the `fun` keyword.
- NotAFunDecl,
-
- /// The input does not start with the `set` keyword.
- NotAVarAssign,
-}
-
-/// Result alias used by the parsers; the error type is always `ParseError`.
-type Result<T> = std::result::Result<T, ParseError>;
-
-/// Parses `x` as one statement in command mode.
-///
-/// On failure, returns the error together with the input that had not been
-/// consumed when parsing stopped.
-pub fn do_parse(x: &[u8]) -> Res<Ast<PreExpansion>, (ParseError, &[u8])> {
-    let mut c = Cursor::new(x, ParseMode::Command);
-    let parsed = Ast::parse(&mut c);
-    parsed.map_err(|e| (e, c.buf))
-}
-
-/// What kind of word is being completed.
-pub enum CompletionKind {
- /// The command word of the last command in the pipeline.
- Command,
- /// The last argument of the last command in the pipeline.
- Argument,
- /// Nothing can be completed at this position.
- None,
-}
-
-/// Result of analysing a partially typed line for completion.
-pub struct CompletionContext {
- /// Kind of word being completed.
- pub kind: CompletionKind,
- /// The bytes of that word typed so far; empty when `kind` is `None`.
- pub partial: BString,
-}
-
-impl CompletionContext {
-    /// Returns the context meaning "nothing to complete": kind `None` and an
-    /// empty partial word.
-    pub fn none() -> Self {
-        let partial = BString::new();
-        Self {
-            kind: CompletionKind::None,
-            partial,
-        }
-    }
-}
-
-/// Builds the completion context for the word `s`, which is of kind `kind`.
-///
-/// Only purely literal words can be completed:
-/// * a word with no parts at all (e.g. an empty or just-opened quoted string)
-///   yields an empty partial of the given kind instead of panicking;
-/// * a word made of exactly one literal part yields that literal;
-/// * anything containing a variable or command substitution yields
-///   `CompletionContext::none()`.
-fn expstr_cc(s: &ExpString, kind: CompletionKind) -> CompletionContext {
-    match s.parts.as_slice() {
-        [] => CompletionContext {
-            kind,
-            partial: BString::new(),
-        },
-        [StringPart::Boring(bytes)] => CompletionContext {
-            kind,
-            partial: bytes.clone(),
-        },
-        _ => CompletionContext::none(),
-    }
-}
-
-/// Works out which word of the partially typed line `x` is being completed.
-///
-/// Only pipelines are handled, and only when the last consumed byte was not
-/// whitespace; the word is then the last argument of the last command, or its
-/// command word if it has no arguments. Every other case yields
-/// `CompletionContext::none()`.
-pub fn completion_context<'a>(x: &'a [u8]) -> CompletionContext {
-    let mut cursor = Cursor::new(x, ParseMode::Completion);
-    let parsed = Ast::parse(&mut cursor);
-
-    let pipes = match parsed {
-        Ok(Ast::Pipes(pipes)) if !cursor.spaced => pipes,
-        _ => return CompletionContext::none(),
-    };
-
-    let cmd = match pipes.cmds.last() {
-        Some(cmd) => cmd,
-        None => return CompletionContext::none(),
-    };
-
-    match cmd.args.last() {
-        Some(arg) => expstr_cc(arg, CompletionKind::Argument),
-        None => expstr_cc(&cmd.cmd, CompletionKind::Command),
-    }
-}
-
-/// Types that can be parsed from the front of a `Cursor`.
-trait Parse: Sized {
- /// Parses a value from `b`, advancing it past what was consumed.
- fn parse(b: &mut Cursor<'_>) -> Result<Self>;
-}
-
-/// How strictly unterminated input is treated.
-enum ParseMode {
- /// Parsing a command line to run it.
- Command,
- /// Parsing a partially typed line for completion; some parsers accept input
- /// that ends early in this mode.
- Completion,
-}
-
-/// Parser state: the unconsumed input plus a few flags.
-struct Cursor<'a> {
- /// The input that has not been consumed yet.
- buf: &'a [u8],
- /// Whether we are parsing to run a command or to complete one.
- mode: ParseMode,
-
- /// `true` if the last byte consumed was whitespace skipped by `spaces`;
- /// reset to `false` by `adv` and `advance`.
- spaced: bool,
-
- /// When `true`, `bt` prints a debug trace for every peek/advance.
- backtrace: bool,
-}
-
-impl<'a> Cursor<'a> {
-    /// Creates a cursor at the start of `buf` with tracing disabled.
-    fn new(buf: &'a [u8], mode: ParseMode) -> Self {
-        Self {
-            buf,
-            mode,
-            spaced: false,
-            backtrace: false,
-        }
-    }
-
-    /// Returns `true` while unconsumed input remains.
-    fn has(&self) -> bool {
-        !self.buf.is_empty()
-    }
-
-    /// Returns `true` once all input has been consumed.
-    fn is_empty(&self) -> bool {
-        self.buf.is_empty()
-    }
-
-    /// Debug aid: when tracing is enabled, prints `word`, the next byte (or
-    /// `<eof>`) and two frames of the current backtrace.
-    fn bt(&self, word: &str) {
-        if !self.backtrace {
-            return;
-        }
-        let bt = std::backtrace::Backtrace::capture().to_string();
-        // Do not index blindly: tracing must not panic on an empty buffer.
-        match self.buf.first() {
-            Some(&next) => println!("{word} {}\r", next as char),
-            None => println!("{word} <eof>\r"),
-        }
-        for l in bt.lines().skip(4).take(2) {
-            println!("{l}\r");
-        }
-        println!("\r");
-    }
-
-    /// Returns the next byte without consuming it.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the input is exhausted; check `has`/`is_empty` first.
-    fn peek(&self) -> u8 {
-        self.bt("peek");
-        self.buf[0]
-    }
-
-    /// Consumes and returns the next byte, clearing `spaced`.
-    ///
-    /// # Panics
-    ///
-    /// Panics if the input is exhausted; check `has`/`is_empty` first.
-    fn adv(&mut self) -> u8 {
-        self.bt("adv");
-        let out = self.buf[0];
-        self.buf = &self.buf[1..];
-        self.spaced = false;
-        out
-    }
-
-    /// Consumes and returns the next `amt` bytes, clearing `spaced`.
-    ///
-    /// # Panics
-    ///
-    /// Panics if fewer than `amt` bytes remain.
-    fn advance(&mut self, amt: usize) -> &[u8] {
-        // Only build the trace label when tracing is actually enabled.
-        if self.backtrace {
-            self.bt(&format!("adv({amt})"));
-        }
-        let (out, rest) = self.buf.split_at(amt);
-        self.buf = rest;
-        self.spaced = false;
-        out
-    }
-
-    /// Returns `true` if the next byte is a space, tab, newline or carriage
-    /// return; `false` at end of input.
-    fn peek_space(&self) -> bool {
-        matches!(self.buf.first(), Some(b' ' | b'\t' | b'\n' | b'\r'))
-    }
-
-    /// Skips whitespace, setting `spaced` if at least one byte was skipped.
-    fn spaces(&mut self) {
-        while self.peek_space() {
-            self.adv();
-            self.spaced = true;
-        }
-    }
-
-    /// Returns `true` when parsing for completion rather than execution.
-    fn is_completion(&self) -> bool {
-        matches!(self.mode, ParseMode::Completion)
-    }
-
-    /// Parses a `T` from the current position.
-    fn parse<T: Parse>(&mut self) -> Result<T> {
-        T::parse(self)
-    }
-}
-
-impl Parse for Ast<PreExpansion> {
-    /// Parses one statement: a `set` assignment, a `fun` declaration, or
-    /// otherwise a pipeline.
-    ///
-    /// A failed alternative is only skipped if it consumed no input; once it has
-    /// consumed something, its error is returned as is.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        b.spaces();
-
-        let before = b.buf.len();
-        match VarAssign::parse(b) {
-            Ok(va) => return Ok(Self::VarAssign(va)),
-            Err(e) if b.buf.len() != before => return Err(e),
-            Err(_) => {}
-        }
-
-        let before = b.buf.len();
-        match FunDecl::parse(b) {
-            Ok(fd) => return Ok(Self::FunDecl(fd)),
-            Err(e) if b.buf.len() != before => return Err(e),
-            Err(_) => {}
-        }
-
-        Ok(Self::Pipes(b.parse()?))
-    }
-}
-
-impl Parse for Command<PreExpansion> {
-    /// Parses a command word followed by as many argument strings as follow.
-    ///
-    /// Arguments end at the first position that is not a string; any other
-    /// error is returned to the caller.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        let cmd: ExpString = b.parse()?;
-        let mut args = Vec::new();
-        loop {
-            match ExpString::parse(b) {
-                Ok(arg) => args.push(arg),
-                Err(ParseError::NotAString) => return Ok(Self { cmd, args }),
-                Err(other) => return Err(other),
-            }
-        }
-    }
-}
-
-impl Parse for Pipes<PreExpansion> {
-    /// Parses one command followed by any number of `| command` continuations.
-    ///
-    /// Stops, without consuming it, at end of input or at any symbol other than
-    /// `|`; any other byte is reported as `ParseError::Unknown`.
-    fn parse(b: &mut Cursor<'_>) -> Result<Self> {
-        let mut cmds = vec![b.parse::<Command<PreExpansion>>()?];
-
-        loop {
-            b.spaces();
-            if b.is_empty() {
-                break;
-            }
-
-            match b.peek() {
-                b'|' => {
-                    b.adv();
-                    cmds.push(b.parse()?);
-                }
-                c if is_symbol(c) => break,
-                c => return Err(ParseError::Unknown(c)),
-            }
-        }
-
-        Ok(Pipes { cmds })
-    }
-}