From a9bf864158e67353f57047cdc4d6b0e325d73eae Mon Sep 17 00:00:00 2001 From: Jonas Maier <> Date: Tue, 21 Apr 2026 19:59:02 +0200 Subject: maybe soon better escape code parsing --- Cargo.lock | 30 ++++++ Cargo.toml | 1 + src/ansi.rs | 137 ------------------------- src/ansi/mod.rs | 293 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 1 + src/run/builtin.rs | 37 +++++++ src/run/mod.rs | 1 + 7 files changed, 363 insertions(+), 137 deletions(-) delete mode 100644 src/ansi.rs create mode 100644 src/ansi/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 9ffec25..ce62280 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,6 +62,7 @@ dependencies = [ "nix", "pish_derive", "sqlite", + "terminfo-lean", "termios", ] @@ -143,6 +144,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "terminfo-lean" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be1edf3004e37d1801b97add9c9923547c850b12431d4290e242915629109211" +dependencies = [ + "thiserror", +] + [[package]] name = "termios" version = "0.3.3" @@ -152,6 +162,26 @@ dependencies = [ "libc", ] +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "unicode-ident" version = "1.0.24" diff --git a/Cargo.toml b/Cargo.toml index d95a2ed..5931d53 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,4 @@ sqlite = "0.37.0" termios = "0.3" pish_derive = { path = "./pish_derive" } nix = { version = "0.31.2", features = ["poll", "signal"] } +terminfo-lean = "0.1.2" diff --git a/src/ansi.rs b/src/ansi.rs deleted file mode 100644 index 522c752..0000000 --- a/src/ansi.rs +++ /dev/null @@ -1,137 +0,0 @@ -use std::io::Read; - -use crate::cursor::Direction; - -pub enum KeyboardInput { - Eof, - Key(u8), - CtrlA, - CtrlB, - CtrlC, - CtrlE, - CtrlD, - CtrlL, - CtrlR, - Arrow(Direction), - CtrlArrow(Direction), - DeleteLeft, - DeleteRight, - CtrlDeleteRight, - Home, - End, -} - -fn read1() -> Option { - let mut buf = [0]; - match std::io::stdin().lock().read_exact(&mut buf) { - Ok(_) => Some(buf[0]), - Err(_) => None, - } -} - -fn byte_to_dir(b: u8) -> Option { - use Direction::*; - match b { - b'A' => Some(Up), - b'B' => Some(Down), - b'C' => Some(Right), - b'D' => Some(Left), - _ => None, - } -} - -fn read_escape(debug: bool) -> KeyboardInput { - use Direction::*; - use KeyboardInput::*; - - let mut seq = vec![match read1() { - Some(x) => x, - None => return Eof, - }]; - - if seq[0] == b'[' { - // still more - while { - let last = seq[seq.len() - 1]; - !(0x40..=0x7E).contains(&last) || seq.len() == 1 - } { - seq.push(match read1() { - Some(x) => x, - None => return Eof, - }); - } - - if debug { - println!("escape: {}", seq.escape_ascii()); - } - - match seq[1] { - b'3' => { - if seq.len() > 2 && seq[2] == b'~' { - DeleteRight - } else { - todo!("unhandled: {}", seq.escape_ascii()); - } - } - b'H' => Home, - b'F' => End, - b'd' => CtrlDeleteRight, - - // Ctrl Arrow - b'1' => { - if seq[1..].starts_with(b"1;5") { - if seq.len() == 4 { - todo!("idk what this is."); - } - match seq[4] { - b'A' => CtrlArrow(Up), - b'B' => CtrlArrow(Down), - b'C' => CtrlArrow(Right), - b'D' => CtrlArrow(Left), - _ => todo!("unhandled {}", seq.escape_ascii()), - } - } else { - todo!("unhandled {}", seq[1..].escape_ascii()) - } - } - - x => { - if let Some(dir) = byte_to_dir(x) { - Arrow(dir) - } else { - todo!("escape characters {}", seq[1..].escape_ascii()) - } - } - } - } else { - if debug { - println!("escape: {}", seq.escape_ascii()); - } - match seq[0] { - b'd' => CtrlDeleteRight, - x => todo!("unhandled escape code: ESC {x}"), - } - } -} - -pub fn read(debug: bool) -> KeyboardInput { - use KeyboardInput::*; - - let Some(x) = read1() else { - return KeyboardInput::Eof; - }; - - match x { - 1 => CtrlA, - 2 => CtrlB, - 3 => CtrlC, - 4 => CtrlD, - 8 | 127 => DeleteLeft, - 12 => CtrlL, - 18 => CtrlR, - 27 => read_escape(debug), - b'\t' | b'\r' => Key(x), - x if !x.is_ascii_control() => Key(x), - x => todo!("unimplemented control code: {x}"), - } -} diff --git a/src/ansi/mod.rs b/src/ansi/mod.rs new file mode 100644 index 0000000..4fc550b --- /dev/null +++ b/src/ansi/mod.rs @@ -0,0 +1,293 @@ +use std::{collections::BTreeMap, io::Read, os::unix::ffi::OsStrExt, sync::RwLock}; + +use crate::cursor::Direction; + +pub enum KeyboardInput { + Eof, + Key(u8), + CtrlA, + CtrlB, + CtrlC, + CtrlE, + CtrlD, + CtrlL, + CtrlR, + Arrow(Direction), + CtrlArrow(Direction), + DeleteLeft, + DeleteRight, + CtrlDeleteRight, + Home, + End, +} + +fn read1() -> Option { + let mut buf = [0]; + match std::io::stdin().lock().read_exact(&mut buf) { + Ok(_) => Some(buf[0]), + Err(_) => None, + } +} + +fn byte_to_dir(b: u8) -> Option { + use Direction::*; + match b { + b'A' => Some(Up), + b'B' => Some(Down), + b'C' => Some(Right), + b'D' => Some(Left), + _ => None, + } +} + +fn read_escape(debug: bool) -> KeyboardInput { + use Direction::*; + use KeyboardInput::*; + + let mut seq = vec![match read1() { + Some(x) => x, + None => return Eof, + }]; + + if seq[0] == b'[' { + // still more + while { + let last = seq[seq.len() - 1]; + !(0x40..=0x7E).contains(&last) || seq.len() == 1 + } { + seq.push(match read1() { + Some(x) => x, + None => return Eof, + }); + } + + if debug { + println!("escape: {}", seq.escape_ascii()); + } + + match seq[1] { + b'3' => { + if seq.len() > 2 && seq[2] == b'~' { + DeleteRight + } else { + todo!("unhandled: {}", seq.escape_ascii()); + } + } + b'H' => Home, + b'F' => End, + b'd' => CtrlDeleteRight, + + // Ctrl Arrow + b'1' => { + if seq[1..].starts_with(b"1;5") { + if seq.len() == 4 { + todo!("idk what this is."); + } + match seq[4] { + b'A' => CtrlArrow(Up), + b'B' => CtrlArrow(Down), + b'C' => CtrlArrow(Right), + b'D' => CtrlArrow(Left), + _ => todo!("unhandled {}", seq.escape_ascii()), + } + } else { + todo!("unhandled {}", seq[1..].escape_ascii()) + } + } + + x => { + if let Some(dir) = byte_to_dir(x) { + Arrow(dir) + } else { + todo!("escape characters {}", seq[1..].escape_ascii()) + } + } + } + } else { + if debug { + println!("escape: {}", seq.escape_ascii()); + } + match seq[0] { + b'd' => CtrlDeleteRight, + x => todo!("unhandled escape code: ESC {x}"), + } + } +} + +pub fn read(debug: bool) -> KeyboardInput { + use KeyboardInput::*; + + let Some(x) = read1() else { + return KeyboardInput::Eof; + }; + + match x { + 1 => CtrlA, + 2 => CtrlB, + 3 => CtrlC, + 4 => CtrlD, + 5 => CtrlE, + 8 | 127 => DeleteLeft, + 12 => CtrlL, + 18 => CtrlR, + 27 => read_escape(debug), + b'\t' | b'\r' => Key(x), + x if !x.is_ascii_control() => Key(x), + x => todo!("unimplemented control code: {x}"), + } +} + +struct EscapingStdinReader<'a> { + buf: Vec, + trie: &'a EscapeTrie, +} + +enum ByteProcessingResult<'a> { + Done(KbInput<'a>), + Continue(EscapingStdinReader<'a>), +} + +impl<'a> EscapingStdinReader<'a> { + pub fn new(trie: &'a EscapeTrie) -> Self { + Self { + buf: Vec::new(), + trie, + } + } + + pub fn process_byte(mut self, byte: u8) -> ByteProcessingResult<'a> { + match self.trie { + EscapeTrie::Done(_) => ByteProcessingResult::Done(KbInput::Key([byte])), + EscapeTrie::More(trie) => { + self.buf.push(byte); + match trie.get(&byte) { + Some(EscapeTrie::Done(keys)) => { + ByteProcessingResult::Done(KbInput::Escape(Escape { + keys: &keys[..], + value: self.buf, + })) + } + Some(trie) => { + self.trie = trie; + ByteProcessingResult::Continue(self) + } + None => ByteProcessingResult::Done(KbInput::InvalidEscape(self.buf)), + } + } + } + } +} + +enum EscapeTrie { + Done(Vec<&'static str>), + More(BTreeMap), +} + +enum KbInput<'a> { + Key([u8; 1]), + Escape(Escape<'a>), + InvalidEscape(Vec), +} + +impl<'a> KbInput<'a> { + pub fn as_bytes(&'a self) -> &'a [u8] { + match self { + KbInput::Key(x) => &x[..], + KbInput::Escape(e) => &e.value[..], + KbInput::InvalidEscape(e) => &e[..], + } + } +} + +struct Escape<'a> { + keys: &'a [&'a str], + value: Vec, +} + +use terminfo_lean::parse::Terminfo; + +static TERMINFO: RwLock>> = RwLock::new(None); + +fn parse_terminfo() -> Result, ()> { + let term = std::env::var_os("TERM").unwrap_or_else(|| "xterm".into()); + let terminfo_file_path = terminfo_lean::locate::locate(&term) + .map_err(|e| println!("failed to locate terminfo file for terminal {term:?}: {e:?}",))?; + let mut terminfo_file = std::fs::File::open(&terminfo_file_path).map_err(|e| { + println!("failed to open terminfo file at location {terminfo_file_path:?}: {e:?}") + })?; + let mut buf = Vec::new(); + terminfo_file.read_to_end(&mut buf).map_err(|e| { + println!("failed to read terminfo file at location {terminfo_file_path:?}: {e:?}") + })?; + buf.shrink_to_fit(); + let terminfo = terminfo_lean::parse::parse(buf.leak()).map_err(|e| { + println!("failed to parse terminfo file at location {terminfo_file_path:?}: {e:?}") + })?; + Ok(terminfo) +} + +fn parse_terminfo_backup() -> Terminfo<'static> { + todo!("panic-safe backup terminfo") +} + +pub fn setup() { + let ti = parse_terminfo().unwrap_or_else(|_| { + println!("using backup terminfo (might not be correct for this terminal)"); + parse_terminfo_backup() + }); + let ti = Box::leak(Box::new(ti)); + TERMINFO.clear_poison(); + *TERMINFO.write().unwrap() = Some(ti); +} + +pub fn ti() -> &'static Terminfo<'static> { + TERMINFO.read().unwrap().unwrap() +} + +fn is_parametrized(x: &[u8]) -> bool { + let mut pct = false; + for &b in x { + if b == b'%' { + pct = !pct; + } else if pct { + return true; + } + } + false +} + +fn trie_from_words(words: Vec<(&'static str, &[u8])>) -> EscapeTrie { + let mut tree = BTreeMap::new(); + let mut all_empty = true; + + for (key, val) in words.iter() { + if let Some(byte) = val.get(0) { + all_empty = false; + tree.entry(byte) + .or_insert_with(Vec::new) + .push((*key, &val[1..])); + } + } + + if all_empty { + EscapeTrie::Done(words.into_iter().map(|x| x.0).collect()) + } else { + let trie = tree + .into_iter() + .map(|(k, v)| (*k, trie_from_words(v))) + .collect(); + EscapeTrie::More(trie) + } +} + +impl From<&'static Terminfo<'static>> for EscapeTrie { + fn from(ti: &'static Terminfo<'static>) -> Self { + let w: Vec<(&'static str, &'static [u8])> = ti + .strings + .iter() + .filter(|(_, v)| !is_parametrized(v)) + .map(|(k, v)| (*k, *v)) + .collect(); + trie_from_words(w) + } +} diff --git a/src/main.rs b/src/main.rs index 742ed9a..d388ce1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -308,6 +308,7 @@ fn exec_rc_file(se: Arc>) { fn event_loop() { history::setup(); + ansi::setup(); let stdin = io::stdin(); diff --git a/src/run/builtin.rs b/src/run/builtin.rs index c080c93..f5dff81 100644 --- a/src/run/builtin.rs +++ b/src/run/builtin.rs @@ -564,3 +564,40 @@ impl Builtin for debug { Ok(()) } } + +pub struct terminfo; +impl Builtin for terminfo { + fn name(&self) -> &str { + "terminfo" + } + + fn io( + &self, + _session: Arc>, + _args: &[BString], + _stdin: &mut dyn Read, + f: &mut dyn Write, + ) -> Result { + let ti = crate::ansi::ti(); + + writeln!(f, "# Booleans")?; + for k in ti.booleans.iter() { + writeln!(f, "{k}")?; + } + writeln!(f)?; + + writeln!(f, "# Numbers")?; + for (k, v) in ti.numbers.iter() { + writeln!(f, "{k} {v}")?; + } + writeln!(f)?; + + writeln!(f, "# Strings")?; + for (k, v) in ti.strings.iter() { + writeln!(f, "{k} {}", v.escape_ascii())?; + } + writeln!(f)?; + + Ok(()) + } +} diff --git a/src/run/mod.rs b/src/run/mod.rs index 8a728c5..c866c6e 100644 --- a/src/run/mod.rs +++ b/src/run/mod.rs @@ -549,6 +549,7 @@ const BUILTINS: &[&'static dyn Builtin] = &[ &builtin::unalias, #[cfg(debug_assertions)] &builtin::debug, + &builtin::terminfo, ]; pub fn builtin_map() -> HashMap { -- cgit v1.2.3