// Recursive-descent parser that turns regex source bytes read from a `Cursor` into a `Pattern`.
//
// Call chain visible below: `Pattern::parse` -> `parse0` -> `parse_alt` -> `parse_seq`
// -> `parse_rep` -> `parse_atom`.
//
// * `SYMBOLS` / `is_symbol`: the bytes `{ } [ ] ( ) * + - ? | . \` and space are "symbols";
//   `is_symbol` is a membership test against that list.
// * `Pattern::parse`: remembers the start location, runs `parse0`, and only when that
//   succeeds highlights the consumed span as `OtherHighlights::Regex`; the result is
//   returned unchanged either way.
// * `parse0`: forwards to `parse_alt`.
// * `parse_alt`: parses one sequence, then keeps parsing further sequences for as long as
//   the next byte is `|` (each `|` is highlighted as `RegexSymbol`). A single sequence is
//   returned as-is; several become `Pattern::Alt`.
// * `parse_seq`: collects results of `parse_rep` until one is `Pattern::Nothing`. Zero items
//   give `Pattern::Nothing`, one item is returned as-is, several become `Pattern::Concat`.
// * `parse_rep`: parses an atom and, if input remains, an optional quantifier:
//   `*` -> (0, None), `+` -> (1, None), `?` -> (0, Some(1)). A `?` directly after the
//   quantifier selects `GreedyBehavior::NonGreedy`, otherwise `Greedy`. The result is
//   `Pattern::Rep(Box<atom>, min, max, greed)`; with no quantifier the atom is returned.
//   NOTE(review): the trailing non-greedy `?` is consumed without a `highlight_from` call,
//   unlike the quantifier itself - confirm whether that is intended.
// * `parse_atom`:
//     - end of input          -> `Pattern::Nothing`.
//     - `[` ... `]`           -> `Pattern::Alt` of `Pattern::Byte` / `Pattern::Range(lo, hi)`
//                                entries. `x-]` is read as the two bytes `x` and `-`; a
//                                symbol byte as the upper bound of a range is
//                                `ParseError::Unknown`; running out of input is
//                                `ParseError::Eof`. An empty class `[]` reaches `todo!` and
//                                therefore panics.
//     - `(`                   -> optionally followed by `?=`, `?!` or `?<=`, which set
//                                `assertion` to an (`LookDirection`, `LookPolarity`) pair.
//     - `\`                   -> the next byte must be a symbol and becomes `Pattern::Byte`;
//                                any other byte is `ParseError::Unknown`, no next byte is
//                                `ParseError::Eof`.
//     - any other symbol byte -> `Pattern::Nothing` without consuming it (this is what ends a
//                                sequence in `parse_seq`).
//     - any other byte        -> consumed and returned as `Pattern::Byte`.
//
// NOTE(review): this text appears to have lost its original line breaks, and `parse_atom`
// looks truncated: the fourth prefix test opens a byte-string literal (`b"?`) that is never
// closed, and the text resumes inside an arm that yields `Pattern::Range(0, 127)` (presumably
// the `.` arm - its match pattern is not visible). The remainder of the `(` group handling is
// not visible either. Because everything is on one physical line, the `// TODO` comment near
// the end also swallows the code that follows it. Restore the file from its upstream source
// before changing any code here.
use super::{Cursor, OtherHighlights, Parse, ParseError, Result}; use crate::regex::{GreedyBehavior, Pattern, LookDirection, LookPolarity}; const SYMBOLS: &[u8] = b"{}[]()*+-?|.\\ "; fn is_symbol(x: u8) -> bool { SYMBOLS.contains(&x) } impl Parse for Pattern { fn parse(b: &mut Cursor<'_>) -> super::Result { let begin = b.loc(); let result = parse0(b); if result.is_ok() { b.highlight_from(begin, OtherHighlights::Regex); } result } } fn parse0(s: &mut Cursor<'_>) -> Result { parse_alt(s) } fn parse_alt(s: &mut Cursor<'_>) -> Result { let mut seqs = vec![]; loop { let seq = parse_seq(s)?; seqs.push(seq); let begin = s.loc(); if s.has() && s.peek() == b'|' { s.adv(); s.highlight_from(begin, OtherHighlights::RegexSymbol); } else { break; } } Ok(match seqs.len() { 0 => Pattern::Nothing, 1 => seqs.into_iter().next().unwrap(), _ => Pattern::Alt(seqs), }) } fn parse_seq(s: &mut Cursor<'_>) -> Result { let mut reps = vec![]; loop { let rep = parse_rep(s)?; if rep != Pattern::Nothing { reps.push(rep); } else { break; } } Ok(match reps.len() { 0 => Pattern::Nothing, 1 => reps.into_iter().next().unwrap(), _ => Pattern::Concat(reps), }) } fn parse_rep(s: &mut Cursor<'_>) -> Result { let atom = parse_atom(s)?; if atom == Pattern::Nothing { return Ok(atom); } if !s.has() { return Ok(atom); } let begin = s.loc(); let rep = match s.peek() { b'*' => Some((0, None)), b'+' => Some((1, None)), b'?' => Some((0, Some(1))), _ => None, }; if let Some((min_rep, max_rep)) = rep { s.adv(); s.highlight_from(begin, OtherHighlights::RegexSymbol); let greed = if s.has() && s.peek() == b'?' 
{ s.adv(); GreedyBehavior::NonGreedy } else { GreedyBehavior::Greedy }; Ok(Pattern::Rep(Box::new(atom), min_rep, max_rep, greed)) } else { Ok(atom) } } fn parse_atom(s: &mut super::Cursor<'_>) -> Result { if !s.has() { return Ok(Pattern::Nothing); } let begin = s.loc(); match s.peek() { b'[' => { s.adv(); s.highlight_from(begin, OtherHighlights::RegexSymbol); let mut ranges = Vec::new(); loop { if !s.has() { return Err(ParseError::Eof); } let begin = s.loc(); let tok = s.adv(); if tok == b']' { if ranges.is_empty() { todo!("error handling for empty alternative list"); } s.highlight_from(begin, OtherHighlights::RegexSymbol); return Ok(Pattern::Alt(ranges)); } let begin = s.loc(); if s.has() && s.peek() == b'-' { s.adv(); s.highlight_from(begin, OtherHighlights::RegexSymbol); if !s.has() { return Err(ParseError::Eof); } let tok2 = s.peek(); if tok2 == b']' { ranges.push(Pattern::Byte(tok)); ranges.push(Pattern::Byte(b'-')); } else if is_symbol(tok2) { return Err(ParseError::Unknown(tok2)); } else { s.adv(); ranges.push(Pattern::Range(tok, tok2)); } } else { ranges.push(Pattern::Byte(tok)); } } } b'(' => { s.adv(); let mut assertion = None; if s.buf.starts_with(b"?=") { s.advance(2); assertion = Some((LookDirection::Ahead, LookPolarity::Positive)); } else if s.buf.starts_with(b"?!") { s.advance(2); assertion = Some((LookDirection::Ahead, LookPolarity::Negative)); } else if s.buf.starts_with(b"?<=") { s.advance(3); assertion = Some((LookDirection::Behind, LookPolarity::Positive)); } else if s.buf.starts_with(b"? { s.adv(); s.highlight_from(begin, OtherHighlights::RegexSymbol); Ok(Pattern::Range(0, 127)) } b'\\' => { s.adv(); if s.has() { let escaped = s.adv(); s.highlight_from(begin, OtherHighlights::RegexSymbol); if is_symbol(escaped) { Ok(Pattern::Byte(escaped)) } else { // TODO interpret \w and others Err(ParseError::Unknown(escaped)) } } else { Err(ParseError::Eof) } } x if is_symbol(x) => Ok(Pattern::Nothing), ch => { s.adv(); Ok(Pattern::Byte(ch)) } } }