aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock543
-rw-r--r--Cargo.toml7
-rw-r--r--benches/regex.rs22
-rw-r--r--src/parse/regex/bc.rs82
4 files changed, 630 insertions, 24 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 41c2b86..ade7fae 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,12 +3,51 @@
version = 4
[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anes"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
+
+[[package]]
+name = "anstyle"
+version = "1.0.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
+
+[[package]]
+name = "autocfg"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
+
+[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
+name = "bumpalo"
+version = "3.20.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
+
+[[package]]
+name = "cast"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
+
+[[package]]
name = "cc"
version = "1.2.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -31,18 +70,228 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
[[package]]
+name = "ciborium"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e"
+dependencies = [
+ "ciborium-io",
+ "ciborium-ll",
+ "serde",
+]
+
+[[package]]
+name = "ciborium-io"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757"
+
+[[package]]
+name = "ciborium-ll"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9"
+dependencies = [
+ "ciborium-io",
+ "half",
+]
+
+[[package]]
+name = "clap"
+version = "4.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
+dependencies = [
+ "clap_builder",
+]
+
+[[package]]
+name = "clap_builder"
+version = "4.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
+dependencies = [
+ "anstyle",
+ "clap_lex",
+]
+
+[[package]]
+name = "clap_lex"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
+
+[[package]]
+name = "criterion"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f"
+dependencies = [
+ "anes",
+ "cast",
+ "ciborium",
+ "clap",
+ "criterion-plot",
+ "is-terminal",
+ "itertools",
+ "num-traits",
+ "once_cell",
+ "oorandom",
+ "plotters",
+ "rayon",
+ "regex",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "tinytemplate",
+ "walkdir",
+]
+
+[[package]]
+name = "criterion-plot"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1"
+dependencies = [
+ "cast",
+ "itertools",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "either"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
+
+[[package]]
name = "find-msvc-tools"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-core",
+ "futures-task",
+ "pin-project-lite",
+ "slab",
+]
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
+]
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
+[[package]]
+name = "is-terminal"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "windows-sys",
+]
+
+[[package]]
+name = "itertools"
+version = "0.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "js-sys"
+version = "0.3.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "142bc4740e452c1e57ade0cbc129f139c9093e354346f0872ef985f4f5cf5f11"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
name = "libc"
version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
+name = "memchr"
+version = "2.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
+
+[[package]]
name = "nix"
version = "0.31.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -55,9 +304,37 @@ dependencies = [
]
[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "oorandom"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
name = "pish"
version = "0.7.0"
dependencies = [
+ "criterion",
"libc",
"nix",
"pish_derive",
@@ -82,6 +359,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
[[package]]
+name = "plotters"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747"
+dependencies = [
+ "num-traits",
+ "plotters-backend",
+ "plotters-svg",
+ "wasm-bindgen",
+ "web-sys",
+]
+
+[[package]]
+name = "plotters-backend"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a"
+
+[[package]]
+name = "plotters-svg"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670"
+dependencies = [
+ "plotters-backend",
+]
+
+[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -100,12 +405,125 @@ dependencies = [
]
[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.150"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
name = "sqlite"
version = "0.37.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -183,7 +601,132 @@ dependencies = [
]
[[package]]
+name = "tinytemplate"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ed04576f974d2b2fba0f38c51dbc5518011e38c36bf1143164be765528fd409"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "916151b09da36bd82f6615cbf3a419e2f0ba23a03c6160e8e92eb6bd4aa1dec6"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "299047362ccbfce148b67ab7e73349f77748e00c8296f9542adfad2ad82c5c5e"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.122"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a929b2c61f11ba3e9bc35b50c1f25cb38e0e892c0c231ae2b8cf78d5dad4437"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "web-sys"
+version = "0.3.99"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b065d4f0e55f82fae73202e189638116a87c55ab6b8e6c2721e13dd9d854ad1"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b631b19d36a892ab55420c92dbc83ccd79274f25be714855d3074aa71cab639"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
diff --git a/Cargo.toml b/Cargo.toml
index d93b041..ea68cef 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,3 +12,10 @@ termios = "0.3"
pish_derive = { path = "./pish_derive" }
nix = { version = "0.31.2", features = ["poll", "signal"] }
terminfo-lean = "0.1.2"
+
+[dev-dependencies]
+criterion = "0.5"
+
+[[bench]]
+name = "regex"
+harness = false
diff --git a/benches/regex.rs b/benches/regex.rs
new file mode 100644
index 0000000..7f20606
--- /dev/null
+++ b/benches/regex.rs
@@ -0,0 +1,22 @@
+use criterion::{Criterion, criterion_group, criterion_main};
+use pish::parse::{
+ Parse,
+ regex::{Pattern, bc::BytecodeCompiledRegex},
+};
+
+/// https://mattmahoney.net/dc/enwik8.zip
+pub fn regex_throughput(c: &mut Criterion) {
+ let mut content = std::fs::read("enwik8").unwrap();
+ content.truncate(1_000_000);
+ let pat = Pattern::parse_from_bytes(b".*GNU.plus.Linux.*").unwrap();
+ let compiled = BytecodeCompiledRegex::try_from(pat).unwrap();
+
+ c.bench_function("enwik8 match-all", |b| {
+ b.iter(|| {
+ let _ = compiled.re_match(&content);
+ })
+ });
+}
+
+criterion_group!(benches, regex_throughput);
+criterion_main!(benches);
diff --git a/src/parse/regex/bc.rs b/src/parse/regex/bc.rs
index 72ba21b..cb41607 100644
--- a/src/parse/regex/bc.rs
+++ b/src/parse/regex/bc.rs
@@ -115,41 +115,45 @@ struct Thread<F: Flavor> {
struct VM<'p, F: Flavor> {
instr: &'p [Instr<F>],
- threads: Vec<Thread<F>>,
+ passive_threads: VecDeque<Thread<F>>,
+ active_threads: VecDeque<Thread<F>>,
hot: BitSet,
+ warm: BitSet,
}
impl<'p, F: Flavor> VM<'p, F> {
fn new(instr: &'p [Instr<F>], starting_thread: Thread<F>) -> Self {
Self {
instr,
- threads: vec![starting_thread],
+ passive_threads: vec![starting_thread].into(),
+ active_threads: VecDeque::new(),
hot: BitSet::new(instr.len()),
+ warm: BitSet::new(instr.len()),
}
}
fn step_epsilon<'a>(&mut self, sd: &mut F::StepData<'a, 'p>) {
- let mut threads: VecDeque<_> = self.threads.drain(..).collect();
+ std::mem::swap(&mut self.active_threads, &mut self.passive_threads);
self.hot.set_all(false);
+ self.warm.set_all(false);
- let mut warm = self.hot.clone();
macro_rules! add_thread {
($t:expr) => {{
let t = $t;
let bit = t.pc as usize;
- if !warm.get(bit) {
- warm.set(bit, true);
- threads.push_front(t);
+ if !self.warm.get(bit) {
+ self.warm.set(bit, true);
+ self.active_threads.push_front(t);
}
}};
}
- while let Some(mut thread) = threads.pop_front() {
+ while let Some(mut thread) = self.active_threads.pop_front() {
match self.instr[thread.pc as usize] {
Instr::Class(_) | Instr::Consume(_) => {
if !self.hot.get(thread.pc as usize) {
self.hot.set(thread.pc as usize, true);
- self.threads.push(thread);
+ self.passive_threads.push_back(thread);
}
}
Instr::Jump(j) => {
@@ -178,7 +182,7 @@ impl<'p, F: Flavor> VM<'p, F> {
fn step_consume(&mut self, byte: u8) {
self.hot.set_all(false);
- self.threads
+ self.passive_threads
.retain_mut(|thread| match self.instr[thread.pc as usize] {
Instr::Class(class) => {
if class.matches(byte) {
@@ -254,12 +258,16 @@ struct VirtualMachine<'a> {
}
impl<'a> VirtualMachine<'a> {
- fn step_epsilon(&mut self, loc: usize) {
- self.vm0.step_epsilon(&mut ());
+ fn step_epsilon_1(&mut self, loc: usize) {
self.vm1
.step_epsilon(&mut (loc, &self.vm0.hot, &mut self.vm2));
}
+ fn step_epsilon(&mut self, loc: usize) {
+ self.vm0.step_epsilon(&mut ());
+ self.step_epsilon_1(loc);
+ }
+
fn step_consume(&mut self, byte: u8) {
self.vm0.step_consume(byte);
self.vm1.step_consume(byte);
@@ -272,7 +280,7 @@ impl<'a> VirtualMachine<'a> {
fn extract_match(&self) -> Option<Match> {
self.vm1
- .threads
+ .passive_threads
.iter()
.filter(|t| self.accepting.get(t.pc as usize))
.map(|t| {
@@ -302,6 +310,7 @@ pub struct BytecodeCompiledRegex {
instrs0: Box<[Instr<AssertionFlavor>]>,
instrs1: Box<[Instr<MainFlavor>]>,
instrs2: Box<[Instr<AssertionFlavor>]>,
+ no_lookbehind: bool,
submatch_count: usize,
accepting: BitSet,
}
@@ -333,10 +342,18 @@ impl BytecodeCompiledRegex {
vm2,
accepting: &self.accepting,
};
- for (i, ch) in data.iter().cloned().enumerate() {
- vm.step(ch, i);
+ if self.no_lookbehind {
+ for (i, ch) in data.iter().cloned().enumerate() {
+ vm.step_epsilon_1(i);
+ vm.vm1.step_consume(ch);
+ }
+ vm.step_epsilon(data.len());
+ } else {
+ for (i, ch) in data.iter().cloned().enumerate() {
+ vm.step(ch, i);
+ }
+ vm.step_epsilon(data.len());
}
- vm.step_epsilon(data.len());
vm.extract_match()
}
@@ -602,6 +619,7 @@ impl TryFrom<Pattern> for BytecodeCompiledRegex {
accepting.set(final_state, true);
Ok(Self {
+ no_lookbehind: neg.map.is_empty(),
instrs0: neg.instrs.into(),
instrs1: instrs.into(),
instrs2: pos.instrs.into(),
@@ -640,7 +658,9 @@ mod tests {
0..1
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..1
);
}
@@ -653,7 +673,9 @@ mod tests {
0..3
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..5
);
}
@@ -662,11 +684,15 @@ mod tests {
fn nongreedy_plus() {
let re = regex("(ab+?)bb*");
assert_eq!(
- re.re_match(b"abbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..2
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..2
);
}
@@ -679,7 +705,9 @@ mod tests {
0..3
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..5
);
}
@@ -688,11 +716,15 @@ mod tests {
fn nongreedy_qm() {
let re = regex("(ab??)bb*");
assert_eq!(
- re.re_match(b"abbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..1
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..1
);
}
@@ -705,7 +737,9 @@ mod tests {
0..2
);
assert_eq!(
- re.re_match(b"abbbbb").unwrap().submatches[0].clone().unwrap(),
+ re.re_match(b"abbbbb").unwrap().submatches[0]
+ .clone()
+ .unwrap(),
0..2
);
}