diff --git a/.gitignore b/.gitignore
index 93466720..89efbf42 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ dist/
htmlcov/
.tox/
docs/_build/
+/src/rust/target/
diff --git a/MANIFEST.in b/MANIFEST.in
index 7dfa3f60..35d1a31f 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -6,4 +6,6 @@ prune docs/_build
graft tests
include src/markupsafe/py.typed
include src/markupsafe/*.pyi
+graft src/rust
+prune src/rust/target
global-exclude *.pyc
diff --git a/bench.py b/bench.py
index 59617aa8..88c66fa2 100644
--- a/bench.py
+++ b/bench.py
@@ -8,7 +8,7 @@
("long plain", '"Hello, World!" * 1000'),
("long suffix", '"Hello, World!" + "x" * 100_000'),
):
- for mod in "native", "speedups":
+ for mod in "native", "rust_speedups":
subprocess.run(
[
sys.executable,
diff --git a/pyproject.toml b/pyproject.toml
index 3afbafbf..d645709d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ Source = "https://github.com/pallets/markupsafe/"
Chat = "https://discord.gg/pallets"
[build-system]
-requires = ["setuptools"]
+requires = ["setuptools", "setuptools-rust"]
build-backend = "setuptools.build_meta"
[tool.pytest.ini_options]
diff --git a/setup.py b/setup.py
index d19a4faa..b5eef907 100644
--- a/setup.py
+++ b/setup.py
@@ -1,84 +1,30 @@
import os
import platform
-import sys
from setuptools import Extension
from setuptools import setup
-from setuptools.command.build_ext import build_ext
-from setuptools.errors import CCompilerError
-from setuptools.errors import ExecError
-from setuptools.errors import PlatformError
+from setuptools_rust import RustExtension
-ext_modules = [Extension("markupsafe._speedups", ["src/markupsafe/_speedups.c"])]
-
-
-class BuildFailed(Exception):
- pass
-
-
-class ve_build_ext(build_ext):
- """This class allows C extension building to fail."""
-
- def run(self):
- try:
- super().run()
- except PlatformError as e:
- raise BuildFailed() from e
-
- def build_extension(self, ext):
- try:
- super().build_extension(ext)
- except (CCompilerError, ExecError, PlatformError) as e:
- raise BuildFailed() from e
- except ValueError as e:
- # this can happen on Windows 64 bit, see Python issue 7511
- if "'path'" in str(sys.exc_info()[1]): # works with Python 2 and 3
- raise BuildFailed() from e
- raise
-
-
-def run_setup(with_binary):
- setup(
- cmdclass={"build_ext": ve_build_ext},
- ext_modules=ext_modules if with_binary else [],
- )
-
-
-def show_message(*lines):
- print("=" * 74)
- for line in lines:
- print(line)
- print("=" * 74)
-
-
-supports_speedups = platform.python_implementation() not in {
+if platform.python_implementation() not in {  # implementations listed here take the plain setup() branch
"PyPy",
"Jython",
"GraalVM",
-}
-
-if os.environ.get("CIBUILDWHEEL", "0") == "1" and supports_speedups:
- run_setup(True)
-elif supports_speedups:
- try:
- run_setup(True)
- except BuildFailed:
- show_message(
- "WARNING: The C extension could not be compiled, speedups"
- " are not enabled.",
- "Failure information, if any, is above.",
- "Retrying the build without the C extension now.",
- )
- run_setup(False)
- show_message(
- "WARNING: The C extension could not be compiled, speedups"
- " are not enabled.",
- "Plain-Python build succeeded.",
- )
-else:
- run_setup(False)
- show_message(
- "WARNING: C extensions are not supported on this Python"
- " platform, speedups are not enabled.",
- "Plain-Python build succeeded.",
+}:
+ local = os.environ.get("CIBUILDWHEEL", "0") != "1"  # True unless the CIBUILDWHEEL env var is "1"
+ setup(
+ ext_modules=[
+ Extension(  # C extension; NOTE(review): optional=True presumably lets a failed build be skipped - confirm
+ "markupsafe._speedups", ["src/markupsafe/_speedups.c"], optional=local
+ )
+ ],
+ rust_extensions=[
+ RustExtension(  # Rust extension described by src/rust/Cargo.toml
+ "markupsafe._rust_speedups",
+ "src/rust/Cargo.toml",
+ optional=local,  # same flag as the C extension above
+ debug=False,
+ )
+ ],
)
+else:
+ setup()  # no extension modules are declared for these implementations
diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py
index 00cf6b8f..efdcfce1 100644
--- a/src/markupsafe/__init__.py
+++ b/src/markupsafe/__init__.py
@@ -6,7 +6,7 @@
import typing as t
try:
- from ._speedups import _escape_inner
+ from ._rust_speedups import _escape_inner
except ImportError:
from ._native import _escape_inner
diff --git a/src/markupsafe/_rust_speedups.pyi b/src/markupsafe/_rust_speedups.pyi
new file mode 100644
index 00000000..8c888585
--- /dev/null
+++ b/src/markupsafe/_rust_speedups.pyi
@@ -0,0 +1 @@
+def _escape_inner(s: str, /) -> str: ...
diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock
new file mode 100644
index 00000000..5f43fd20
--- /dev/null
+++ b/src/rust/Cargo.lock
@@ -0,0 +1,171 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "cfg-if"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "indoc"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8"
+
+[[package]]
+name = "libc"
+version = "0.2.153"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
+
+[[package]]
+name = "markupsafe-rust"
+version = "0.1.0"
+dependencies = [
+ "pyo3",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
+
+[[package]]
+name = "portable-atomic"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.86"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pyo3"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433"
+dependencies = [
+ "cfg-if",
+ "indoc",
+ "libc",
+ "memoffset",
+ "once_cell",
+ "portable-atomic",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.22.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "pyo3-build-config",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "syn"
+version = "2.0.76"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
+
+[[package]]
+name = "unindent"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml
new file mode 100644
index 00000000..f97c4901
--- /dev/null
+++ b/src/rust/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "markupsafe-rust"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+[profile.release]
+debug = true
+
+[dependencies]
+pyo3 = "0.22.2"
+
+[lib]
+name = "_rust_speedups"
+crate-type = ["cdylib"]
diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs
new file mode 100644
index 00000000..1a75564d
--- /dev/null
+++ b/src/rust/src/lib.rs
@@ -0,0 +1,162 @@
+use pyo3::prelude::*;
+use pyo3::{types::PyString, PyResult, Python};
+
+/// Byte-indexed lookup table: `true` for the five bytes `"`, `&`, `'`, `<` and `>`.
+static NEEDS_SANITIZE: [bool; 256] = {
+    let (mut table, special, mut pos) = ([false; 256], b"\"&'<>", 0);
+    while pos < special.len() {
+        table[special[pos] as usize] = true;
+        pos += 1;
+    }
+    table
+};
+
+/// Finds where escaping has to start in `bytes`.
+///
+/// Returns `None` when no byte is one of `"`, `&`, `'`, `<` or `>`. Otherwise returns
+/// an offset at or before the first such byte: the scan covers four bytes at a time,
+/// so a hit inside a full chunk reports the start of that chunk, while a hit in the
+/// trailing remainder reports the exact position.
+pub fn needs_sanitize(bytes: &[u8]) -> Option<usize> {
+    // Table lookup shared by the chunked scan and the remainder scan.
+    let flagged = |b: &u8| NEEDS_SANITIZE[*b as usize];
+    let chunks = bytes.chunks_exact(4);
+    let tail = chunks.remainder();
+    // The remainder starts at the length rounded down to a multiple of four.
+    let tail_start = bytes.len() - tail.len();
+
+    chunks
+        // Plain `|` (not `||`): all four lookups are always evaluated.
+        .map(|c| flagged(&c[0]) | flagged(&c[1]) | flagged(&c[2]) | flagged(&c[3]))
+        .position(|hit| hit)
+        .map(|chunk_no| chunk_no * 4)
+        // No full chunk matched: check the remaining (fewer than four) bytes one by one.
+        .or_else(|| tail.iter().position(flagged).map(|i| tail_start + i))
+}
+
+/// Maps a byte to its slot in `SANITIZED_VALUE`, or `-1` when it needs no escaping.
+static SANITIZE_INDEX: [i8; 256] = {
+    let (mut table, special, mut slot) = ([-1i8; 256], b"\"&'<>", 0);
+    while slot < special.len() {
+        table[special[slot] as usize] = slot as i8;
+        slot += 1;
+    }
+    table
+};
+
+static SANITIZED_VALUE: [&str; 5] = ["&#34;", "&amp;", "&#39;", "&lt;", "&gt;"]; // in SANITIZE_INDEX order
+
+/// Escapes `"`, `&`, `'`, `<` and `>` in `input` using the `SANITIZED_VALUE` table.
+///
+/// Returns `None` when nothing needs escaping, so the caller can keep using `input`
+/// without any allocation; otherwise returns the escaped copy.
+pub fn lut_replace(input: &str) -> Option<String> {
+    let bytes = input.as_bytes();
+    let start = needs_sanitize(bytes)?;
+    let mut out = String::with_capacity(input.len());
+    // Start of the pending run of bytes that will be copied through unchanged.
+    let mut copied_to = 0;
+    for (pos, &b) in bytes.iter().enumerate().skip(start) {
+        let slot = SANITIZE_INDEX[b as usize];
+        // A negative slot marks a byte that is copied through unchanged.
+        if slot < 0 {
+            continue;
+        }
+        // Escaped bytes are all ASCII, so both slice ends are char boundaries.
+        out.push_str(&input[copied_to..pos]);
+        out.push_str(SANITIZED_VALUE[slot as usize]);
+        copied_to = pos + 1;
+    }
+    out.push_str(&input[copied_to..]);
+    Some(out)
+}
+
+/// Python-visible `_escape_inner`: escapes `s`, handing back `s` itself when unchanged.
+#[pyfunction]
+pub fn _escape_inner<'py>(
+    py: Python<'py>,
+    s: Bound<'py, PyString>,
+) -> PyResult<Bound<'py, PyString>> {
+    match lut_replace(s.to_str()?) {
+        Some(escaped) => Ok(PyString::new_bound(py, &escaped)),
+        None => Ok(s), // nothing to escape: the argument is returned as-is
+    }
+}
+
+/// Module initializer for `_rust_speedups`; registers `_escape_inner` on the module.
+#[pymodule]
+#[pyo3(name = "_rust_speedups")]
+fn speedups<'py>(_py: Python<'py>, m: &Bound<'py, PyModule>) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(_escape_inner, m)?)
+}
+
+#[cfg(test)]
+mod tests {
+    //! Unit tests for `lut_replace`. Expected strings spell out the entities
+    //! `&amp;`, `&gt;`, `&lt;`, `&#39;` and `&#34;`; inputs with nothing to
+    //! escape must yield `None`.
+    use crate::lut_replace;
+
+    #[test]
+    fn empty() {
+        assert!(lut_replace("").is_none());
+    }
+
+    #[test]
+    fn no_change_test() {
+        assert!(lut_replace("abcdefgh").is_none());
+    }
+
+    #[test]
+    fn middle() {
+        assert_eq!(
+            "abcd&amp;&gt;&lt;&#39;&#34;efgh",
+            lut_replace("abcd&><'\"efgh").unwrap()
+        );
+    }
+
+    #[test]
+    fn begin() {
+        assert_eq!(
+            "&amp;&gt;&lt;&#39;&#34;efgh",
+            lut_replace("&><'\"efgh").unwrap()
+        );
+    }
+
+    #[test]
+    fn end() {
+        assert_eq!(
+            "abcd&amp;&gt;&lt;&#39;&#34;",
+            lut_replace("abcd&><'\"").unwrap()
+        );
+    }
+
+    #[test]
+    fn no_change_large() {
+        assert!(lut_replace(&"abcdefgh".repeat(1024)).is_none());
+    }
+
+    #[test]
+    fn middle_large() {
+        assert_eq!(
+            "abcd&amp;&gt;&lt;&#39;&#34;efgh".repeat(1024).as_str(),
+            lut_replace("abcd&><'\"efgh".repeat(1024).as_str()).unwrap()
+        );
+    }
+
+    #[test]
+    fn begin_large() {
+        assert_eq!(
+            "&amp;&gt;&lt;&#39;&#34;efgh".repeat(1024).as_str(),
+            lut_replace("&><'\"efgh".repeat(1024).as_str()).unwrap()
+        );
+    }
+
+    #[test]
+    fn end_large() {
+        assert_eq!(
+            "abcd&amp;&gt;&lt;&#39;&#34;".repeat(1024).as_str(),
+            lut_replace("abcd&><'\"".repeat(1024).as_str()).unwrap()
+        );
+    }
+}