diff --git a/.gitignore b/.gitignore index 93466720..89efbf42 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ dist/ htmlcov/ .tox/ docs/_build/ +/src/rust/target/ diff --git a/MANIFEST.in b/MANIFEST.in index 7dfa3f60..35d1a31f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,4 +6,6 @@ prune docs/_build graft tests include src/markupsafe/py.typed include src/markupsafe/*.pyi +graft src/rust +prune src/rust/target global-exclude *.pyc diff --git a/bench.py b/bench.py index 59617aa8..88c66fa2 100644 --- a/bench.py +++ b/bench.py @@ -8,7 +8,7 @@ ("long plain", '"Hello, World!" * 1000'), ("long suffix", '"Hello, World!" + "x" * 100_000'), ): - for mod in "native", "speedups": + for mod in "native", "rust_speedups": subprocess.run( [ sys.executable, diff --git a/pyproject.toml b/pyproject.toml index 3afbafbf..d645709d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ Source = "https://github.com/pallets/markupsafe/" Chat = "https://discord.gg/pallets" [build-system] -requires = ["setuptools"] +requires = ["setuptools", "setuptools-rust"] build-backend = "setuptools.build_meta" [tool.pytest.ini_options] diff --git a/setup.py b/setup.py index d19a4faa..b5eef907 100644 --- a/setup.py +++ b/setup.py @@ -1,84 +1,30 @@ import os import platform -import sys from setuptools import Extension from setuptools import setup -from setuptools.command.build_ext import build_ext -from setuptools.errors import CCompilerError -from setuptools.errors import ExecError -from setuptools.errors import PlatformError +from setuptools_rust import RustExtension -ext_modules = [Extension("markupsafe._speedups", ["src/markupsafe/_speedups.c"])] - - -class BuildFailed(Exception): - pass - - -class ve_build_ext(build_ext): - """This class allows C extension building to fail.""" - - def run(self): - try: - super().run() - except PlatformError as e: - raise BuildFailed() from e - - def build_extension(self, ext): - try: - super().build_extension(ext) - except (CCompilerError, 
ExecError, PlatformError) as e: - raise BuildFailed() from e - except ValueError as e: - # this can happen on Windows 64 bit, see Python issue 7511 - if "'path'" in str(sys.exc_info()[1]): # works with Python 2 and 3 - raise BuildFailed() from e - raise - - -def run_setup(with_binary): - setup( - cmdclass={"build_ext": ve_build_ext}, - ext_modules=ext_modules if with_binary else [], - ) - - -def show_message(*lines): - print("=" * 74) - for line in lines: - print(line) - print("=" * 74) - - -supports_speedups = platform.python_implementation() not in { +if platform.python_implementation() not in { "PyPy", "Jython", "GraalVM", -} - -if os.environ.get("CIBUILDWHEEL", "0") == "1" and supports_speedups: - run_setup(True) -elif supports_speedups: - try: - run_setup(True) - except BuildFailed: - show_message( - "WARNING: The C extension could not be compiled, speedups" - " are not enabled.", - "Failure information, if any, is above.", - "Retrying the build without the C extension now.", - ) - run_setup(False) - show_message( - "WARNING: The C extension could not be compiled, speedups" - " are not enabled.", - "Plain-Python build succeeded.", - ) -else: - run_setup(False) - show_message( - "WARNING: C extensions are not supported on this Python" - " platform, speedups are not enabled.", - "Plain-Python build succeeded.", +}: + local = os.environ.get("CIBUILDWHEEL", "0") != "1" + setup( + ext_modules=[ + Extension( + "markupsafe._speedups", ["src/markupsafe/_speedups.c"], optional=local + ) + ], + rust_extensions=[ + RustExtension( + "markupsafe._rust_speedups", + "src/rust/Cargo.toml", + optional=local, + debug=False, + ) + ], ) +else: + setup() diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py index 00cf6b8f..efdcfce1 100644 --- a/src/markupsafe/__init__.py +++ b/src/markupsafe/__init__.py @@ -6,7 +6,7 @@ import typing as t try: - from ._speedups import _escape_inner + from ._rust_speedups import _escape_inner except ImportError: from ._native import 
_escape_inner diff --git a/src/markupsafe/_rust_speedups.pyi b/src/markupsafe/_rust_speedups.pyi new file mode 100644 index 00000000..8c888585 --- /dev/null +++ b/src/markupsafe/_rust_speedups.pyi @@ -0,0 +1 @@ +def _escape_inner(s: str, /) -> str: ... diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock new file mode 100644 index 00000000..5f43fd20 --- /dev/null +++ b/src/rust/Cargo.lock @@ -0,0 +1,171 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indoc" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "markupsafe-rust" +version = "0.1.0" +dependencies = [ + "pyo3", +] + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "831e8e819a138c36e212f3af3fd9eeffed6bf1510a805af35b0edee5ffa59433" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e8730e591b14492a8945cdff32f089250b05f5accecf74aeddf9e8272ce1fa8" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e97e919d2df92eb88ca80a037969f44e5e70356559654962cbb3316d00300c6" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb57983022ad41f9e683a599f2fd13c3664d7063a3ac5714cae4b7bee7d3f206" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec480c0c51ddec81019531705acac51bcdbeae563557c982aa8263bb96880372" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] 
+name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "2.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml new file mode 100644 index 00000000..f97c4901 --- /dev/null +++ b/src/rust/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "markupsafe-rust" +version = "0.1.0" +edition = "2021" +publish = false + +[profile.release] +debug = true + +[dependencies] +pyo3 = "0.22.2" + +[lib] +name = "_rust_speedups" +crate-type = ["cdylib"] diff --git a/src/rust/src/lib.rs b/src/rust/src/lib.rs new file mode 100644 index 00000000..1a75564d --- /dev/null +++ b/src/rust/src/lib.rs @@ -0,0 +1,162 @@ +use pyo3::prelude::*; +use pyo3::{types::PyString, PyResult, Python}; + +static NEEDS_SANITIZE: [bool; 256] = { + let mut needs_sanitize = [false; 256]; + needs_sanitize[b'"' as usize] = true; + needs_sanitize[b'&' as usize] = true; + needs_sanitize[b'\'' as usize] = true; + needs_sanitize[b'<' as usize] = true; + needs_sanitize[b'>' as usize] = true; + needs_sanitize 
/// Byte-indexed lookup table: `true` for the five ASCII bytes that must be
/// HTML-escaped (`"`, `&`, `'`, `<`, `>`), `false` for every other byte.
///
/// Bytes >= 0x80 (UTF-8 lead/continuation bytes) are never flagged, so
/// multi-byte characters pass through untouched.
static NEEDS_SANITIZE: [bool; 256] = {
    let mut table = [false; 256];
    table[b'"' as usize] = true;
    table[b'&' as usize] = true;
    table[b'\'' as usize] = true;
    table[b'<' as usize] = true;
    table[b'>' as usize] = true;
    table
};

/// Cheap pre-scan that reports where escaping has to start.
///
/// Returns `None` when `bytes` contains no character that needs escaping.
/// Otherwise returns an index that is **at or before** the first such byte:
/// the start of the 4-byte chunk containing it, or its exact position when it
/// lies in the trailing (< 4 byte) remainder. Callers must therefore re-check
/// each byte from the returned index onwards.
pub fn needs_sanitize(bytes: &[u8]) -> Option<usize> {
    let chunks = bytes.chunks_exact(4);
    let rest = chunks.remainder();

    for (i, chunk) in chunks.enumerate() {
        // Non-short-circuiting `|` keeps the per-chunk check branch-free.
        let a = NEEDS_SANITIZE[chunk[0] as usize];
        let b = NEEDS_SANITIZE[chunk[1] as usize];
        let c = NEEDS_SANITIZE[chunk[2] as usize];
        let d = NEEDS_SANITIZE[chunk[3] as usize];
        if a | b | c | d {
            return Some(i * 4);
        }
    }

    // Everything before `rest` was consumed by the exact chunks above.
    let rest_start = bytes.len() - rest.len();
    rest.iter()
        .position(|&b| NEEDS_SANITIZE[b as usize])
        .map(|i| rest_start + i)
}

/// Maps a byte to its slot in [`SANITIZED_VALUE`], or `-1` when the byte is
/// copied through unchanged.
static SANITIZE_INDEX: [i8; 256] = {
    let mut table = [-1; 256];
    table[b'"' as usize] = 0;
    table[b'&' as usize] = 1;
    table[b'\'' as usize] = 2;
    table[b'<' as usize] = 3;
    table[b'>' as usize] = 4;
    table
};

/// Replacement entities, ordered to match the slots in [`SANITIZE_INDEX`]:
/// `"`, `&`, `'`, `<`, `>`.
static SANITIZED_VALUE: [&str; 5] = ["&#34;", "&amp;", "&#39;", "&lt;", "&gt;"];

/// HTML-escapes `input` using the lookup tables above.
///
/// Returns `None` when nothing needs escaping, so the caller can reuse the
/// original string without allocating; otherwise returns the escaped copy.
pub fn lut_replace(input: &str) -> Option<String> {
    let bytes = input.as_bytes();
    let first = needs_sanitize(bytes)?;

    let mut out = String::with_capacity(input.len());
    // Start of the not-yet-copied run of ordinary bytes.
    let mut copied_to = 0;
    for (idx, &byte) in bytes.iter().enumerate().skip(first) {
        let slot = SANITIZE_INDEX[byte as usize];
        if slot >= 0 {
            // Every escaped byte is ASCII, so `copied_to` and `idx` always
            // fall on UTF-8 character boundaries and slicing cannot panic.
            out.push_str(&input[copied_to..idx]);
            out.push_str(SANITIZED_VALUE[slot as usize]);
            copied_to = idx + 1;
        }
    }
    out.push_str(&input[copied_to..]);
    Some(out)
}
{ + Ok(PyString::new_bound(py, out.as_str())) + } else { + Ok(s) + } +} + +#[pymodule] +#[pyo3(name = "_rust_speedups")] +fn speedups<'py>(_py: Python<'py>, m: &Bound<'py, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(_escape_inner, m)?)?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use crate::lut_replace; + + #[test] + fn empty() { + let inp = ""; + assert!(lut_replace(inp).is_none()); + } + + #[test] + fn no_change_test() { + let inp = "abcdefgh"; + assert!(lut_replace(inp).is_none()); + } + + #[test] + fn middle() { + assert_eq!( + "abcd&><'"efgh", + lut_replace("abcd&><'\"efgh").unwrap() + ); + } + + #[test] + fn begin() { + assert_eq!( + "&><'"efgh", + lut_replace("&><'\"efgh").unwrap() + ); + } + + #[test] + fn end() { + assert_eq!( + "abcd&><'"", + lut_replace("abcd&><'\"").unwrap() + ); + } + + #[test] + fn no_change_large() { + let inp = "abcdefgh".repeat(1024); + assert!(lut_replace(inp.as_str()).is_none()); + } + + #[test] + fn middle_large() { + assert_eq!( + "abcd&><'"efgh".repeat(1024).as_str(), + lut_replace("abcd&><'\"efgh".repeat(1024).as_str()).unwrap() + ); + } + + #[test] + fn begin_large() { + assert_eq!( + "&><'"efgh".repeat(1024).as_str(), + lut_replace("&><'\"efgh".repeat(1024).as_str()).unwrap() + ); + } + + #[test] + fn end_large() { + assert_eq!( + "abcd&><'"".repeat(1024).as_str(), + lut_replace("abcd&><'\"".repeat(1024).as_str()).unwrap() + ); + } +}