Skip to content

Implement gzgetc and gzgets #352

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions libz-rs-sys/src/gz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1774,6 +1774,7 @@ pub unsafe extern "C-unwind" fn gzoffset(file: gzFile) -> z_off_t {
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzputc))]
pub unsafe extern "C-unwind" fn gzputc(file: gzFile, c: c_int) -> c_int {
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
return -1;
Expand Down Expand Up @@ -1833,6 +1834,7 @@ pub unsafe extern "C-unwind" fn gzputc(file: gzFile, c: c_int) -> c_int {
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
/// - `s` must point to a null-terminated C string.
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzputs))]
pub unsafe extern "C-unwind" fn gzputs(file: gzFile, s: *const c_char) -> c_int {
let Some(state) = (unsafe { file.cast::<GzState>().as_mut() }) else {
return -1;
Expand Down Expand Up @@ -1861,6 +1863,186 @@ pub unsafe extern "C-unwind" fn gzputs(file: gzFile, s: *const c_char) -> c_int
}
}

/// Fetch the next decompressed byte from `file`.
///
/// Note: The C header file zlib.h wraps gzgetc in a macro that handles the fast path
/// inline and only calls into this function for the slow path.
///
/// # Returns
///
/// - The byte that was read, on success.
/// - `-1` on error.
///
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzgetc))]
pub unsafe extern "C-unwind" fn gzgetc(file: gzFile) -> c_int {
    // Safety: per this function's contract, `file` is either null or a valid handle.
    let state = match unsafe { file.cast::<GzState>().as_mut() } {
        Some(state) => state,
        None => return -1,
    };

    // Only a stream opened for reading, with no (serious) pending error, may proceed.
    let reading = state.mode == GzMode::GZ_READ;
    let recoverable = state.err == Z_OK || state.err == Z_BUF_ERROR;
    if !(reading && recoverable) {
        return -1;
    }

    // Slow path: the output buffer is empty, so ask gz_read for a single byte.
    if state.have == 0 {
        let mut byte = [0u8; 1];
        // Safety: `byte` has room for the one byte being requested.
        let got = unsafe { gz_read(state, byte.as_mut_ptr(), 1) };
        return if got == 1 { byte[0] as _ } else { -1 };
    }

    // Fast path: take the byte straight out of the output buffer (no need to check
    // for a skip request here).
    state.have -= 1;
    state.pos += 1;
    // Safety: `state.have` was at least 1 on entry to this path, so `state.next`
    // points to at least one readable byte within `state.output`.
    let byte = unsafe { *state.next };
    // Safety: for the same reason, advancing `state.next` by one stays within (or one
    // past the end of) the `state.output` buffer, as the pointer `add` method requires.
    state.next = unsafe { state.next.add(1) };
    byte as _
}

/// Alias of [`gzgetc`], kept for backward compatibility.
///
/// This function forwards directly to [`gzgetc`] and returns its result unchanged.
///
/// # Returns
///
/// - The byte that was read, on success.
/// - `-1` on error.
///
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzgetc_))]
pub unsafe extern "C-unwind" fn gzgetc_(file: gzFile) -> c_int {
    // Safety: this function has the same contract as `gzgetc`, so the caller has
    // already guaranteed that `file` is null or a valid file handle.
    unsafe { gzgetc(file) }
}

/// Read a line of decompressed bytes from `file` into `buf`.
///
/// Reading stops as soon as `len-1` characters have been stored, or a newline
/// character has been read and stored in `buf`, or an end-of-file condition is
/// encountered. If any characters are read, or if `len` is one, the string is
/// terminated with a null character. If no characters are read due to an
/// end-of-file, or `len` is less than one, the buffer is left untouched.
///
/// Note: This function generally only makes sense for files whose decompressed
/// content is text. Null bytes in the data are copied into `buf` like any other
/// character, so the returned C string will appear truncated at the first one.
/// To read gzip files whose decompressed content is binary, please see [`gzread`].
///
/// # Returns
///
/// - `buf`, which now is a null-terminated string, on success.
/// - `null` on error. If there was an error, the contents at `buf` are indeterminate.
///
/// # Safety
///
/// - `file`, if non-null, must be an open file handle obtained from [`gzopen`] or [`gzdopen`].
/// - `buf` must be null or a pointer to at least `len` writable bytes.
#[cfg_attr(feature = "export-symbols", export_name = crate::prefix!(gzgets))]
pub unsafe extern "C-unwind" fn gzgets(file: gzFile, buf: *mut c_char, len: c_int) -> *mut c_char {
    // Reject a missing buffer, or a length with no room for even the terminator.
    if len < 1 || buf.is_null() {
        return ptr::null_mut();
    }

    // Safety: per this function's contract, `file` is either null or a valid handle.
    let state = match unsafe { file.cast::<GzState>().as_mut() } {
        Some(state) => state,
        None => return ptr::null_mut(),
    };

    // Only a stream opened for reading, with no (serious) pending error, may proceed.
    let reading = state.mode == GzMode::GZ_READ;
    let recoverable = state.err == Z_OK || state.err == Z_BUF_ERROR;
    if !(reading && recoverable) {
        return ptr::null_mut();
    }

    /* FIXME uncomment when seek support is implemented.
    // Process a skip request.
    if state.seek {
        state.seek = false;
        if gz_skip(state, state.skip) == -1 {
            return ptr::null_mut();
        }
    }
    */

    // Number of data bytes that fit in `buf` once the terminator is accounted for.
    // `len >= 1` was checked above, so the subtraction cannot underflow.
    let capacity = len as usize - 1;
    if capacity == 0 {
        // A 1-byte buffer only has room for the terminating null.
        // Safety: `len` is 1 here, so writing one byte at `*buf` is in bounds.
        unsafe { *buf = 0 };
        return buf;
    }

    // Copy output bytes up to a newline or `len - 1` bytes, whichever comes first.
    let mut remaining = capacity;
    let mut cursor = buf;
    loop {
        // Make sure there is something in the output buffer.
        if state.have == 0 {
            // Safety: `state` is valid based on the checked cast above.
            if unsafe { gz_fetch(state) }.is_err() {
                // Error -- couldn't read any data.
                return ptr::null_mut();
            }
            if state.have == 0 {
                // End of file; return whatever has been collected so far.
                state.past = true;
                break;
            }
        }

        // Search the usable part of the output buffer for a newline.
        let window = cmp::min(remaining, state.have as _);
        // Safety: `state.next` points to `state.have` readable bytes, and only the first
        // `window` of them are scanned, where `window <= state.have` by the `min` above.
        let newline =
            unsafe { libc::memchr(state.next.cast::<c_void>(), '\n' as c_int, window as _) };
        let count = if newline.is_null() {
            // No newline in range: take the whole window.
            window
        } else {
            // Safety: `newline` was found by `memchr` inside the buffer `state.next` points
            // into, so `offset_from` is valid. `memchr` only scans forward, so the offset
            // is non-negative and can be cast to an unsigned value.
            let offset = unsafe { newline.cast::<u8>().offset_from(state.next) };
            // + 1 so that the newline itself is copied too.
            offset as usize + 1
        };

        // Copy through end of line, or the whole window if no newline was found.
        // Safety: `state.next` has at least `count` readable bytes (`count <= state.have`),
        // `cursor` has at least `count` writable bytes (`count <= remaining`), and the two
        // regions cannot overlap because one is an internal buffer and the other is
        // supplied by the caller.
        unsafe { ptr::copy_nonoverlapping(state.next, cursor as _, count) };
        state.have -= count as c_uint;
        // Safety: `state.next` pointed to at least `count` readable bytes, so after
        // advancing by `count` it still points into the `output` buffer.
        state.next = unsafe { state.next.add(count) };
        state.pos += count as u64;
        remaining -= count;
        // Safety: `cursor` pointed to at least `count` writable bytes, so after advancing
        // by `count` it still points into `buf`.
        cursor = unsafe { cursor.add(count) };

        if remaining == 0 || !newline.is_null() {
            break;
        }
    }

    if cursor == buf {
        // Nothing was copied.
        return ptr::null_mut();
    }

    // Something was copied. Null-terminate and return the string.
    // Safety: at most `len - 1` bytes were copied and `cursor` points just past the last
    // one, so `cursor` is within the block of `len` writable bytes starting at `buf`.
    unsafe { *cursor = 0 };
    buf
}

// Create a deep copy of a C string using `ALLOCATOR`
//
// # Safety
Expand Down
152 changes: 150 additions & 2 deletions test-libz-rs-sys/src/gz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ use zlib_rs::c_api::*;

use libz_rs_sys::{
gzFile_s, gzbuffer, gzclearerr, gzclose, gzclose_r, gzclose_w, gzdirect, gzdopen, gzerror,
gzflush, gzoffset, gzopen, gzputc, gzputs, gzread, gztell, gzwrite,
gzflush, gzgetc, gzgetc_, gzgets, gzoffset, gzopen, gzputc, gzputs, gzread, gztell, gzwrite,
};

use std::ffi::{c_char, c_int, c_uint, c_void, CString};
use std::ffi::{c_char, c_int, c_uint, c_void, CStr, CString};
use std::path::{Path, PathBuf};
use std::ptr;

Expand Down Expand Up @@ -1022,6 +1022,154 @@ fn gzputs_error() {
assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
}

#[test]
fn gzgetc_basic() {
    // Decompress a gzip file one byte at a time, once through gzgetc and once through
    // its alias gzgetc_, and verify that each yields the expected content.
    const EXPECTED: &str = "gzip\nexample data\nfor tests";

    for read_byte in [|f| unsafe { gzgetc(f) }, |f| unsafe { gzgetc_(f) }] {
        let file_name = crate_path("src/test-data/text.gz");
        let path = CString::new(file_name.as_str()).unwrap();
        let mode = CString::new("r").unwrap();
        let file = unsafe { gzopen(path.as_ptr(), mode.as_ptr()) };
        assert!(!file.is_null());
        // NOTE(review): the 8-byte buffer size presumably forces several buffer refills
        // over the course of the read -- confirm against gzbuffer's documentation.
        assert_eq!(unsafe { gzbuffer(file, 8) }, 0);

        // Safety (for each `read_byte` call): `file` was initialized by `gzopen`.
        let content: String = (0..EXPECTED.len())
            .map(|_| {
                let ch = read_byte(file);
                assert_ne!(ch, -1);
                ch as u8 as char
            })
            .collect();

        // Everything has been consumed, so one more read should return -1.
        assert_eq!(read_byte(file), -1);
        assert_eq!(unsafe { gzclose(file) }, Z_OK);
        assert_eq!(content.as_str(), EXPECTED);
    }
}

#[test]
fn gzgetc_error() {
    // Exercise the failure paths of both gzgetc and its alias gzgetc_.
    for read_byte in [|f| unsafe { gzgetc(f) }, |f| unsafe { gzgetc_(f) }] {
        // A null file handle should yield -1.
        assert_eq!(read_byte(ptr::null_mut()), -1);

        // Wrap an invalid file descriptor first as a write-only stream and then as a
        // read stream. gzgetc should return -1 in both cases, and closing the handle
        // should report Z_ERRNO.
        for mode in ["w", "r"] {
            let mode = CString::new(mode).unwrap();
            let file = unsafe { gzdopen(-2, mode.as_ptr()) };
            assert_eq!(read_byte(file), -1);
            assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);
        }
    }
}

#[test]
fn gzgets_basic() {
    // Walk through a gzip-compressed text file with gzgets, checking both the bytes
    // written to the buffer and the returned pointer (which, on success, must be the
    // buffer that was passed in).

    // Open a file containing gzip-compressed text.
    let file_name = crate_path("src/test-data/text.gz");
    let path = CString::new(file_name.as_str()).unwrap();
    let mode = CString::new("r").unwrap();
    let file = unsafe { gzopen(path.as_ptr(), mode.as_ptr()) };
    assert!(!file.is_null());

    // gzgets with a buffer too small to hold the next line should fetch len-1 bytes and
    // add a null terminator. Note: we fill the output buffer with a nonzero value before
    // the call to make sure gzgets null-terminates properly.
    let mut buf = [127 as c_char; 4];
    let ret = unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) };
    assert_eq!(ret, buf.as_mut_ptr());
    assert_eq!(
        unsafe { CStr::from_ptr(buf.as_ptr()).to_str().unwrap() },
        "gzi"
    );

    // gzgets with a bigger buffer should fetch (only) the remainder of the line, up to and
    // including the '\n'.
    let mut buf = [127 as c_char; 100];
    let ret = unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) };
    assert_eq!(ret, buf.as_mut_ptr());
    assert_eq!(
        unsafe { CStr::from_ptr(buf.as_ptr()).to_str().unwrap() },
        "p\n"
    );

    // gzgets with len=1 should return a string consisting of just a null terminator.
    let mut buf = [127 as c_char; 1];
    let ret = unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) };
    assert_eq!(ret, buf.as_mut_ptr());
    assert_eq!(buf[0], 0 as c_char);

    // Read the next line with gzgets, using a buffer just big enough.
    let mut buf = [127 as c_char; 14];
    let ret = unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) };
    assert_eq!(ret, buf.as_mut_ptr());
    assert_eq!(
        unsafe { CStr::from_ptr(buf.as_ptr()).to_str().unwrap() },
        "example data\n"
    );

    // Read the final line of the file, which is not terminated by a newline character.
    let mut buf = [127 as c_char; 100];
    let ret = unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) };
    assert_eq!(ret, buf.as_mut_ptr());
    assert_eq!(
        unsafe { CStr::from_ptr(buf.as_ptr()).to_str().unwrap() },
        "for tests"
    );

    // gzgets at the end of the file should return null.
    let mut buf = [127 as c_char; 100];
    let ret = unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) };
    assert!(ret.is_null());

    assert_eq!(unsafe { gzclose(file) }, Z_OK);
}

#[test]
fn gzgets_error() {
    // Exercise each failure path of gzgets. Every case below passes the handle it has
    // just opened, so that the specific condition named in its comment is what gets
    // tested, rather than the null-handle check.
    let mut buf = [0 as c_char; 16];

    // gzgets on a null file handle should return null.
    assert!(unsafe { gzgets(ptr::null_mut(), buf.as_mut_ptr(), buf.len() as _) }.is_null());

    // gzgets on a write-only file handle should return null.
    let mode = CString::new("w").unwrap();
    let file = unsafe { gzdopen(-2, mode.as_ptr()) };
    assert!(unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) }.is_null());
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);

    // Open an invalid file descriptor as a gzip read stream. gzgets should return null.
    let mode = CString::new("r").unwrap();
    let file = unsafe { gzdopen(-2, mode.as_ptr()) };
    assert!(unsafe { gzgets(file, buf.as_mut_ptr(), buf.len() as _) }.is_null());
    assert_eq!(unsafe { gzclose(file) }, Z_ERRNO);

    // Test invalid gzgets parameters with a valid input file.
    let file_name = crate_path("src/test-data/issue-109.gz");
    let path = CString::new(file_name.as_str()).unwrap();
    let mode = CString::new("r").unwrap();
    let file = unsafe { gzopen(path.as_ptr(), mode.as_ptr()) };
    assert!(!file.is_null());
    // gzgets with a null buffer should return null.
    assert!(unsafe { gzgets(file, ptr::null_mut(), 1) }.is_null());
    // gzgets with a nonpositive len should return null.
    assert!(unsafe { gzgets(file, buf.as_mut_ptr(), 0) }.is_null());
    assert!(unsafe { gzgets(file, buf.as_mut_ptr(), -1) }.is_null());
    assert_eq!(unsafe { gzclose(file) }, Z_OK);
}

// Get the size in bytes of a file.
//
// # Returns
Expand Down
Binary file added test-libz-rs-sys/src/test-data/text.gz
Binary file not shown.
Loading