diff --git a/doc/specs/stdlib_string_type.md b/doc/specs/stdlib_string_type.md index 76f5cd5c4..3a809171e 100644 --- a/doc/specs/stdlib_string_type.md +++ b/doc/specs/stdlib_string_type.md @@ -1254,7 +1254,7 @@ The result is a scalar `string_type` value. ```fortran program demo_to_title - use stdlib_string_type, only: string_type, to_title + use stdlib_string_type implicit none type(string_type) :: string, titlecase_string @@ -1302,7 +1302,7 @@ The result is a scalar `string_type` value. ```fortran program demo_to_sentence - use stdlib_string_type, only: string_type, to_sentence + use stdlib_string_type implicit none type(string_type) :: string, sentencecase_string diff --git a/doc/specs/stdlib_strings.md b/doc/specs/stdlib_strings.md index 2b29f3d58..c047653b5 100644 --- a/doc/specs/stdlib_strings.md +++ b/doc/specs/stdlib_strings.md @@ -192,3 +192,81 @@ program demo print'(a)', ends_with("pattern", "pat") ! F end program demo ``` + + + +### `slice` + +#### Description + +Extracts the characters from the defined region of the input string by taking strides. + +Deduction Process: +Function first automatically deduces the optional arguments that are not provided by the user. +This process is independent of both input `string` and permitted indexes of Fortran. +Deduced `first` and `last` argument take +infinity or -infinity value whereas deduced `stride` argument takes +1 or -1 value. + +Validation Process: +Argument `first` and `last` defines this region for extraction by function `slice`. +If the defined region is invalid i.e. region contains atleast one invalid index, `first` and +`last` are converted to first and last valid indexes in this defined region respectively, +if no valid index exists in this region an empty string is returned. +`stride` can attain both negative or positive values but when the only invalid value +0 is given, it is converted to 1. + +Extraction Process: +After all this, extraction starts from `first` index and takes stride of length `stride`. +Extraction starts only if `last` index is crossable from `first` index with stride `stride` +and remains active until `last` index is crossed. + +#### Syntax + +`string = [[stdlib_strings(module):slice(interface)]] (string, first, last, stride)` + +#### Status + +Experimental + +#### Class + +Pure function. + +#### Argument + +- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]] + This argument is intent(in). +- `first`: integer + This argument is intent(in) and optional. +- `last`: integer + This argument is intent(in) and optional. +- `stride`: integer + This argument is intent(in) and optional. + +#### Result value + +The result is of the same type as `string`. + +#### Example + +```fortran +program demo_slice + use stdlib_string_type + use stdlib_strings, only : slice + implicit none + type(string_type) :: string + character(len=10) :: char + + string = "abcdefghij" + ! string <-- "abcdefghij" + + char = "abcdefghij" + ! char <-- "abcdefghij" + + print'(a)', slice("abcdefghij", 2, 6, 2) ! "bdf" + print'(a)', slice(char, 2, 6, 2) ! "bdf" + + string = slice(string, 2, 6, 2) + ! string <-- "bdf" + +end program demo_slice +``` diff --git a/src/Makefile.manual b/src/Makefile.manual index b372a8cb6..22a5abcdb 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -112,6 +112,8 @@ stdlib_stats_var.o: \ stdlib_stats_distribution_PRNG.o: \ stdlib_kinds.o \ stdlib_error.o -stdlib_string_type.o: stdlib_ascii.o stdlib_kinds.o -stdlib_strings.o: stdlib_ascii.o stdlib_string_type.o +stdlib_string_type.o: stdlib_ascii.o \ + stdlib_kinds.o +stdlib_strings.o: stdlib_ascii.o \ + stdlib_string_type.o stdlib_math.o: stdlib_kinds.o diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 0bc83f9ce..158b06588 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -11,6 +11,7 @@ module stdlib_strings public :: strip, chomp public :: starts_with, ends_with + public :: slice !> Remove leading and trailing whitespace characters. @@ -57,6 +58,14 @@ module stdlib_strings module procedure :: ends_with_char_string module procedure :: ends_with_char_char end interface ends_with + + !> Extracts characters from the input string to return a new string + !> + !> Version: experimental + interface slice + module procedure :: slice_string + module procedure :: slice_char + end interface slice contains @@ -290,5 +299,72 @@ elemental function ends_with_string_string(string, substring) result(match) end function ends_with_string_string + !> Extract the characters from the region between 'first' and 'last' index (both inclusive) + !> of the input 'string' by taking strides of length 'stride' + !> Returns a new string + elemental function slice_string(string, first, last, stride) result(sliced_string) + type(string_type), intent(in) :: string + integer, intent(in), optional :: first, last, stride + type(string_type) :: sliced_string + + sliced_string = string_type(slice(char(string), first, last, stride)) + + end function slice_string + + !> Extract the characters from the region between 'first' and 'last' index (both inclusive) + !> of the input 'string' by taking strides of length 'stride' + !> Returns a new string + pure function slice_char(string, first, last, stride) result(sliced_string) + character(len=*), intent(in) :: string + integer, intent(in), optional :: first, last, stride + integer :: first_index, last_index, stride_vector, strides_taken, length_string, i, j + character(len=:), allocatable :: sliced_string + length_string = len(string) + + first_index = 0 ! first_index = -infinity + last_index = length_string + 1 ! last_index = +infinity + stride_vector = 1 + + if (present(stride)) then + if (stride /= 0) then + if (stride < 0) then + first_index = length_string + 1 ! first_index = +infinity + last_index = 0 ! last_index = -infinity + end if + stride_vector = stride + end if + else + if (present(first) .and. present(last)) then + if (last < first) then + stride_vector = -1 + end if + end if + end if + + if (present(first)) then + first_index = first + end if + if (present(last)) then + last_index = last + end if + + if (stride_vector > 0) then + first_index = max(first_index, 1) + last_index = min(last_index, length_string) + else + first_index = min(first_index, length_string) + last_index = max(last_index, 1) + end if + + strides_taken = floor( real(last_index - first_index)/real(stride_vector) ) + allocate(character(len=max(0, strides_taken + 1)) :: sliced_string) + + j = 1 + do i = first_index, last_index, stride_vector + sliced_string(j:j) = string(i:i) + j = j + 1 + end do + end function slice_char + end module stdlib_strings diff --git a/src/tests/string/test_string_functions.f90 b/src/tests/string/test_string_functions.f90 index e7157697d..e72b4c162 100644 --- a/src/tests/string/test_string_functions.f90 +++ b/src/tests/string/test_string_functions.f90 @@ -1,8 +1,12 @@ ! SPDX-Identifier: MIT module test_string_functions + use, intrinsic :: iso_fortran_env, only : error_unit use stdlib_error, only : check use stdlib_string_type, only : string_type, assignment(=), operator(==), & to_lower, to_upper, to_title, to_sentence, reverse + use stdlib_strings, only: slice + use stdlib_optval, only: optval + use stdlib_ascii, only : to_string implicit none contains @@ -52,6 +56,236 @@ subroutine test_reverse_string end subroutine test_reverse_string + subroutine test_slice_string + type(string_type) :: test_string + test_string = "abcdefghijklmnopqrstuvwxyz" + + ! Only one argument is given + ! Valid + call check(slice(test_string, first=10) == "jklmnopqrstuvwxyz", & + "Slice, Valid arguments: first=10") ! last=+inf + call check(slice(test_string, last=10) == "abcdefghij", & + "Slice, Valid arguments: last=10") ! first=-inf + call check(slice(test_string, stride=3) == "adgjmpsvy", & + "Slice, Valid arguments: stride=3") ! first=-inf, last=+inf + call check(slice(test_string, stride=-3) == "zwtqnkheb", & + "Slice, Valid arguments: stride=-3") ! first=+inf, last=-inf + + ! Invalid + call check(slice(test_string, first=27) == "", & + "Slice, Invalid arguments: first=27") ! last=+inf + call check(slice(test_string, first=-10) == "abcdefghijklmnopqrstuvwxyz", & + "Slice, Invalid arguments: first=-10") ! last=+inf + call check(slice(test_string, last=-2) == "", & + "Slice, Invalid arguments: last=-2") ! first=-inf + call check(slice(test_string, last=30) == "abcdefghijklmnopqrstuvwxyz", & + "Slice, Invalid arguments: last=30") ! first=-inf + call check(slice(test_string, stride=0) == "abcdefghijklmnopqrstuvwxyz", & + "Slice, Invalid arguments: stride=0") ! stride=1 + + ! Only two arguments are given + ! Valid + call check(slice(test_string, first=10, last=20) == "jklmnopqrst", & + "Slice, Valid arguments: first=10, last=20") + call check(slice(test_string, first=7, last=2) == "gfedcb", & + "Slice, Valid arguments: first=7, last=2") ! stride=-1 + call check(slice(test_string, first=10, stride=-2) == "jhfdb", & + "Slice, Valid arguments: first=10, stride=-2") ! last=-inf + call check(slice(test_string, last=21, stride=-2) == "zxv", & + "Slice, Valid arguments: last=21, stride=-2") ! first=+inf + + ! Atleast one argument is invalid + call check(slice(test_string, first=30, last=-3) == "zyxwvutsrqponmlkjihgfedcba", & + "Slice, Invalid arguments: first=30, last=-3") + call check(slice(test_string, first=1, last=-20) == "a", & + "Slice, Invalid arguments: first=1, last=-20") + call check(slice(test_string, first=7, last=-10) == "gfedcba", & + "Slice, Invalid arguments: first=7, last=-10") + call check(slice(test_string, first=500, last=22) == "zyxwv", & + "Slice, Invalid arguments: first=500, last=22") + call check(slice(test_string, first=50, last=27) == "", & + "Slice, Invalid arguments: first=50, last=27") + call check(slice(test_string, first=-20, last=0) == "", & + "Slice, Invalid arguments: first=-20, last=0") + call check(slice(test_string, last=-3, stride=-2) == "zxvtrpnljhfdb", & + "Slice, Invalid arguments: last=-3, stride=-2") ! first=+inf + call check(slice(test_string, last=10, stride=0) == "abcdefghij", & + "Slice, Invalid arguments: last=10, stride=0") ! stride=1 + call check(slice(test_string, first=-2, stride=-2) == "", & + "Slice, Invalid arguments: first=-2, stride=-2") ! last=-inf + call check(slice(test_string, first=27, stride=2) == "", & + "Slice, Invalid arguments: first=27, stride=2") ! last=+inf + call check(slice(test_string, last=27, stride=-1) == "", & + "Slice, Invalid arguments: last=27, stride=-1") ! first=+inf + + ! All three arguments are given + ! Valid + call check(slice(test_string, first=2, last=16, stride=3) == "behkn", & + "Slice, Valid arguments: first=2, last=16, stride=3") + call check(slice(test_string, first=16, last=2, stride=-3) == "pmjgd", & + "Slice, Valid arguments: first=16, last=2, stride=-3") + call check(slice(test_string, first=7, last=7, stride=-4) == "g", & + "Slice, Valid arguments: first=7, last=7, stride=-4") + call check(slice(test_string, first=7, last=7, stride=3) == "g", & + "Slice, Valid arguments: first=7, last=7, stride=3") + call check(slice(test_string, first=2, last=6, stride=-1) == "", & + "Slice, Valid arguments: first=2, last=6, stride=-1") + call check(slice(test_string, first=20, last=10, stride=2) == "", & + "Slice, Valid arguments: first=20, last=10, stride=2") + + ! Atleast one argument is invalid + call check(slice(test_string, first=20, last=30, stride=2) == "tvxz", & + "Slice, Invalid arguments: first=20, last=30, stride=2") + call check(slice(test_string, first=-20, last=30, stride=2) == "acegikmoqsuwy", & + "Slice, Invalid arguments: first=-20, last=30, stride=2") + call check(slice(test_string, first=26, last=30, stride=1) == "z", & + "Slice, Invalid arguments: first=26, last=30, stride=1") + call check(slice(test_string, first=1, last=-20, stride=-1) == "a", & + "Slice, Invalid arguments: first=1, last=-20, stride=-1") + call check(slice(test_string, first=26, last=20, stride=1) == "", & + "Slice, Invalid arguments: first=26, last=20, stride=1") + call check(slice(test_string, first=1, last=20, stride=-1) == "", & + "Slice, Invalid arguments: first=1, last=20, stride=-1") + + test_string = "" + ! Empty string input + call check(slice(test_string, first=-2, last=6) == "", & + "Slice, Empty string: first=-2, last=6") + call check(slice(test_string, first=6, last=-2) == "", & + "Slice, Empty string: first=6, last=-2") + call check(slice(test_string, first=-10) == "", & + "Slice, Empty string: first=-10") ! last=+inf + call check(slice(test_string, last=10) == "", & + "Slice, Empty string: last=10") ! first=-inf + call check(slice(test_string) == "", & + "Slice, Empty string: no arguments provided") + + end subroutine test_slice_string + + subroutine test_slice_gen + character(len=*), parameter :: test = & + & "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + integer :: i, j, k + integer, parameter :: offset = 3 + + do i = 1 - offset, len(test) + offset + call check_slicer(test, first=i) + end do + + do i = 1 - offset, len(test) + offset + call check_slicer(test, last=i) + end do + + do i = -len(test) - offset, len(test) + offset + call check_slicer(test, stride=i) + end do + + do i = 1 - offset, len(test) + offset + do j = 1 - offset, len(test) + offset + call check_slicer(test, first=i, last=j) + end do + end do + + do i = 1 - offset, len(test) + offset + do j = -len(test) - offset, len(test) + offset + call check_slicer(test, first=i, stride=j) + end do + end do + + do i = 1 - offset, len(test) + offset + do j = -len(test) - offset, len(test) + offset + call check_slicer(test, last=i, stride=j) + end do + end do + + do i = 1 - offset, len(test) + offset + do j = 1 - offset, len(test) + offset + do k = -len(test) - offset, len(test) + offset + call check_slicer(test, first=i, last=j, stride=k) + end do + end do + end do + end subroutine test_slice_gen + + subroutine check_slicer(string, first, last, stride) + character(len=*), intent(in) :: string + integer, intent(in), optional :: first + integer, intent(in), optional :: last + integer, intent(in), optional :: stride + + character(len=:), allocatable :: actual, expected, message + logical :: stat + + actual = slice(string, first, last, stride) + expected = reference_slice(string, first, last, stride) + + stat = actual == expected + + if (.not.stat) then + message = "For input '"//string//"'"//new_line('a') + + if (present(first)) then + message = message // "first: "//to_string(first)//new_line('a') + end if + if (present(last)) then + message = message // "last: "//to_string(last)//new_line('a') + end if + if (present(stride)) then + message = message // "stride: "//to_string(stride)//new_line('a') + end if + message = message // "Expected: '"//expected//"' but got '"//actual//"'" + end if + call check(stat, message) + + end subroutine check_slicer + + pure function reference_slice(string, first, last, stride) result(sliced_string) + character(len=*), intent(in) :: string + integer, intent(in), optional :: first + integer, intent(in), optional :: last + integer, intent(in), optional :: stride + character(len=:), allocatable :: sliced_string + character(len=1), allocatable :: carray(:) + + integer :: first_, last_, stride_ + + stride_ = 1 + if (present(stride)) then + stride_ = merge(stride_, stride, stride == 0) + else + if (present(first) .and. present(last)) then + if (last < first) stride_ = -1 + end if + end if + + if (stride_ < 0) then + last_ = min(max(optval(last, 1), 1), len(string)+1) + first_ = min(max(optval(first, len(string)), 0), len(string)) + else + first_ = min(max(optval(first, 1), 1), len(string)+1) + last_ = min(max(optval(last, len(string)), 0), len(string)) + end if + + carray = string_to_carray(string) + carray = carray(first_:last_:stride_) + sliced_string = carray_to_string(carray) + + end function reference_slice + + pure function string_to_carray(string) result(carray) + character(len=*), intent(in) :: string + character(len=1) :: carray(len(string)) + + carray = transfer(string, carray) + end function string_to_carray + + pure function carray_to_string(carray) result(string) + character(len=1), intent(in) :: carray(:) + character(len=size(carray)) :: string + + string = transfer(carray, string) + end function carray_to_string + end module test_string_functions @@ -64,5 +298,7 @@ program tester call test_to_title_string call test_to_sentence_string call test_reverse_string + call test_slice_string + call test_slice_gen end program tester