Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

implemented low level find function for string matching #433

Merged
merged 16 commits into from
Jun 18, 2021
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 62 additions & 4 deletions doc/specs/stdlib_strings.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,13 @@ Pure function.

#### Argument

- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]
- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]].
This argument is intent(in).
- `first`: integer
- `first`: integer.
This argument is intent(in) and optional.
- `last`: integer
- `last`: integer.
This argument is intent(in) and optional.
- `stride`: integer
- `stride`: integer.
This argument is intent(in) and optional.

#### Result value
Expand Down Expand Up @@ -270,3 +270,61 @@ program demo_slice

end program demo_slice
```


<!-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -->
### `find`

#### Description

Returns the starting index of the `occurrence`th occurrence of the substring `pattern`
in the input string `string`.
Default value of `occurrence` is set to `1`.
If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring as two different occurrences.
If `occurrence`th occurrence is not found, function returns `0`.


#### Syntax

`string = [[stdlib_strings(module):find(interface)]] (string, pattern, occurrence, consider_overlapping)`

#### Status

Experimental

#### Class

Elemental function

#### Argument

- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]].
This argument is intent(in).
- `pattern`: Character scalar or [[stdlib_string_type(module):string_type(type)]].
This argument is intent(in).
- `occurrence`: integer.
This argument is intent(in) and optional.
- `consider_overlapping`: logical.
This argument is intent(in) and optional.

#### Result value

The result is a scalar of integer type or integer array of rank equal to the highest rank among all dummy arguments.

#### Example

```fortran
program demo_find
use stdlib_string_type
use stdlib_strings, only : find
implicit none
string_type :: string

string = "needle in the character-stack"

print *, find(string, "needle") ! 1
print *, find(string, ["a", "c"], [3, 2]) ! [27, 20]
print *, find("qwqwqwq", "qwq", 3, [.false., .true.]) ! [0, 5]

end program demo_find
```
5 changes: 3 additions & 2 deletions src/Makefile.manual
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ stdlib_stats_distribution_PRNG.o: \
stdlib_kinds.o \
stdlib_error.o
stdlib_string_type.o: stdlib_ascii.o \
stdlib_kinds.o
stdlib_kinds.o
stdlib_strings.o: stdlib_ascii.o \
stdlib_string_type.o
stdlib_string_type.o \
stdlib_optval.o
stdlib_math.o: stdlib_kinds.o
138 changes: 135 additions & 3 deletions src/stdlib_strings.f90
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
!>
!> The specification of this module is available [here](../page/specs/stdlib_strings.html).
module stdlib_strings
use stdlib_ascii, only : whitespace
use stdlib_string_type, only : string_type, char, verify
use stdlib_ascii, only: whitespace
use stdlib_string_type, only: string_type, char, verify
use stdlib_optval, only: optval
implicit none
private

public :: strip, chomp
public :: starts_with, ends_with
public :: slice
public :: slice, find


!> Remove leading and trailing whitespace characters.
Expand Down Expand Up @@ -67,6 +68,15 @@ module stdlib_strings
module procedure :: slice_char
end interface slice

!> Finds the starting index of substring 'pattern' in the input 'string'
!>
!> Version: experimental
interface find
module procedure :: find_string_string
module procedure :: find_string_char
module procedure :: find_char_string
module procedure :: find_char_char
end interface find

contains

Expand Down Expand Up @@ -366,5 +376,127 @@ pure function slice_char(string, first, last, stride) result(sliced_string)
end do
end function slice_char

!> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern'
!> in input 'string'
!> Returns an integer
elemental function find_string_string(string, pattern, occurrence, consider_overlapping) result(res)
type(string_type), intent(in) :: string
type(string_type), intent(in) :: pattern
integer, intent(in), optional :: occurrence
logical, intent(in), optional :: consider_overlapping
integer :: res

res = find(char(string), char(pattern), occurrence, consider_overlapping)

end function find_string_string

!> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern'
!> in input 'string'
!> Returns an integer
elemental function find_string_char(string, pattern, occurrence, consider_overlapping) result(res)
type(string_type), intent(in) :: string
character(len=*), intent(in) :: pattern
integer, intent(in), optional :: occurrence
logical, intent(in), optional :: consider_overlapping
integer :: res

res = find(char(string), pattern, occurrence, consider_overlapping)

end function find_string_char

!> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern'
!> in input 'string'
!> Returns an integer
elemental function find_char_string(string, pattern, occurrence, consider_overlapping) result(res)
character(len=*), intent(in) :: string
type(string_type), intent(in) :: pattern
integer, intent(in), optional :: occurrence
logical, intent(in), optional :: consider_overlapping
integer :: res

res = find(string, char(pattern), occurrence, consider_overlapping)

end function find_char_string

!> Returns the starting index of the 'occurrence'th occurrence of substring 'pattern'
!> in input 'string'
!> Returns an integer
elemental function find_char_char(string, pattern, occurrence, consider_overlapping) result(res)
character(len=*), intent(in) :: string
character(len=*), intent(in) :: pattern
integer, intent(in), optional :: occurrence
logical, intent(in), optional :: consider_overlapping
integer :: lps_array(len(pattern))
integer :: res, s_i, p_i, length_string, length_pattern, occurrence_
logical :: consider_overlapping_

consider_overlapping_ = optval(consider_overlapping, .true.)
occurrence_ = optval(occurrence, 1)
res = 0
length_string = len(string)
length_pattern = len(pattern)

if (length_pattern > 0 .and. length_pattern <= length_string &
& .and. occurrence_ > 0) then
lps_array = compute_lps(pattern)

s_i = 1
p_i = 1
do while(s_i <= length_string)
if (string(s_i:s_i) == pattern(p_i:p_i)) then
if (p_i == length_pattern) then
occurrence_ = occurrence_ - 1
if (occurrence_ == 0) then
res = s_i - length_pattern + 1
exit
else if (consider_overlapping_) then
p_i = lps_array(p_i)
else
p_i = 0
end if
end if
s_i = s_i + 1
p_i = p_i + 1
else if (p_i > 1) then
p_i = lps_array(p_i - 1) + 1
else
s_i = s_i + 1
end if
end do
end if

end function find_char_char

!> Computes longest prefix suffix for each index of the input 'string'
!>
!> Returns an array of integers
pure function compute_lps(string) result(lps_array)
character(len=*), intent(in) :: string
integer :: lps_array(len(string))
integer :: i, j, length_string

length_string = len(string)

if (length_string > 0) then
lps_array(1) = 0

i = 2
j = 1
do while (i <= length_string)
if (string(j:j) == string(i:i)) then
lps_array(i) = j
i = i + 1
j = j + 1
else if (j > 1) then
j = lps_array(j - 1) + 1
else
lps_array(i) = 0
i = i + 1
end if
end do
end if

end function compute_lps


end module stdlib_strings
35 changes: 34 additions & 1 deletion src/tests/string/test_string_functions.f90
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ module test_string_functions
use stdlib_error, only : check
use stdlib_string_type, only : string_type, assignment(=), operator(==), &
to_lower, to_upper, to_title, to_sentence, reverse
use stdlib_strings, only: slice
use stdlib_strings, only: slice, find
use stdlib_optval, only: optval
use stdlib_ascii, only : to_string
implicit none
Expand Down Expand Up @@ -162,6 +162,38 @@ subroutine test_slice_string

end subroutine test_slice_string

subroutine test_find
type(string_type) :: test_string_1, test_string_2, test_pattern_1, test_pattern_2
test_string_1 = "qwqwqwqwqwqwqw"
test_string_2 = "abccbabccbabc"
test_pattern_1 = "qwq"
test_pattern_2 = "abccbabc"

call check(all(find([test_string_1, test_string_2], test_pattern_1, 4) == [7, 0]), &
& 'Find: [test_string_1, test_string_2], test_pattern_1, 4')
call check(all(find(test_string_1, [test_pattern_1, test_pattern_2], 3, .false.) == [9, 0]), &
& 'Find: test_string_1, [test_pattern_1, test_pattern_2], 3, .false.')
call check(find(test_string_1, test_pattern_1, 7) == 0, &
& 'Find: test_string_1, test_pattern_1, 7')
call check(all(find([test_string_1, test_string_2, test_string_2], [test_pattern_1, &
& test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]) == [0, 0, 6]), &
& 'Find: [test_string_1, test_string_2, test_string_2], [test_pattern_1, &
& test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]')
call check(find("qwqwqwqwqwqwqw", test_pattern_1) == 1, &
& 'Find: "qwqwqwqwqwqwqw", test_pattern_1')
call check(all(find(test_string_1, ["qwq", "wqw"], 2) == [3, 4]), &
& 'Find: test_string_1, ["qwq", "wqw"], 2')
call check(find("qwqwqwqwqwqwqw", "qwq", 2, .false.) == 5, &
& 'Find: "qwqwqwqwqwqwqw", "qwq", 2, .false.')
call check(find("", "") == 0, &
& 'Find: "", ""')
call check(find("", test_pattern_1) == 0, &
& 'Find: "", test_pattern_1')
call check(find(test_string_1, "") == 0, &
& 'Find: test_string_1, ""')

end subroutine test_find

subroutine test_slice_gen
character(len=*), parameter :: test = &
& "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
Expand Down Expand Up @@ -300,5 +332,6 @@ program tester
call test_reverse_string
call test_slice_string
call test_slice_gen
call test_find

end program tester