From fc97695c0d96afac66683a9f194733c21a462823 Mon Sep 17 00:00:00 2001 From: breakfast Date: Sun, 28 Mar 2021 15:18:41 -0700 Subject: [PATCH 1/4] Introduce `adjacentPairs` --- Guides/AdjacentPairs.md | 46 +++++ Sources/Algorithms/AdjacentPairs.swift | 179 ++++++++++++++++++ .../AdjacentPairsTests.swift | 98 ++++++++++ 3 files changed, 323 insertions(+) create mode 100644 Guides/AdjacentPairs.md create mode 100644 Sources/Algorithms/AdjacentPairs.swift create mode 100644 Tests/SwiftAlgorithmsTests/AdjacentPairsTests.swift diff --git a/Guides/AdjacentPairs.md b/Guides/AdjacentPairs.md new file mode 100644 index 00000000..b9c5b75d --- /dev/null +++ b/Guides/AdjacentPairs.md @@ -0,0 +1,46 @@ +# AdjacentPairs + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/AdjacentPairs.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/AdjacentPairsTests.swift)] + +Lazily iterates over tuples of adjacent elements. + +This operation is available for any sequence by calling the `adjacentPairs()` method. + +```swift +let numbers = (1...5) +let pairs = numbers.adjacentPairs() +// Array(pairs) == [(1, 2), (2, 3), (3, 4), (4, 5)] +``` + +## Detailed Design + +The `adjacentPairs()` method is declared as a `Sequence` extension returning `AdjacentPairs`. + +```swift +extension Sequence { + public func adjacentPairs() -> AdjacentPairs +} +``` + +The resulting `AdjacentPairs` type is a sequence, with conditional conformance to `Collection`, `BidirectionalCollection`, and `RandomAccessCollection` when the underlying sequence conforms. + +The spelling `zip(s, s.dropFirst())` for a sequence `s` is an equivalent operation on collection types; however, this implementation is undefined behavior on single-pass sequences, and `Zip2Sequence` does not conditionally conform to the `Collection` family of protocols. + +### Complexity + +Calling `adjacentPairs` is an O(1) operation. 
+ +### Naming + +This method is named for clarity while remaining agnostic to any particular domain of programming. In natural language processing, this operation is akin to computing a list of bigrams; however, this algorithm is not specific to this use case. + +[naming]: https://forums.swift.org/t/naming-of-chained-with/40999/ + +### Comparison with other languages + +This function is often written as a `zip` of a sequence together with itself, minus its first element. + +**Haskell:** This operation is spelled ``s `zip` tail s``. + +**Python:** Python users may write `zip(s, s[1:])` for a list with at least one element. For natural language processing, the `nltk` package offers a `bigrams` function akin to this method. diff --git a/Sources/Algorithms/AdjacentPairs.swift b/Sources/Algorithms/AdjacentPairs.swift new file mode 100644 index 00000000..e4df7eac --- /dev/null +++ b/Sources/Algorithms/AdjacentPairs.swift @@ -0,0 +1,179 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2021 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension Sequence { + /// Creates a sequence of adjacent pairs of elements from this sequence. + /// + /// In the `AdjacentPairs` instance returned by this method, the elements of + /// the *i*th pair are the *i*th and *(i+1)*th elements of the underlying + /// sequence. 
+ /// The following example uses the `adjacentPairs()` method to iterate over + /// adjacent pairs of integers: + /// + /// for pair in (1...5).adjacentPairs() { + /// print(pair) + /// } + /// // Prints "(1, 2)" + /// // Prints "(2, 3)" + /// // Prints "(3, 4)" + /// // Prints "(4, 5)" + public func adjacentPairs() -> AdjacentPairs { + AdjacentPairs(_base: self) + } +} + +/// A sequence of adjacent pairs of elements built from an underlying sequence. +/// +/// In an `AdjacentPairs` sequence, the elements of the *i*th pair are the *i*th +/// and *(i+1)*th elements of the underlying sequence. The following example +/// uses the `adjacentPairs()` method to iterate over adjacent pairs of +/// integers: +/// ``` +/// for pair in (1...5).adjacentPairs() { +/// print(pair) +/// } +/// // Prints "(1, 2)" +/// // Prints "(2, 3)" +/// // Prints "(3, 4)" +/// // Prints "(4, 5)" +/// ``` +public struct AdjacentPairs { + internal let _base: Base + + /// Creates an instance that makes pairs of adjacent elements from `base`. + internal init(_base: Base) { + self._base = _base + } +} + +// MARK: - Sequence + +extension AdjacentPairs { + public struct Iterator { + internal var _base: Base.Iterator + internal var _previousElement: Base.Element? + + internal init(_base: Base.Iterator) { + self._base = _base + self._previousElement = self._base.next() + } + } +} + +extension AdjacentPairs.Iterator: IteratorProtocol { + public typealias Element = (Base.Element, Base.Element) + + public mutating func next() -> Element? 
{ + guard let previous = _previousElement, let next = _base.next() else { + return nil + } + _previousElement = next + return (previous, next) + } +} + +extension AdjacentPairs: Sequence { + public func makeIterator() -> Iterator { + Iterator(_base: _base.makeIterator()) + } + + public var underestimatedCount: Int { + Swift.max(0, _base.underestimatedCount - 1) + } +} + +// MARK: - Collection + +extension AdjacentPairs where Base: Collection { + public struct Index: Comparable { + internal var _base: Base.Index + + internal init(_base: Base.Index) { + self._base = _base + } + + public static func < (lhs: Index, rhs: Index) -> Bool { + lhs._base < rhs._base + } + } +} + +extension AdjacentPairs: Collection where Base: Collection { + public var startIndex: Index { Index(_base: _base.startIndex) } + + public var endIndex: Index { + switch _base.endIndex { + case _base.startIndex, _base.index(after: _base.startIndex): + return Index(_base: _base.startIndex) + case let endIndex: + return Index(_base: endIndex) + } + } + + public subscript(position: Index) -> (Base.Element, Base.Element) { + (_base[position._base], _base[_base.index(after: position._base)]) + } + + public func index(after i: Index) -> Index { + let next = _base.index(after: i._base) + return _base.index(after: next) == _base.endIndex + ? endIndex + : Index(_base: next) + } + + public func index(_ i: Index, offsetBy distance: Int) -> Index { + if distance == 0 { + return i + } else if distance > 0 { + let offsetIndex = _base.index(i._base, offsetBy: distance) + return _base.index(after: offsetIndex) == _base.endIndex + ? endIndex + : Index(_base: offsetIndex) + } else { + return i == endIndex + ? 
Index(_base: _base.index(i._base, offsetBy: distance - 1)) + : Index(_base: _base.index(i._base, offsetBy: distance)) + } + } + + public func distance(from start: Index, to end: Index) -> Int { + let offset: Int + switch (start._base, end._base) { + case (_base.endIndex, _base.endIndex): + return 0 + case (_base.endIndex, _): + offset = +1 + case (_, _base.endIndex): + offset = -1 + default: + offset = 0 + } + + return _base.distance(from: start._base, to: end._base) + offset + } + + public var count: Int { + Swift.max(0, _base.count - 1) + } +} + +extension AdjacentPairs: BidirectionalCollection + where Base: BidirectionalCollection +{ + public func index(before i: Index) -> Index { + i == endIndex + ? Index(_base: _base.index(i._base, offsetBy: -2)) + : Index(_base: _base.index(before: i._base)) + } +} + +extension AdjacentPairs: RandomAccessCollection + where Base: RandomAccessCollection {} diff --git a/Tests/SwiftAlgorithmsTests/AdjacentPairsTests.swift b/Tests/SwiftAlgorithmsTests/AdjacentPairsTests.swift new file mode 100644 index 00000000..3226027d --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/AdjacentPairsTests.swift @@ -0,0 +1,98 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2021 Apple Inc. 
and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class AdjacentPairsTests: XCTestCase { + func testZeroElements() { + let pairs = (0..<0).adjacentPairs() + XCTAssertEqual(pairs.startIndex, pairs.endIndex) + XCTAssert(Array(pairs) == []) + } + + func testOneElement() { + let pairs = (0..<1).adjacentPairs() + XCTAssertEqual(pairs.startIndex, pairs.endIndex) + XCTAssert(Array(pairs) == []) + } + + func testTwoElements() { + let pairs = (0..<2).adjacentPairs() + XCTAssert(Array(pairs) == [(0, 1)]) + } + + func testThreeElements() { + let pairs = (0..<3).adjacentPairs() + XCTAssert(Array(pairs) == [(0, 1), (1, 2)]) + } + + func testFourElements() { + let pairs = (0..<4).adjacentPairs() + XCTAssert(Array(pairs) == [(0, 1), (1, 2), (2, 3)]) + } + + func testForwardIndexing() { + let pairs = (1...5).adjacentPairs() + let expected = [(1, 2), (2, 3), (3, 4), (4, 5)] + var index = pairs.startIndex + for iteration in expected.indices { + XCTAssert(pairs[index] == expected[iteration]) + pairs.formIndex(after: &index) + } + XCTAssertEqual(index, pairs.endIndex) + } + + func testBackwardIndexing() { + let pairs = (1...5).adjacentPairs() + let expected = [(4, 5), (3, 4), (2, 3), (1, 2)] + var index = pairs.endIndex + for iteration in expected.indices { + pairs.formIndex(before: &index) + XCTAssert(pairs[index] == expected[iteration]) + } + XCTAssertEqual(index, pairs.startIndex) + } + + func testIndexDistance() { + let pairSequences = (0...4).map { (0..<$0).adjacentPairs() } + + for pairs in pairSequences { + for index in pairs.indices.dropLast() { + let next = pairs.index(after: index) + XCTAssertEqual(pairs.distance(from: index, to: next), 1) + } + + XCTAssertEqual(pairs.distance(from: pairs.startIndex, to: pairs.endIndex), 
pairs.count) + XCTAssertEqual(pairs.distance(from: pairs.endIndex, to: pairs.startIndex), -pairs.count) + } + } + + func testIndexOffsetBy() { + let pairSequences = (0...4).map { (0..<$0).adjacentPairs() } + + for pairs in pairSequences { + for index in pairs.indices.dropLast() { + let next = pairs.index(after: index) + XCTAssertEqual(pairs.index(index, offsetBy: 1), next) + } + + XCTAssertEqual(pairs.index(pairs.startIndex, offsetBy: pairs.count), pairs.endIndex) + XCTAssertEqual(pairs.index(pairs.endIndex, offsetBy: -pairs.count), pairs.startIndex) + } + } +} + +extension Collection { + fileprivate static func == <L: Equatable, R: Equatable>(lhs: Self, rhs: Self) -> Bool where Element == (L, R) { + lhs.count == rhs.count && zip(lhs, rhs).allSatisfy(==) + } +} From 03e37dec77081d092b774407223688c803d4b632 Mon Sep 17 00:00:00 2001 From: breakfast Date: Sun, 28 Mar 2021 15:22:56 -0700 Subject: [PATCH 2/4] Update README and CHANGELOG --- CHANGELOG.md | 2 +- README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70b59ea4..b8acad4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ package updates, you can specify your package dependency using ## [Unreleased] -*No changes yet.* +- `adjacentPairs()` lazily iterates over tuples of adjacent elements of a sequence. --- diff --git a/README.md b/README.md index 638ba8cd..a5d17f52 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ Read more about the package, and the intent behind it, in the [announcement on s #### Other useful operations +- [`adjacentPairs()`](https://github.com/apple/swift-algorithms/blob/main/Guides/AdjacentPairs.md): Lazily iterates over tuples of adjacent elements. - [`chunked(by:)`, `chunked(on:)`, `chunks(ofCount:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chunked.md): Eager and lazy operations that break a collection into chunks based on either a binary predicate or when the result of a projection changes or chunks of a given count. 
- [`indexed()`](https://github.com/apple/swift-algorithms/blob/main/Guides/Indexed.md): Iterate over tuples of a collection's indices and elements. - [`interspersed(with:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Intersperse.md): Place a value between every two elements of a sequence. From b0f302032272847c1cca706958155e161e975093 Mon Sep 17 00:00:00 2001 From: breakfast Date: Sat, 10 Apr 2021 20:45:56 -0700 Subject: [PATCH 3/4] Split sequence & collection types --- Sources/Algorithms/AdjacentPairs.swift | 202 ++++++++++++++++++------- 1 file changed, 147 insertions(+), 55 deletions(-) diff --git a/Sources/Algorithms/AdjacentPairs.swift b/Sources/Algorithms/AdjacentPairs.swift index e4df7eac..ff4901c6 100644 --- a/Sources/Algorithms/AdjacentPairs.swift +++ b/Sources/Algorithms/AdjacentPairs.swift @@ -12,7 +12,7 @@ extension Sequence { /// Creates a sequence of adjacent pairs of elements from this sequence. /// - /// In the `AdjacentPairs` instance returned by this method, the elements of + /// In the `AdjacentPairsSequence` returned by this method, the elements of /// the *i*th pair are the *i*th and *(i+1)*th elements of the underlying /// sequence. /// The following example uses the `adjacentPairs()` method to iterate over @@ -25,14 +25,37 @@ extension Sequence { /// // Prints "(2, 3)" /// // Prints "(3, 4)" /// // Prints "(4, 5)" - public func adjacentPairs() -> AdjacentPairs { - AdjacentPairs(_base: self) + @inlinable + public func adjacentPairs() -> AdjacentPairsSequence { + AdjacentPairsSequence(base: self) + } +} + +extension Collection { + /// A collection of adjacent pairs of elements built from an underlying collection. + /// + /// In an `AdjacentPairsCollection`, the elements of the *i*th pair are the *i*th + /// and *(i+1)*th elements of the underlying sequence. 
The following example + /// uses the `adjacentPairs()` method to iterate over adjacent pairs of + /// integers: + /// ``` + /// for pair in (1...5).adjacentPairs() { + /// print(pair) + /// } + /// // Prints "(1, 2)" + /// // Prints "(2, 3)" + /// // Prints "(3, 4)" + /// // Prints "(4, 5)" + /// ``` + @inlinable + public func adjacentPairs() -> AdjacentPairsCollection { + AdjacentPairsCollection(base: self) } } /// A sequence of adjacent pairs of elements built from an underlying sequence. /// -/// In an `AdjacentPairs` sequence, the elements of the *i*th pair are the *i*th +/// In an `AdjacentPairsSequence`, the elements of the *i*th pair are the *i*th /// and *(i+1)*th elements of the underlying sequence. The following example /// uses the `adjacentPairs()` method to iterate over adjacent pairs of /// integers: @@ -45,135 +68,204 @@ extension Sequence { /// // Prints "(3, 4)" /// // Prints "(4, 5)" /// ``` -public struct AdjacentPairs { - internal let _base: Base +public struct AdjacentPairsSequence { + @usableFromInline + internal let base: Base /// Creates an instance that makes pairs of adjacent elements from `base`. - internal init(_base: Base) { - self._base = _base + @inlinable + internal init(base: Base) { + self.base = base } } -// MARK: - Sequence - -extension AdjacentPairs { +extension AdjacentPairsSequence { public struct Iterator { - internal var _base: Base.Iterator - internal var _previousElement: Base.Element? + @usableFromInline + internal var base: Base.Iterator - internal init(_base: Base.Iterator) { - self._base = _base - self._previousElement = self._base.next() + @usableFromInline + internal var previousElement: Base.Element? + + @inlinable + internal init(base: Base.Iterator) { + self.base = base } } } -extension AdjacentPairs.Iterator: IteratorProtocol { +extension AdjacentPairsSequence.Iterator: IteratorProtocol { public typealias Element = (Base.Element, Base.Element) + @inlinable public mutating func next() -> Element? 
{ - guard let previous = _previousElement, let next = _base.next() else { + if previousElement == nil { + previousElement = base.next() + } + + guard let previous = previousElement, let next = base.next() else { return nil } - _previousElement = next + + previousElement = next return (previous, next) } } -extension AdjacentPairs: Sequence { +extension AdjacentPairsSequence: Sequence { + @inlinable public func makeIterator() -> Iterator { - Iterator(_base: _base.makeIterator()) + Iterator(base: base.makeIterator()) } + @inlinable public var underestimatedCount: Int { - Swift.max(0, _base.underestimatedCount - 1) + Swift.max(0, base.underestimatedCount - 1) } } -// MARK: - Collection +/// A collection of adjacent pairs of elements built from an underlying collection. +/// +/// In an `AdjacentPairsCollection`, the elements of the *i*th pair are the *i*th +/// and *(i+1)*th elements of the underlying sequence. The following example +/// uses the `adjacentPairs()` method to iterate over adjacent pairs of +/// integers: +/// ``` +/// for pair in (1...5).adjacentPairs() { +/// print(pair) +/// } +/// // Prints "(1, 2)" +/// // Prints "(2, 3)" +/// // Prints "(3, 4)" +/// // Prints "(4, 5)" +/// ``` +public struct AdjacentPairsCollection { + @usableFromInline + internal let base: Base + + public let startIndex: Index + + @inlinable + internal init(base: Base) { + self.base = base + + // Precompute `startIndex` to ensure O(1) behavior, + // avoiding indexing past `endIndex` + let start = base.startIndex + let end = base.endIndex + let second = start == end ? 
start : base.index(after: start) + self.startIndex = Index(first: start, second: second) + } +} + +extension AdjacentPairsCollection { + public typealias Iterator = AdjacentPairsSequence.Iterator + + @inlinable + public func makeIterator() -> Iterator { + Iterator(base: base.makeIterator()) + } +} -extension AdjacentPairs where Base: Collection { +extension AdjacentPairsCollection { public struct Index: Comparable { - internal var _base: Base.Index + @usableFromInline + internal var first: Base.Index + + @usableFromInline + internal var second: Base.Index - internal init(_base: Base.Index) { - self._base = _base + @inlinable + internal init(first: Base.Index, second: Base.Index) { + self.first = first + self.second = second } + @inlinable public static func < (lhs: Index, rhs: Index) -> Bool { - lhs._base < rhs._base + (lhs.first, lhs.second) < (rhs.first, rhs.second) } } } -extension AdjacentPairs: Collection where Base: Collection { - public var startIndex: Index { Index(_base: _base.startIndex) } - +extension AdjacentPairsCollection: Collection { + @inlinable public var endIndex: Index { - switch _base.endIndex { - case _base.startIndex, _base.index(after: _base.startIndex): - return Index(_base: _base.startIndex) - case let endIndex: - return Index(_base: endIndex) + switch base.endIndex { + case startIndex.first, startIndex.second: + return startIndex + case let end: + return Index(first: end, second: end) } } + @inlinable public subscript(position: Index) -> (Base.Element, Base.Element) { - (_base[position._base], _base[_base.index(after: position._base)]) + (base[position.first], base[position.second]) } + @inlinable public func index(after i: Index) -> Index { - let next = _base.index(after: i._base) - return _base.index(after: next) == _base.endIndex + let next = base.index(after: i.second) + return next == base.endIndex ? 
endIndex - : Index(_base: next) + : Index(first: i.second, second: next) } + @inlinable public func index(_ i: Index, offsetBy distance: Int) -> Index { if distance == 0 { return i } else if distance > 0 { - let offsetIndex = _base.index(i._base, offsetBy: distance) - return _base.index(after: offsetIndex) == _base.endIndex + let firstOffsetIndex = base.index(i.first, offsetBy: distance) + let secondOffsetIndex = base.index(after: firstOffsetIndex) + return secondOffsetIndex == base.endIndex ? endIndex - : Index(_base: offsetIndex) + : Index(first: firstOffsetIndex, second: secondOffsetIndex) } else { return i == endIndex - ? Index(_base: _base.index(i._base, offsetBy: distance - 1)) - : Index(_base: _base.index(i._base, offsetBy: distance)) + ? Index(first: base.index(i.first, offsetBy: distance - 1), + second: base.index(i.first, offsetBy: distance)) + : Index(first: base.index(i.first, offsetBy: distance), + second: base.index(i.second, offsetBy: distance)) } } + @inlinable public func distance(from start: Index, to end: Index) -> Int { let offset: Int - switch (start._base, end._base) { - case (_base.endIndex, _base.endIndex): + switch (start.first, end.first) { + case (base.endIndex, base.endIndex): return 0 - case (_base.endIndex, _): + case (base.endIndex, _): offset = +1 - case (_, _base.endIndex): + case (_, base.endIndex): offset = -1 default: offset = 0 } - return _base.distance(from: start._base, to: end._base) + offset + return base.distance(from: start.first, to: end.first) + offset } + @inlinable public var count: Int { - Swift.max(0, _base.count - 1) + Swift.max(0, base.count - 1) } } -extension AdjacentPairs: BidirectionalCollection +extension AdjacentPairsCollection: BidirectionalCollection where Base: BidirectionalCollection { + @inlinable public func index(before i: Index) -> Index { i == endIndex - ? Index(_base: _base.index(i._base, offsetBy: -2)) - : Index(_base: _base.index(before: i._base)) + ? 
Index(first: base.index(i.first, offsetBy: -2), + second: base.index(before: i.first)) + : Index(first: base.index(before: i.first), + second: i.first) } } -extension AdjacentPairs: RandomAccessCollection +extension AdjacentPairsCollection: RandomAccessCollection where Base: RandomAccessCollection {} From e092ca010da6b8b2f1489ba85168b8c7084b3463 Mon Sep 17 00:00:00 2001 From: breakfast Date: Sat, 10 Apr 2021 20:56:09 -0700 Subject: [PATCH 4/4] Update documentation --- Guides/AdjacentPairs.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/Guides/AdjacentPairs.md b/Guides/AdjacentPairs.md index b9c5b75d..1b790eb6 100644 --- a/Guides/AdjacentPairs.md +++ b/Guides/AdjacentPairs.md @@ -15,17 +15,21 @@ let pairs = numbers.adjacentPairs() ## Detailed Design -The `adjacentPairs()` method is declared as a `Sequence` extension returning `AdjacentPairs`. +The `adjacentPairs()` method is declared as a `Sequence` extension returning `AdjacentPairsSequence` and as a `Collection` extension returning `AdjacentPairsCollection`. ```swift extension Sequence { - public func adjacentPairs() -> AdjacentPairs + public func adjacentPairs() -> AdjacentPairsSequence<Self> } ``` -The resulting `AdjacentPairs` type is a sequence, with conditional conformance to `Collection`, `BidirectionalCollection`, and `RandomAccessCollection` when the underlying sequence conforms. +```swift +extension Collection { + public func adjacentPairs() -> AdjacentPairsCollection<Self> +} +``` -The spelling `zip(s, s.dropFirst())` for a sequence `s` is an equivalent operation on collection types; however, this implementation is undefined behavior on single-pass sequences, and `Zip2Sequence` does not conditionally conform to the `Collection` family of protocols. +The `AdjacentPairsSequence` type is a sequence, and the `AdjacentPairsCollection` type is a collection with conditional conformance to `BidirectionalCollection` and `RandomAccessCollection` when the underlying collection conforms. 
### Complexity @@ -44,3 +48,5 @@ This function is often written as a `zip` of a sequence together with itself, mi **Haskell:** This operation is spelled ``s `zip` tail s``. **Python:** Python users may write `zip(s, s[1:])` for a list with at least one element. For natural language processing, the `nltk` package offers a `bigrams` function akin to this method. + + Note that in Swift, the spelling `zip(s, s.dropFirst())` iterates `s` twice, so its result is unspecified for a single-pass sequence `s`.