Skip to content

Commit 28d3495

Browse files
[XMLParser] Use TaskLocal for storing the current parser
Instead of thread-local storage, use `TaskLocal` to store the current parser. This solves three issues: 1. If someone calls `XMLParser.parse()` with a new parser instance in a delegate method call, it overwrote the current parser and wrote it back after the call as `nil`, not the previous current parser. This reentrancy issue can be a problem especially when someone uses external entity resolving since the feature depends on the current parser tracking. Using `TaskLocal` solves this issue since it tracks values as a stack and restores the previous value at the end of the `withValue` call. 2. Since jobs of different tasks can be scheduled on the same thread, different tasks can refer to the same thread-local storage. This wouldn't be a problem for now since the `parse()` method doesn't have any suspention points and different tasks can't run on the same thread during the parsing. However, it's better to use `TaskLocal` to leverage the concurrency model of Swift. 3. The global variable `_currentParser` existed in the WASI platform path but it's unsafe in the Swift concurrency model. It wouldn't be a problem on WASI since it's always single-threaded, we should avoid platform-specific assumption as much as possible.
1 parent 7ccef0d commit 28d3495

File tree

2 files changed

+71
-54
lines changed

2 files changed

+71
-54
lines changed

Sources/FoundationXML/XMLParser.swift

+32-53
Original file line numberDiff line numberDiff line change
@@ -398,9 +398,7 @@ extension XMLParser : @unchecked Sendable { }
398398

399399
open class XMLParser : NSObject {
400400
private var _handler: _CFXMLInterfaceSAXHandler
401-
#if !os(WASI)
402401
internal var _stream: InputStream?
403-
#endif
404402
internal var _data: Data?
405403

406404
internal var _chunkSize = Int(4096 * 32) // a suitably large number for a decent chunk size
@@ -469,33 +467,35 @@ open class XMLParser : NSObject {
469467
open var externalEntityResolvingPolicy: ExternalEntityResolvingPolicy = .never
470468

471469
open var allowedExternalEntityURLs: Set<URL>?
472-
473-
#if os(WASI)
474-
private static var _currentParser: XMLParser?
475-
#endif
476470

477-
internal static func currentParser() -> XMLParser? {
478-
#if os(WASI)
479-
return _currentParser
480-
#else
481-
if let current = Thread.current.threadDictionary["__CurrentNSXMLParser"] {
482-
return current as? XMLParser
483-
} else {
484-
return nil
471+
/// The current parser is stored in a task local variable to allow for
472+
/// concurrent parsing in different tasks with different parsers.
473+
///
474+
/// Rationale for `@unchecked Sendable`:
475+
/// While the ``XMLParser`` class itself is not `Sendable`, `TaskLocal`
476+
/// requires the value type to be `Sendable`. The sendability requirement
477+
/// of `TaskLocal` is only for the "default" value and values set with
478+
/// `withValue` will not be shared between tasks.
479+
/// So as long as 1. the default value is safe to be shared between tasks
480+
/// and 2. the `Sendable` conformance of `_CurrentParser` is not used
481+
/// outside of `TaskLocal`, it is safe to mark it as `@unchecked Sendable`.
482+
private struct _CurrentParser: @unchecked Sendable {
483+
let parser: XMLParser?
484+
485+
static var `default`: _CurrentParser {
486+
return _CurrentParser(parser: nil)
485487
}
486-
#endif
488+
}
489+
490+
@TaskLocal
491+
private static var _currentParser: _CurrentParser = .default
492+
493+
internal static func currentParser() -> XMLParser? {
494+
return _currentParser.parser
487495
}
488496

489-
internal static func setCurrentParser(_ parser: XMLParser?) {
490-
#if os(WASI)
491-
_currentParser = parser
492-
#else
493-
if let p = parser {
494-
Thread.current.threadDictionary["__CurrentNSXMLParser"] = p
495-
} else {
496-
Thread.current.threadDictionary.removeObject(forKey: "__CurrentNSXMLParser")
497-
}
498-
#endif
497+
internal static func withCurrentParser<R>(_ parser: XMLParser, _ body: () -> R) -> R {
498+
return self.$_currentParser.withValue(_CurrentParser(parser: parser), operation: body)
499499
}
500500

501501
internal func _handleParseResult(_ parseResult: Int32) -> Bool {
@@ -569,7 +569,6 @@ open class XMLParser : NSObject {
569569
return result
570570
}
571571

572-
#if !os(WASI)
573572
internal func parseFrom(_ stream : InputStream) -> Bool {
574573
var result = true
575574

@@ -598,37 +597,17 @@ open class XMLParser : NSObject {
598597

599598
return result
600599
}
601-
#else
602-
internal func parse(from data: Data) -> Bool {
603-
var result = true
604-
var chunkStart = 0
605-
var chunkEnd = min(_chunkSize, data.count)
606-
while result && chunkStart < chunkEnd {
607-
let chunk = data[chunkStart..<chunkEnd]
608-
result = parseData(chunk)
609-
chunkStart = chunkEnd
610-
chunkEnd = min(chunkEnd + _chunkSize, data.count)
611-
}
612-
return result
613-
}
614-
#endif
615600

616601
// called to start the event-driven parse. Returns YES in the event of a successful parse, and NO in case of error.
617602
open func parse() -> Bool {
618-
#if os(WASI)
619-
return _data.map { parse(from: $0) } ?? false
620-
#else
621-
XMLParser.setCurrentParser(self)
622-
defer { XMLParser.setCurrentParser(nil) }
623-
624-
if _stream != nil {
625-
return parseFrom(_stream!)
626-
} else if _data != nil {
627-
return parseData(_data!, lastChunkOfData: true)
603+
return Self.withCurrentParser(self) {
604+
if _stream != nil {
605+
return parseFrom(_stream!)
606+
} else if _data != nil {
607+
return parseData(_data!, lastChunkOfData: true)
608+
}
609+
return false
628610
}
629-
630-
return false
631-
#endif
632611
}
633612

634613
// called by the delegate to stop the parse. The delegate will get an error message sent to it.

Tests/Foundation/TestXMLParser.swift

+39-1
Original file line numberDiff line numberDiff line change
@@ -198,5 +198,43 @@ class TestXMLParser : XCTestCase {
198198
ElementNameChecker("noPrefix").check()
199199
ElementNameChecker("myPrefix:myLocalName").check()
200200
}
201-
201+
202+
func testExternalEntity() throws {
203+
class Delegate: XMLParserDelegateEventStream {
204+
override func parserDidStartDocument(_ parser: XMLParser) {
205+
// Start a child parser, updating `currentParser` to the child parser
206+
// to ensure that `currentParser` won't be reset to `nil`, which would
207+
// ignore any external entity related configuration.
208+
let childParser = XMLParser(data: "<child />".data(using: .utf8)!)
209+
XCTAssertTrue(childParser.parse())
210+
super.parserDidStartDocument(childParser)
211+
}
212+
}
213+
try withTemporaryDirectory { dir, _ in
214+
let greetingPath = dir.appendingPathComponent("greeting.xml")
215+
try Data("<hello />".utf8).write(to: greetingPath)
216+
let xml = """
217+
<?xml version="1.0" standalone="no"?>
218+
<!DOCTYPE doc [
219+
<!ENTITY greeting SYSTEM "\(greetingPath.absoluteString)">
220+
]>
221+
<doc>&greeting;</doc>
222+
"""
223+
224+
let parser = XMLParser(data: xml.data(using: .utf8)!)
225+
// Explicitly disable external entity resolving
226+
parser.externalEntityResolvingPolicy = .never
227+
let delegate = Delegate()
228+
parser.delegate = delegate
229+
XCTAssertTrue(parser.parse())
230+
XCTAssertNil(parser.parserError)
231+
XCTAssertEqual(delegate.events, [
232+
.startDocument,
233+
.didStartElement("doc", nil, nil, [:]),
234+
// Should not have parsed the external entity
235+
.didEndElement("doc", nil, nil),
236+
.endDocument,
237+
])
238+
}
239+
}
202240
}

0 commit comments

Comments
 (0)