|
21 | 21 | #include "unicode/uscript.h"
|
22 | 22 | #include "unicode/ucharstrie.h"
|
23 | 23 | #include "unicode/bytestrie.h"
|
| 24 | +#include "unicode/rbbi.h" |
24 | 25 |
|
25 | 26 | #include "brkeng.h"
|
26 | 27 | #include "cmemory.h"
|
@@ -70,19 +71,21 @@ UnhandledEngine::~UnhandledEngine() {
|
70 | 71 | }
|
71 | 72 |
|
72 | 73 | UBool
|
73 |
| -UnhandledEngine::handles(UChar32 c) const { |
| 74 | +UnhandledEngine::handles(UChar32 c, const char* locale) const { |
| 75 | + (void)locale; // Unused |
74 | 76 | return fHandled && fHandled->contains(c);
|
75 | 77 | }
|
76 | 78 |
|
77 | 79 | int32_t
|
78 | 80 | UnhandledEngine::findBreaks( UText *text,
|
79 |
| - int32_t /* startPos */, |
| 81 | + int32_t startPos, |
80 | 82 | int32_t endPos,
|
81 | 83 | UVector32 &/*foundBreaks*/,
|
82 | 84 | UBool /* isPhraseBreaking */,
|
83 | 85 | UErrorCode &status) const {
|
84 | 86 | if (U_FAILURE(status)) return 0;
|
85 |
| - UChar32 c = utext_current32(text); |
| 87 | + utext_setNativeIndex(text, startPos); |
| 88 | + UChar32 c = utext_current32(text); |
86 | 89 | while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
|
87 | 90 | utext_next32(text); // TODO: recast loop to work with post-increment operations.
|
88 | 91 | c = utext_current32(text);
|
@@ -120,49 +123,47 @@ ICULanguageBreakFactory::~ICULanguageBreakFactory() {
|
120 | 123 | }
|
121 | 124 | }
|
122 | 125 |
|
123 |
| -U_NAMESPACE_END |
124 |
| -U_CDECL_BEGIN |
125 |
| -static void U_CALLCONV _deleteEngine(void *obj) { |
126 |
| - delete (const icu::LanguageBreakEngine *) obj; |
| 126 | +void ICULanguageBreakFactory::ensureEngines(UErrorCode& status) { |
| 127 | + static UMutex gBreakEngineMutex; |
| 128 | + Mutex m(&gBreakEngineMutex); |
| 129 | + if (fEngines == nullptr) { |
| 130 | + LocalPointer<UStack> engines(new UStack(uprv_deleteUObject, nullptr, status), status); |
| 131 | + if (U_SUCCESS(status)) { |
| 132 | + fEngines = engines.orphan(); |
| 133 | + } |
| 134 | + } |
127 | 135 | }
|
128 |
| -U_CDECL_END |
129 |
| -U_NAMESPACE_BEGIN |
130 | 136 |
|
131 | 137 | const LanguageBreakEngine *
|
132 |
| -ICULanguageBreakFactory::getEngineFor(UChar32 c) { |
| 138 | +ICULanguageBreakFactory::getEngineFor(UChar32 c, const char* locale) { |
133 | 139 | const LanguageBreakEngine *lbe = nullptr;
|
134 | 140 | UErrorCode status = U_ZERO_ERROR;
|
| 141 | + ensureEngines(status); |
| 142 | + if (U_FAILURE(status) ) { |
| 143 | + // Note: no way to return error code to caller. |
| 144 | + return nullptr; |
| 145 | + } |
135 | 146 |
|
136 | 147 | static UMutex gBreakEngineMutex;
|
137 | 148 | Mutex m(&gBreakEngineMutex);
|
138 |
| - |
139 |
| - if (fEngines == nullptr) { |
140 |
| - LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status); |
141 |
| - if (U_FAILURE(status) ) { |
142 |
| - // Note: no way to return error code to caller. |
143 |
| - return nullptr; |
144 |
| - } |
145 |
| - fEngines = engines.orphan(); |
146 |
| - } else { |
147 |
| - int32_t i = fEngines->size(); |
148 |
| - while (--i >= 0) { |
149 |
| - lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); |
150 |
| - if (lbe != nullptr && lbe->handles(c)) { |
151 |
| - return lbe; |
152 |
| - } |
| 149 | + int32_t i = fEngines->size(); |
| 150 | + while (--i >= 0) { |
| 151 | + lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); |
| 152 | + if (lbe != nullptr && lbe->handles(c, locale)) { |
| 153 | + return lbe; |
153 | 154 | }
|
154 | 155 | }
|
155 |
| - |
| 156 | + |
156 | 157 | // We didn't find an engine. Create one.
|
157 |
| - lbe = loadEngineFor(c); |
| 158 | + lbe = loadEngineFor(c, locale); |
158 | 159 | if (lbe != nullptr) {
|
159 | 160 | fEngines->push((void *)lbe, status);
|
160 | 161 | }
|
161 | 162 | return U_SUCCESS(status) ? lbe : nullptr;
|
162 | 163 | }
|
163 | 164 |
|
164 | 165 | const LanguageBreakEngine *
|
165 |
| -ICULanguageBreakFactory::loadEngineFor(UChar32 c) { |
| 166 | +ICULanguageBreakFactory::loadEngineFor(UChar32 c, const char*) { |
166 | 167 | UErrorCode status = U_ZERO_ERROR;
|
167 | 168 | UScriptCode code = uscript_getScript(c, &status);
|
168 | 169 | if (U_SUCCESS(status)) {
|
@@ -299,6 +300,70 @@ ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) {
|
299 | 300 | return nullptr;
|
300 | 301 | }
|
301 | 302 |
|
| 303 | + |
| 304 | +void ICULanguageBreakFactory::addExternalEngine( |
| 305 | + ExternalBreakEngine* external, UErrorCode& status) { |
| 306 | + LocalPointer<ExternalBreakEngine> engine(external, status); |
| 307 | + ensureEngines(status); |
| 308 | + LocalPointer<BreakEngineWrapper> wrapper( |
| 309 | + new BreakEngineWrapper(engine.orphan(), status), status); |
| 310 | + static UMutex gBreakEngineMutex; |
| 311 | + Mutex m(&gBreakEngineMutex); |
| 312 | + fEngines->push(wrapper.getAlias(), status); |
| 313 | + wrapper.orphan(); |
| 314 | +} |
| 315 | + |
| 316 | +BreakEngineWrapper::BreakEngineWrapper( |
| 317 | + ExternalBreakEngine* engine, UErrorCode &status) : delegate(engine, status) { |
| 318 | +} |
| 319 | + |
| 320 | +BreakEngineWrapper::~BreakEngineWrapper() { |
| 321 | +} |
| 322 | + |
| 323 | +UBool BreakEngineWrapper::handles(UChar32 c, const char* locale) const { |
| 324 | + return delegate->isFor(c, locale); |
| 325 | +} |
| 326 | + |
| 327 | +int32_t BreakEngineWrapper::findBreaks( |
| 328 | + UText *text, |
| 329 | + int32_t startPos, |
| 330 | + int32_t endPos, |
| 331 | + UVector32 &foundBreaks, |
| 332 | + UBool /* isPhraseBreaking */, |
| 333 | + UErrorCode &status) const { |
| 334 | + if (U_FAILURE(status)) return 0; |
| 335 | + int32_t result = 0; |
| 336 | + |
| 337 | + // Find the span of characters included in the set. |
| 338 | + // The span to break begins at the current position in the text, and |
| 339 | + // extends towards the start or end of the text, depending on 'reverse'. |
| 340 | + |
| 341 | + utext_setNativeIndex(text, startPos); |
| 342 | + int32_t start = (int32_t)utext_getNativeIndex(text); |
| 343 | + int32_t current; |
| 344 | + int32_t rangeStart; |
| 345 | + int32_t rangeEnd; |
| 346 | + UChar32 c = utext_current32(text); |
| 347 | + while((current = (int32_t)utext_getNativeIndex(text)) < endPos && delegate->handles(c)) { |
| 348 | + utext_next32(text); // TODO: recast loop for postincrement |
| 349 | + c = utext_current32(text); |
| 350 | + } |
| 351 | + rangeStart = start; |
| 352 | + rangeEnd = current; |
| 353 | + int32_t beforeSize = foundBreaks.size(); |
| 354 | + int32_t additionalCapacity = rangeEnd - rangeStart + 1; |
| 355 | + // enlarge to contains (rangeEnd-rangeStart+1) more items |
| 356 | + foundBreaks.ensureCapacity(beforeSize+additionalCapacity, status); |
| 357 | + if (U_FAILURE(status)) return 0; |
| 358 | + foundBreaks.setSize(beforeSize + beforeSize+additionalCapacity); |
| 359 | + result = delegate->fillBreaks(text, rangeStart, rangeEnd, foundBreaks.getBuffer()+beforeSize, |
| 360 | + additionalCapacity, status); |
| 361 | + if (U_FAILURE(status)) return 0; |
| 362 | + foundBreaks.setSize(beforeSize + result); |
| 363 | + utext_setNativeIndex(text, current); |
| 364 | + return result; |
| 365 | +} |
| 366 | + |
302 | 367 | U_NAMESPACE_END
|
303 | 368 |
|
304 | 369 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
|
0 commit comments