1
1
#include " manager.h"
2
+ // TODO: move to simple_reader/
2
3
3
4
#include < ydb/core/tx/columnshard/engines/reader/duplicates/merge.h>
5
+ #include < ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/context.h>
6
+ #include < ydb/core/tx/columnshard/engines/reader/simple_reader/iterator/scanner.h>
4
7
#include < ydb/core/tx/conveyor/usage/service.h>
5
8
6
9
#include < bit>
@@ -35,14 +38,14 @@ void TIntervalCounter::PropagateDelta(const TPosition& node) {
35
38
36
39
void TIntervalCounter::Update (const TPosition& node, const TModification& modification, TZerosCollector* callback) {
37
40
if (modification.GetLeft () <= node.GetLeft () && modification.GetRight () >= node.GetRight ()) {
38
- if (callback) {
39
- callback->OnUpdate (node.GetLeft (), node.GetRight (), GetCount (node), modification.GetDelta ());
40
- }
41
41
if (node.GetLeft () == node.GetRight ()) {
42
42
Count[node.GetIndex ()] += modification.GetDelta ();
43
43
} else {
44
44
PropagatedDeltas[node.GetIndex ()] += modification.GetDelta ();
45
45
}
46
+ if (callback) {
47
+ callback->OnUpdate (node, GetCount (node), modification.GetDelta ());
48
+ }
46
49
} else {
47
50
PropagateDelta (node.GetIndex ());
48
51
if (modification.GetLeft () <= node.LeftChild ().GetRight ()) {
@@ -55,7 +58,7 @@ void TIntervalCounter::Update(const TPosition& node, const TModification& modifi
55
58
}
56
59
57
60
void TIntervalCounter::Inc (const ui32 l, const ui32 r) {
58
- Update (TPosition (MaxIndex ), TModification (l, r, 1 ), nullptr );
61
+ Update (GetRoot ( ), TModification (l, r, 1 ), nullptr );
59
62
}
60
63
61
64
TIntervalCounter::TIntervalCounter (const std::vector<std::pair<ui32, ui32>>& intervals) {
@@ -67,21 +70,21 @@ TIntervalCounter::TIntervalCounter(const std::vector<std::pair<ui32, ui32>>& int
67
70
}
68
71
}
69
72
MaxIndex = std::bit_ceil (maxValue);
70
- Count.resize (MaxIndex * 2 - 1 );
71
- PropagatedDeltas.resize (MaxIndex * 2 - 1 );
73
+ Count.resize (MaxIndex * 2 + 1 );
74
+ PropagatedDeltas.resize (MaxIndex * 2 + 1 );
72
75
73
76
for (const auto & [l, r] : intervals) {
74
77
Inc (l, r);
75
78
}
76
79
}
77
80
78
81
bool TIntervalCounter::IsAllZeros () const {
79
- return GetCount (TPosition (MaxIndex )) == 0 ;
82
+ return GetCount (GetRoot ( )) == 0 ;
80
83
}
81
84
82
85
std::vector<ui32> TIntervalCounter::DecAndGetZeros (const ui32 l, const ui32 r) {
83
86
TZerosCollector callback;
84
- Update (TPosition (MaxIndex ), TModification (l, r, -1 ), &callback);
87
+ Update (GetRoot ( ), TModification (l, r, -1 ), &callback);
85
88
return callback.ExtractValues ();
86
89
}
87
90
@@ -160,20 +163,30 @@ TDuplicateFilterConstructor::TSourceIntervals::TSourceIntervals(const std::vecto
160
163
}
161
164
162
165
void TDuplicateFilterConstructor::Handle (const TEvRequestFilter::TPtr& ev) {
166
+ AFL_DEBUG (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " request_duplicates_filter" )(" source_id" , ev->Get ()->GetSource ()->GetSourceId ());
167
+ // TODO: handle step restarts (completely avoid them or fix verify here)
163
168
// TODO: add counter: max volume of redundant memory for merges
164
169
const auto & source = ev->Get ()->GetSource ();
165
170
TIntervalsRange range = Intervals.GetRangeVerified (source->GetSourceId ());
166
- AFL_VERIFY (AvailableSources.emplace (source->GetSourceId (), TSourceFilterConstructor (source, ev->Get ()->GetSubscriber (), Intervals)).second );
171
+ auto [constructionInfo, emplaced] =
172
+ AvailableSources.emplace (source->GetSourceId (), TSourceFilterConstructor (source, ev->Get ()->GetSubscriber (), Intervals));
173
+ AFL_VERIFY (emplaced);
167
174
AvailableSourcesCount.AddRange (range.GetFirstIdx (), range.GetLastIdx (), source->GetSourceId ());
168
175
std::vector<ui32> readyIntervals = AwaitedSourcesCount.DecAndGetZeros (range.GetFirstIdx (), range.GetLastIdx ());
169
176
177
+ if (readyIntervals.size () != range.NumIntervals ()) {
178
+ AFL_VERIFY (readyIntervals.size () < range.NumIntervals ());
179
+ constructionInfo->second .SetBlockGuard (NSimple::TSourceFetchingScheduler::SetBlocked (
180
+ source->GetSourceIdx (), source->GetContextAsVerified <NSimple::TSpecialReadContext>()->GetScheduler ()));
181
+ }
182
+
170
183
for (const ui32 intervalIdx : readyIntervals) {
171
184
auto sourceIds = AvailableSourcesCount.FindIntersections (intervalIdx);
172
185
AFL_VERIFY (sourceIds.size ());
173
186
const std::shared_ptr<NCommon::TSpecialReadContext> readContext =
174
187
TValidator::CheckNotNull (AvailableSources.FindPtr (sourceIds.front ()))->GetSource ()->GetContext ();
175
- const std::shared_ptr<TBuildDuplicateFilters> task =
176
- std::make_shared<TBuildDuplicateFilters>( readContext->GetReadMetadata ()->GetReplaceKey (), IIndexInfo::GetSnapshotColumnNames ());
188
+ const std::shared_ptr<TBuildDuplicateFilters> task = std::make_shared<TBuildDuplicateFilters>(
189
+ readContext->GetReadMetadata ()->GetReplaceKey (), IIndexInfo::GetSnapshotColumnNames (), intervalIdx, SelfId ());
177
190
for (const ui64 sourceId : sourceIds) {
178
191
const TSourceFilterConstructor* constructionInfo = AvailableSources.FindPtr (sourceId);
179
192
AFL_VERIFY (constructionInfo)(" source" , sourceId);
@@ -184,27 +197,31 @@ void TDuplicateFilterConstructor::Handle(const TEvRequestFilter::TPtr& ev) {
184
197
std::make_shared<NArrow::TGeneralContainer>(source->GetStageData ()
185
198
.ToGeneralContainer (source->GetContext ()->GetCommonContext ()->GetResolver ())
186
199
->Slice (intervalRange.GetBegin (), intervalRange.Size ()));
187
- task->AddSource (slice, source->GetStageData ().GetNotAppliedFilter (),
188
- std::make_shared<TInternalFilterSubscriber>(intervalIdx, source->GetSourceId (), SelfId ()));
200
+ task->AddSource (slice, source->GetStageData ().GetNotAppliedFilter (), source->GetSourceId ());
189
201
}
190
202
NConveyor::TScanServiceOperator::SendTaskToExecute (task, readContext->GetCommonContext ()->GetConveyorProcessId ());
191
203
}
192
204
}
193
205
194
206
void TDuplicateFilterConstructor::Handle (const TEvDuplicateFilterPartialResult::TPtr& ev) {
195
207
if (ev->Get ()->GetResult ().IsFail ()) {
208
+ AFL_INFO (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " interval_merging_error" )(" error" , ev->Get ()->GetResult ().GetErrorMessage ());
196
209
AbortConstruction (ev->Get ()->GetResult ().GetErrorMessage ());
197
210
return ;
198
211
}
199
- TSourceFilterConstructor* constructor = AvailableSources.FindPtr (ev->Get ()->GetSourceId ());
200
- AFL_VERIFY (constructor)(" portion" , ev->Get ()->GetSourceId ());
201
- // TODO: avoid copying filters
202
- constructor->SetFilter (ev->Get ()->GetIntervalIdx (), ev->Get ()->ExtractResult ().DetachResult ());
203
- if (constructor->IsReady ()) {
204
- std::move (*constructor).Finish ();
205
- AFL_VERIFY (AvailableSources.erase (ev->Get ()->GetSourceId ()));
206
- if (AvailableSources.empty () && AwaitedSourcesCount.IsAllZeros ()) {
207
- PassAway ();
212
+ for (auto && [sourceId, filter] : ev->Get ()->DetachResult ()) {
213
+ TSourceFilterConstructor* constructor = AvailableSources.FindPtr (sourceId);
214
+ AFL_VERIFY (constructor)(" portion" , sourceId);
215
+ // TODO: avoid copying filters
216
+ constructor->SetFilter (ev->Get ()->GetIntervalIdx (), std::move (filter));
217
+ if (constructor->IsReady ()) {
218
+ AFL_DEBUG (NKikimrServices::TX_COLUMNSHARD_SCAN)(" event" , " build_duplicates_filter" )(
219
+ " source_id" , constructor->GetSource ()->GetSourceId ());
220
+ std::move (*constructor).Finish ();
221
+ AFL_VERIFY (AvailableSources.erase (sourceId));
222
+ if (AvailableSources.empty () && AwaitedSourcesCount.IsAllZeros ()) {
223
+ PassAway ();
224
+ }
208
225
}
209
226
}
210
227
}
0 commit comments