Skip to content

Commit ef5fb62

Browse files
committed
ease gc
1 parent 5da609b commit ef5fb62

File tree

1 file changed

+18
-22
lines changed

1 file changed

+18
-22
lines changed

Diff for: scheduler/batch.go

+18-22
Original file line numberDiff line numberDiff line change
@@ -31,28 +31,22 @@ type batcher struct {
3131
}
3232

3333
type worker struct {
34-
ch chan *schema.Resource
35-
flush chan chan struct{}
36-
rows schema.Resources
37-
builder *array.RecordBuilder // we can reuse that
38-
res chan<- message.SyncMessage
34+
ch chan *schema.Resource
35+
flush chan chan struct{}
36+
curRows, maxRows int // todo: consider using capped int64 from https://github.com/cloudquery/plugin-sdk/pull/1647
37+
builder *array.RecordBuilder // we can reuse that
38+
res chan<- message.SyncMessage
3939
}
4040

4141
// send must be called only when at least one row has been buffered
4242
func (w *worker) send() {
43-
for _, row := range w.rows {
44-
scalar.AppendToRecordBuilder(w.builder, row.GetValues())
45-
}
46-
4743
w.res <- &message.SyncInsert{Record: w.builder.NewRecord()}
4844
// we need to reserve here as NewRecord (& underlying NewArray calls) reset the memory
49-
w.builder.Reserve(cap(w.rows))
50-
51-
clear(w.rows) // ease GC
52-
w.rows = w.rows[:0]
45+
w.builder.Reserve(w.maxRows)
46+
w.curRows = 0 // reset
5347
}
5448

55-
func (w *worker) work(done <-chan struct{}, size int, timeout time.Duration) {
49+
func (w *worker) work(done <-chan struct{}, timeout time.Duration) {
5650
ticker := writers.NewTicker(timeout)
5751
defer ticker.Stop()
5852
tickerCh := ticker.Chan()
@@ -61,25 +55,27 @@ func (w *worker) work(done <-chan struct{}, size int, timeout time.Duration) {
6155
select {
6256
case r, ok := <-w.ch:
6357
if !ok {
64-
if len(w.rows) > 0 {
58+
if w.curRows > 0 {
6559
w.send()
6660
}
6761
return
6862
}
6963

70-
w.rows = append(w.rows, r)
71-
if len(w.rows) == size {
64+
// append to builder right away
65+
scalar.AppendToRecordBuilder(w.builder, r.GetValues())
66+
w.curRows++
67+
if w.curRows == w.maxRows {
7268
w.send()
7369
ticker.Reset(timeout)
7470
}
7571

7672
case <-tickerCh:
77-
if len(w.rows) > 0 {
73+
if w.curRows > 0 {
7874
w.send()
7975
}
8076

8177
case ch := <-w.flush:
82-
if len(w.rows) > 0 {
78+
if w.curRows > 0 {
8379
w.send()
8480
ticker.Reset(timeout)
8581
}
@@ -103,7 +99,7 @@ func (b *batcher) process(res *schema.Resource) {
10399

104100
// we allocate only ch here, as it may be needed right away:
105101
// for instance, when another goroutine loads the worker value that we stored
106-
wr := &worker{ch: make(chan *schema.Resource, b.size)}
102+
wr := &worker{ch: make(chan *schema.Resource, 5)} // 5 is quite enough
107103
v, loaded = b.workers.LoadOrStore(table.Name, wr)
108104
if loaded {
109105
// means that the worker was already in the sync.Map, so we just discard the wr value
@@ -120,13 +116,13 @@ func (b *batcher) process(res *schema.Resource) {
120116

121117
// fill in the worker fields
122118
wr.flush = make(chan chan struct{})
123-
wr.rows = make(schema.Resources, 0, b.size)
119+
wr.maxRows = b.size
124120
wr.builder = array.NewRecordBuilder(memory.DefaultAllocator, table.ToArrowSchema())
125121
wr.res = b.res
126122
wr.builder.Reserve(b.size)
127123

128124
// start processing
129-
wr.work(b.ctxDone, b.size, b.timeout)
125+
wr.work(b.ctxDone, b.timeout)
130126
}()
131127

132128
wr.ch <- res

0 commit comments

Comments
 (0)