Skip to content
This repository was archived by the owner on Aug 9, 2023. It is now read-only.

Commit f37c05d

Browse files
author
Bjørn
authored
Add shutdown handling (#47)
Currently the exporter exits immediately when it receives a signal, without waiting for the polling mechanism and other components to shut down. This change introduces a real shutdown sequence that waits for all components to stop before exiting.
1 parent 2ce9d6d commit f37c05d

File tree

2 files changed

+120
-45
lines changed

2 files changed

+120
-45
lines changed

main.go

+98-37
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package main
22

33
import (
4-
"errors"
4+
"context"
55
"fmt"
66
"net/http"
77
"os"
@@ -96,38 +96,68 @@ func main() {
9696
}
9797
})
9898

99-
done := make(chan error, 1)
99+
// context used to stop worker components from signal or component failures
100+
ctx, stop := context.WithCancel(context.Background())
101+
defer stop()
102+
103+
// used to report errors from components
104+
var exitCode int
105+
componentFailed := make(chan error, 1)
106+
var wg sync.WaitGroup
107+
100108
go func() {
101109
log.Infof("Listening on %s", *listenAddress)
102110
err := http.ListenAndServe(*listenAddress, nil)
103111
if err != nil {
104-
done <- err
112+
componentFailed <- fmt.Errorf("http listener stopped: %v", err)
105113
}
106114
}()
107115

116+
// Goroutine responsible for starting the shutdown sequence based on signals or
117+
// component failures
108118
sigs := make(chan os.Signal, 1)
109119
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
120+
wg.Add(1)
110121
go func() {
111-
sig := <-sigs
112-
log.Infof("Received os signal '%s'. Terminating...", sig)
113-
done <- nil
122+
defer wg.Done()
123+
select {
124+
case sig := <-sigs:
125+
log.Infof("Received os signal '%s'. Terminating...", sig)
126+
case err := <-componentFailed:
127+
if err != nil {
128+
log.Errorf("Component failed: %v", err)
129+
exitCode = 1
130+
}
131+
}
132+
stop()
114133
}()
115134

116-
go runAPIPolling(done, *snykAPIURL, *snykAPIToken, *snykOrganizations, secondDuration(*snykInterval), secondDuration(*requestTimeout))
135+
wg.Add(1)
136+
go func() {
137+
defer wg.Done()
138+
log.Info("Snyk API scraper starting")
139+
defer log.Info("Snyk API scraper stopped")
140+
err := runAPIPolling(ctx, *snykAPIURL, *snykAPIToken, *snykOrganizations, secondDuration(*snykInterval), secondDuration(*requestTimeout))
141+
if err != nil {
142+
componentFailed <- fmt.Errorf("snyk api scraper: %w", err)
143+
}
144+
}()
117145

118-
reason := <-done
119-
if reason != nil {
120-
log.Errorf("Snyk exporter exited due to error: %v", reason)
121-
os.Exit(1)
146+
// wait for all components to stop
147+
wg.Wait()
148+
if exitCode != 0 {
149+
log.Errorf("Snyk exporter exited with exit %d", exitCode)
150+
os.Exit(exitCode)
151+
} else {
152+
log.Infof("Snyk exporter exited with exit 0")
122153
}
123-
log.Infof("Snyk exporter exited with exit 0")
124154
}
125155

126156
// secondDuration converts a whole number of seconds into a time.Duration.
func secondDuration(seconds int) time.Duration {
127157
return time.Duration(seconds) * time.Second
128158
}
129159

130-
func runAPIPolling(done chan error, url, token string, organizationIDs []string, requestInterval, requestTimeout time.Duration) {
160+
func runAPIPolling(ctx context.Context, url, token string, organizationIDs []string, requestInterval, requestTimeout time.Duration) error {
131161
client := client{
132162
httpClient: &http.Client{
133163
Timeout: requestTimeout,
@@ -137,36 +167,59 @@ func runAPIPolling(done chan error, url, token string, organizationIDs []string,
137167
}
138168
organizations, err := getOrganizations(&client, organizationIDs)
139169
if err != nil {
140-
done <- err
141-
return
170+
return err
142171
}
143172
log.Infof("Running Snyk API scraper for organizations: %v", strings.Join(organizationNames(organizations), ", "))
173+
174+
// kick off a poll right away to get metrics available right after startup
175+
pollAPI(ctx, &client, organizations)
176+
177+
ticker := time.NewTicker(requestInterval)
178+
defer ticker.Stop()
144179
for {
145-
var gaugeResults []gaugeResult
146-
for _, organization := range organizations {
147-
log.Infof("Collecting for organization '%s'", organization.Name)
148-
results, err := collect(&client, organization)
149-
if err != nil {
150-
log.With("error", errors.Unwrap(err)).
151-
With("organzationName", organization.Name).
152-
With("organzationId", organization.ID).
153-
Errorf("Collection failed for organization '%s': %v", organization.Name, err)
154-
continue
155-
}
156-
log.Infof("Recorded %d results for organization '%s'", len(results), organization.Name)
157-
gaugeResults = append(gaugeResults, results...)
180+
select {
181+
case <-ctx.Done():
182+
return nil
183+
case <-ticker.C:
184+
pollAPI(ctx, &client, organizations)
158185
}
159-
log.Infof("Exposing %d results as metrics", len(gaugeResults))
160-
scrapeMutex.Lock()
161-
register(gaugeResults)
162-
scrapeMutex.Unlock()
163-
readyMutex.Lock()
164-
ready = true
165-
readyMutex.Unlock()
166-
time.Sleep(requestInterval)
167186
}
168187
}
169188

189+
// pollAPI runs a single collection pass: it calls collect for each of the
// provided organizations, gathers the results and registers them in the
190+
// prometheus registry. Organizations whose collection fails are logged and
// skipped. If ctx is found cancelled after an organization has been collected,
// pollAPI returns immediately without registering anything from this pass.
// After a completed pass the ready flag is set to true.
191+
func pollAPI(ctx context.Context, client *client, organizations []org) {
192+
var gaugeResults []gaugeResult
193+
for _, organization := range organizations {
194+
log.Infof("Collecting for organization '%s'", organization.Name)
195+
results, err := collect(ctx, client, organization)
196+
// a failing organization is logged and skipped so that the remaining
// organizations are still collected
if err != nil {
197+
log.With("error", err).
198+
With("organzationName", organization.Name).
199+
With("organzationId", organization.ID).
200+
Errorf("Collection failed for organization '%s': %v", organization.Name, err)
201+
continue
202+
}
203+
log.Infof("Recorded %d results for organization '%s'", len(results), organization.Name)
204+
gaugeResults = append(gaugeResults, results...)
205+
// stop right away in case of the context being cancelled. This ensures that
206+
// we don't wait for a complete collect run for all organizations before
207+
// stopping.
208+
// The default case makes this a non-blocking check of ctx.
select {
209+
case <-ctx.Done():
210+
return
211+
default:
212+
}
213+
}
214+
log.Infof("Exposing %d results as metrics", len(gaugeResults))
215+
// register the collected results while holding scrapeMutex
scrapeMutex.Lock()
216+
register(gaugeResults)
217+
scrapeMutex.Unlock()
218+
// record that a poll has completed; ready is written under readyMutex
readyMutex.Lock()
219+
ready = true
220+
readyMutex.Unlock()
221+
}
222+
170223
func organizationNames(orgs []org) []string {
171224
var names []string
172225
for _, org := range orgs {
@@ -221,7 +274,7 @@ type gaugeResult struct {
221274
results []aggregateResult
222275
}
223276

224-
func collect(client *client, organization org) ([]gaugeResult, error) {
277+
func collect(ctx context.Context, client *client, organization org) ([]gaugeResult, error) {
225278
projects, err := client.getProjects(organization.ID)
226279
if err != nil {
227280
return nil, fmt.Errorf("get projects for organization: %w", err)
@@ -243,6 +296,14 @@ func collect(client *client, organization org) ([]gaugeResult, error) {
243296
})
244297
duration := time.Since(start)
245298
log.Debugf("Collected data in %v for %s %s", duration, project.ID, project.Name)
299+
// stop right away in case of the context being cancelled. This ensures that
300+
// we don't wait for a complete collect run for all projects before
301+
// stopping.
302+
select {
303+
case <-ctx.Done():
304+
return nil, nil
305+
default:
306+
}
246307
}
247308
return gaugeResults, nil
248309
}

main_test.go

+22-8
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
package main
22

33
import (
4+
"context"
45
"net/http"
56
"net/http/httptest"
67
"reflect"
78
"sort"
9+
"sync"
810
"testing"
911
"time"
1012
)
@@ -169,16 +171,28 @@ func TestRunAPIPolling_issuesTimeout(t *testing.T) {
169171
time.Sleep(1 * time.Second)
170172
rw.WriteHeader(http.StatusOK)
171173
}))
172-
done := make(chan error, 1)
173174

174-
go runAPIPolling(done, server.URL, "token", nil, 20*time.Millisecond, 1*time.Millisecond)
175+
ctx, cancel := context.WithCancel(context.Background())
176+
defer cancel()
175177

176-
select {
177-
case result := <-done:
178-
if result != nil {
179-
t.Errorf("unexpected error result: %v", result)
178+
var wg sync.WaitGroup
179+
wg.Add(1)
180+
go func() {
181+
defer wg.Done()
182+
err := runAPIPolling(ctx, server.URL, "token", nil, 20*time.Millisecond, 1*time.Millisecond)
183+
if err != nil {
184+
t.Errorf("unexpected error result: %v", err)
180185
}
181-
case <-time.After(100 * time.Millisecond):
182-
// success path if timeout errors are suppressed
186+
}()
187+
188+
// stop the polling again after 100ms
189+
<-time.After(100 * time.Millisecond)
190+
cancel()
191+
192+
// wait for the polling to stop
193+
wg.Wait()
194+
195+
if !ready {
196+
t.Fatalf("Ready not set but it should be")
183197
}
184198
}

0 commit comments

Comments
 (0)