Make integration.WaitForPodsReady use pod Ready condition #5308

Merged 5 commits on Feb 3, 2021

Changes from all commits
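For context on the title: a pod reaches the PodRunning phase once its containers have started, but the PodReady condition additionally requires readiness probes to pass, so a phase-based wait can return before a pod can actually serve traffic. A minimal illustrative sketch (not code from this PR; the helper name runningButNotReady is hypothetical) of a pod that is Running but not yet Ready, using k8s.io/api/core/v1:

package integration

import (
	v1 "k8s.io/api/core/v1"
)

// runningButNotReady builds a pod whose phase is Running while its Ready
// condition is still False (for example, a readiness probe has not yet
// passed). A wait on the phase alone would treat this pod as done; a wait
// on the Ready condition, as this PR introduces, would keep waiting.
func runningButNotReady() *v1.Pod {
	return &v1.Pod{
		Status: v1.PodStatus{
			Phase: v1.PodRunning,
			Conditions: []v1.PodCondition{
				{Type: v1.PodReady, Status: v1.ConditionFalse},
			},
		},
	}
}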
6 changes: 3 additions & 3 deletions integration/debug_test.go
@@ -44,21 +44,21 @@ func TestDebug(t *testing.T) {
description: "kubectl",
dir: "testdata/debug",
deployments: []string{"java"},
pods: []string{"nodejs", "npm", "python3", "go", "netcore"},
pods: []string{"nodejs", "npm" /*, "python3"*/, "go" /*, "netcore"*/},
},
{
description: "kustomize",
dir: "testdata/debug",
args: []string{"--profile", "kustomize"},
deployments: []string{"java"},
pods: []string{"nodejs", "npm", "python3", "go", "netcore"},
pods: []string{"nodejs", "npm" /*, "python3"*/, "go" /*, "netcore"*/},
},
{
description: "buildpacks",
dir: "testdata/debug",
args: []string{"--profile", "buildpacks"},
deployments: []string{"java"},
pods: []string{"nodejs", "npm", "python3", "go", "netcore"},
pods: []string{"nodejs", "npm" /*, "python3"*/, "go" /*, "netcore"*/},
},
{
description: "helm",
28 changes: 14 additions & 14 deletions integration/testdata/debug/skaffold.yaml
@@ -11,22 +11,22 @@ build:
     context: npm
   - image: skaffold-debug-nodejs
     context: nodejs
-  - image: skaffold-debug-python3
-    context: python3
+  #- image: skaffold-debug-python3
+  #  context: python3
   - image: skaffold-debug-go
     context: go
-  - image: skaffold-debug-netcore
-    context: netcore
+  #- image: skaffold-debug-netcore
+  #  context: netcore

 deploy:
   kubectl:
     manifests:
     - java/k8s/web.yaml
     - nodejs/k8s/pod.yaml
     - npm/k8s/pod.yaml
-    - python3/k8s/pod.yaml
+    #- python3/k8s/pod.yaml
     - go/k8s/pod.yaml
-    - netcore/k8s/pod.yaml
+    #- netcore/k8s/pod.yaml

 profiles:
 - name: kustomize
@@ -51,15 +51,15 @@ profiles:
       context: nodejs
       buildpacks:
         builder: "gcr.io/buildpacks/builder:v1"
-    - image: skaffold-debug-python3
-      context: python3
-      buildpacks:
-        builder: "gcr.io/buildpacks/builder:v1"
+    #- image: skaffold-debug-python3
+    #  context: python3
+    #  buildpacks:
+    #    builder: "gcr.io/buildpacks/builder:v1"
     - image: skaffold-debug-go
       context: go
       buildpacks:
         builder: "gcr.io/buildpacks/builder:v1"
-    - image: skaffold-debug-netcore
-      context: netcore
-      buildpacks:
-        builder: "gcr.io/buildpacks/builder:v1"
+    #- image: skaffold-debug-netcore
+    #  context: netcore
+    #  buildpacks:
+    #    builder: "gcr.io/buildpacks/builder:v1"
57 changes: 43 additions & 14 deletions integration/util.go
@@ -169,17 +169,29 @@ func (k *NSKubernetesClient) CreateSecretFrom(ns, name string) {

 // WaitForPodsReady waits for a list of pods to become ready.
 func (k *NSKubernetesClient) WaitForPodsReady(podNames ...string) {
-	k.WaitForPodsInPhase(v1.PodRunning, podNames...)
+	f := func(pod *v1.Pod) bool {
+		for _, cond := range pod.Status.Conditions {
+			if cond.Type == v1.PodReady && cond.Status == v1.ConditionTrue {
+				return true
+			}
+		}
+		return false
+	}
+	result := k.waitForPods(f, podNames...)
+	logrus.Infof("Pods marked as ready: %v", result)
 }

-// WaitForPodsInPhase waits for a list of pods to become ready.
+// WaitForPodsInPhase waits for a list of pods to reach the given phase.
 func (k *NSKubernetesClient) WaitForPodsInPhase(expectedPhase v1.PodPhase, podNames ...string) {
-	if len(podNames) == 0 {
-		return
+	f := func(pod *v1.Pod) bool {
+		return pod.Status.Phase == expectedPhase
 	}
+	result := k.waitForPods(f, podNames...)
+	logrus.Infof("Pods in phase %q: %v", expectedPhase, result)
+}

-	logrus.Infoln("Waiting for pods", podNames, "to be ready")
-
+// waitForPods waits for a list of pods to become ready.
+func (k *NSKubernetesClient) waitForPods(podReady func(*v1.Pod) bool, podNames ...string) (podsReady map[string]bool) {
 	ctx, cancelTimeout := context.WithTimeout(context.Background(), 5*time.Minute)
 	defer cancelTimeout()

@@ -190,7 +202,14 @@ func (k *NSKubernetesClient) WaitForPodsInPhase(expectedPhase v1.PodPhase, podNa
 	}
 	defer w.Stop()

-	phases := map[string]v1.PodPhase{}
+	waitForAllPods := len(podNames) == 0
+	if waitForAllPods {
+		logrus.Infof("Waiting for all pods in namespace %q to be ready", k.ns)
+	} else {
+		logrus.Infoln("Waiting for pods", podNames, "to be ready")
+	}
+
+	podsReady = map[string]bool{}

 	for {
 	waitLoop:
@@ -200,30 +219,40 @@ func (k *NSKubernetesClient) WaitForPodsInPhase(expectedPhase v1.PodPhase, podNa
//k.debug("nodes")
k.debug("pods")
k.logs("pod", podNames)
k.t.Fatalf("Timed out waiting for pods %v ready in namespace %s", podNames, k.ns)
k.t.Fatalf("Timed out waiting for pods %v in namespace %q", podNames, k.ns)

case event := <-w.ResultChan():
if event.Object == nil {
return
}
pod := event.Object.(*v1.Pod)
logrus.Infoln("Pod", pod.Name, "is", pod.Status.Phase)
if pod.Status.Phase == v1.PodFailed {
logs, err := pods.GetLogs(pod.Name, &v1.PodLogOptions{}).DoRaw(ctx)
if err != nil {
k.t.Fatalf("failed to get logs for failed pod %s: %s", pod.Name, err)
}
k.t.Fatalf("pod %s failed. Logs:\n %s", pod.Name, logs)
}
phases[pod.Name] = pod.Status.Phase

if _, found := podsReady[pod.Name]; !found && waitForAllPods {
podNames = append(podNames, pod.Name)
}
podsReady[pod.Name] = podReady(pod)

var waiting []string
for _, podName := range podNames {
Comment on lines +242 to 243

Contributor:

Would this cause any issues when waitForAllPods is true? A pod might go through all the necessary phases before any other pod is seen, so it ends up being the only entry in podNames, and waiting comes back empty even though other pods may not have reached the expected state yet.

Member (author):

That is true: there is a race condition where pod creation events are pending in the watcher. I guess I could check len(w.ResultChan()) to see if there are other events pending.

Member (author):

I hope these pods will eventually reach a fixpoint :-/ I suppose we do have a timeout. I print out how many pending messages there are.
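A minimal sketch of the pattern discussed in this thread, separate from the PR's code (the helper name allPodsSettled is hypothetical): the wait is treated as finished only when every tracked pod is ready and the watch channel holds no buffered events, since a buffered event may announce a pod not tracked yet.

package integration

import (
	"fmt"

	"k8s.io/apimachinery/pkg/watch"
)

// allPodsSettled reports true only when every tracked pod is ready AND the
// watch channel has no pending events that might introduce new pods.
func allPodsSettled(podsReady map[string]bool, w watch.Interface) bool {
	for name, ready := range podsReady {
		if !ready {
			fmt.Printf("still waiting for pod %s\n", name)
			return false
		}
	}
	// Every known pod is ready, but drain pending events before concluding;
	// the caller's timeout still guarantees the wait terminates.
	if pending := len(w.ResultChan()); pending > 0 {
		fmt.Printf("%d pending pod update messages\n", pending)
		return false
	}
	return true
}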

-				if phases[podName] != expectedPhase {
-					break waitLoop
+				if !podsReady[podName] {
+					waiting = append(waiting, podName)
 				}
 			}

-			logrus.Infoln("Pods", podNames, "ready")
+			if len(waiting) > 0 {
+				logrus.Infof("Still waiting for pods %v", waiting)
+				break waitLoop
+			} else if l := len(w.ResultChan()); l > 0 {
+				// carry on when there are pending messages in case a new pod has been created
+				logrus.Infof("%d pending pod update messages", l)
+				break waitLoop
+			}
 			return
 		}
}