Skip to content

Commit 4bf83fc

Browse files
xianlubirdsarabala1979
authored andcommitted
Fix DAG enable failFast will hang in some case (#1595)
* Fix failFast will hang in some case
1 parent e9f3d9c commit 4bf83fc

File tree

2 files changed

+78
-15
lines changed

2 files changed

+78
-15
lines changed
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
apiVersion: argoproj.io/v1alpha1
2+
kind: Workflow
3+
metadata:
4+
generateName: dag-hang-failFast-
5+
spec:
6+
entrypoint: retry-with-dags
7+
templates:
8+
- name: retry-with-dags
9+
dag:
10+
failFast: false
11+
tasks:
12+
- name: A
13+
template: success
14+
- name: B
15+
template: success-2
16+
dependencies:
17+
- A
18+
- name: C
19+
template: sub-dag2
20+
dependencies:
21+
- A
22+
- name: D
23+
dependencies:
24+
- A
25+
- C
26+
template: success
27+
28+
- name: sub-dag
29+
dag:
30+
tasks:
31+
- name: fail
32+
template: fail
33+
- name: success1
34+
template: success
35+
36+
- name: fail
37+
container:
38+
command: [sh, -c, exit 1]
39+
image: alpine
40+
retryStrategy:
41+
limit: 1
42+
43+
- name: sub-dag2
44+
steps:
45+
- - name: sub-dag-a
46+
template: success
47+
- - name: sub-dag-b
48+
template: fail
49+
50+
- name: success
51+
container:
52+
command: [sh, -c, exit 0]
53+
image: alpine
54+
retryStrategy:
55+
limit: 1
56+
57+
- name: success-2
58+
container:
59+
command: [sh, -c, sleep 30; exit 0]
60+
image: alpine
61+
retryStrategy:
62+
limit: 1

workflow/controller/dag.go

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,27 +68,28 @@ func (d *dagContext) GetTaskNode(taskName string) *wfv1.NodeStatus {
6868
}
6969

7070
// Assert all branch finished for failFast:disable function
71-
func (d *dagContext) assertBranchFinished(targetTaskName string) bool {
71+
func (d *dagContext) assertBranchFinished(targetTaskNames []string) bool {
7272
// We should ensure that from the bottom to the top,
7373
// all the nodes of this branch have at least one failure.
7474
// If successful, we should continue to run down until the leaf node
75-
taskNode := d.GetTaskNode(targetTaskName)
76-
if taskNode == nil {
77-
taskObject := d.getTask(targetTaskName)
78-
if taskObject != nil {
79-
// Make sure all the dependency node have one failed
80-
for _, tmpTaskName := range taskObject.Dependencies {
75+
flag := false
76+
for _, targetTaskName := range targetTaskNames {
77+
taskNode := d.GetTaskNode(targetTaskName)
78+
if taskNode == nil {
79+
taskObject := d.getTask(targetTaskName)
80+
if taskObject != nil {
81+
// Make sure all the dependency node have one failed
8182
// Recursive check until top root node
82-
return d.assertBranchFinished(tmpTaskName)
83+
return d.assertBranchFinished(taskObject.Dependencies)
8384
}
85+
} else if !taskNode.Successful() {
86+
flag = true
8487
}
85-
} else if !taskNode.Successful() {
86-
return true
87-
}
8888

89-
// In failFast situation, if node is successful, it will run to leaf node, above
90-
// the function, we have already check the leaf node status
91-
return false
89+
// In failFast situation, if node is successful, it will run to leaf node, above
90+
// the function, we have already check the leaf node status
91+
}
92+
return flag
9293
}
9394

9495
// assessDAGPhase assesses the overall DAG status
@@ -128,7 +129,7 @@ func (d *dagContext) assessDAGPhase(targetTasks []string, nodes map[string]wfv1.
128129
tmpDepNode := d.GetTaskNode(tmpDepName)
129130
if tmpDepNode == nil {
130131
// If leaf node is nil, we should check it's parent node and recursive check
131-
if !d.assertBranchFinished(tmpDepName) {
132+
if !d.assertBranchFinished([]string{tmpDepName}) {
132133
tmpOverAllFinished = false
133134
}
134135
} else if tmpDepNode.Type == wfv1.NodeTypeRetry && d.hasMoreRetries(tmpDepNode) {

0 commit comments

Comments
 (0)