gha: add guardrails timeouts on all jobs

We had a few "runaway jobs" recently, where the job got stuck, and kept
running for 6 hours (in one case even 24 hours, probably due some github
outage). Some of those jobs could not be terminated.

While running these actions on public repositories doesn't cost us, it's
still not desirable to have jobs running for that long (as they can still
hold up the queue).

This patch adds a blanket "2 hours" time-limit to all jobs that didn't
have a limit set. We should look at tweaking those limits to actually
expected duration, but having a default at least is a start.

Also changed the position of some existing timeouts so that we have a
consistent order in which it's set; making it easier to spot locations
where no limit is defined.

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
(cherry picked from commit 6b7e2783d1)
Signed-off-by: Austin Vazquez <macedonv@amazon.com>
This commit is contained in:
Sebastiaan van Stijn
2024-10-10 13:43:59 +02:00
committed by Austin Vazquez
parent 590e249fb9
commit 936bfb792c
9 changed files with 33 additions and 12 deletions

View File

@@ -12,6 +12,7 @@ env:
jobs:
run:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
steps:
-
name: Checkout

View File

@@ -13,6 +13,7 @@ on:
jobs:
run:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
outputs:
matrix: ${{ steps.set.outputs.matrix }}
steps:

View File

@@ -23,8 +23,8 @@ env:
jobs:
unit:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 120
steps:
-
name: Checkout
@@ -74,8 +74,8 @@ jobs:
unit-report:
runs-on: ubuntu-20.04
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 10
continue-on-error: ${{ github.event_name != 'pull_request' }}
if: always()
needs:
- unit
@@ -102,8 +102,8 @@ jobs:
docker-py:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 120
steps:
-
name: Checkout
@@ -155,8 +155,8 @@ jobs:
integration-flaky:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 120
steps:
-
name: Checkout
@@ -183,8 +183,8 @@ jobs:
integration:
runs-on: ${{ matrix.os }}
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 120
strategy:
fail-fast: false
matrix:
@@ -277,8 +277,8 @@ jobs:
integration-report:
runs-on: ubuntu-20.04
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 10
continue-on-error: ${{ github.event_name != 'pull_request' }}
if: always()
needs:
- integration
@@ -306,6 +306,7 @@ jobs:
integration-cli-prepare:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ github.event_name != 'pull_request' }}
outputs:
matrix: ${{ steps.tests.outputs.matrix }}
@@ -341,8 +342,8 @@ jobs:
integration-cli:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 120
needs:
- integration-cli-prepare
strategy:
@@ -417,8 +418,8 @@ jobs:
integration-cli-report:
runs-on: ubuntu-20.04
continue-on-error: ${{ github.event_name != 'pull_request' }}
timeout-minutes: 10
continue-on-error: ${{ github.event_name != 'pull_request' }}
if: always()
needs:
- integration-cli

View File

@@ -33,6 +33,7 @@ env:
jobs:
build:
runs-on: ${{ inputs.os }}
timeout-minutes: 120 # guardrails timeout for the whole job
env:
GOPATH: ${{ github.workspace }}\go
GOBIN: ${{ github.workspace }}\go\bin
@@ -112,7 +113,7 @@ jobs:
unit-test:
runs-on: ${{ inputs.os }}
timeout-minutes: 120
timeout-minutes: 120 # guardrails timeout for the whole job
env:
GOPATH: ${{ github.workspace }}\go
GOBIN: ${{ github.workspace }}\go\bin
@@ -194,6 +195,7 @@ jobs:
unit-test-report:
runs-on: ubuntu-latest
timeout-minutes: 120 # guardrails timeout for the whole job
if: always()
needs:
- unit-test
@@ -220,6 +222,7 @@ jobs:
integration-test-prepare:
runs-on: ubuntu-latest
timeout-minutes: 120 # guardrails timeout for the whole job
outputs:
matrix: ${{ steps.tests.outputs.matrix }}
steps:
@@ -253,8 +256,8 @@ jobs:
integration-test:
runs-on: ${{ inputs.os }}
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ inputs.storage == 'snapshotter' && github.event_name != 'pull_request' }}
timeout-minutes: 120
needs:
- build
- integration-test-prepare
@@ -513,6 +516,7 @@ jobs:
integration-test-report:
runs-on: ubuntu-latest
timeout-minutes: 120 # guardrails timeout for the whole job
continue-on-error: ${{ inputs.storage == 'snapshotter' && github.event_name != 'pull_request' }}
if: always()
needs:

View File

@@ -29,6 +29,7 @@ jobs:
prepare:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
outputs:
platforms: ${{ steps.platforms.outputs.matrix }}
steps:
@@ -81,6 +82,7 @@ jobs:
build:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
- prepare
@@ -151,6 +153,7 @@ jobs:
merge:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- build
if: always() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && github.event_name != 'pull_request' && github.repository == 'moby/moby'

View File

@@ -22,6 +22,7 @@ jobs:
build:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
steps:
@@ -47,7 +48,7 @@ jobs:
test:
runs-on: ubuntu-20.04
timeout-minutes: 120
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- build
env:

View File

@@ -21,6 +21,7 @@ jobs:
build:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
strategy:
@@ -54,6 +55,7 @@ jobs:
prepare-cross:
runs-on: ubuntu-latest
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
outputs:
@@ -75,6 +77,7 @@ jobs:
cross:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
- prepare-cross

View File

@@ -23,6 +23,7 @@ jobs:
build-dev:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
strategy:
@@ -71,6 +72,7 @@ jobs:
validate-prepare:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
outputs:
@@ -92,7 +94,7 @@ jobs:
validate:
runs-on: ubuntu-20.04
timeout-minutes: 120
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-prepare
- build-dev
@@ -126,6 +128,7 @@ jobs:
smoke-prepare:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- validate-dco
outputs:
@@ -147,6 +150,7 @@ jobs:
smoke:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
needs:
- smoke-prepare
strategy:

View File

@@ -7,6 +7,7 @@ on:
jobs:
check-area-label:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
steps:
- name: Missing `area/` label
if: contains(join(github.event.pull_request.labels.*.name, ','), 'impact/') && !contains(join(github.event.pull_request.labels.*.name, ','), 'area/')
@@ -19,6 +20,7 @@ jobs:
check-changelog:
if: contains(join(github.event.pull_request.labels.*.name, ','), 'impact/')
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
env:
PR_BODY: |
${{ github.event.pull_request.body }}
@@ -47,6 +49,7 @@ jobs:
check-pr-branch:
runs-on: ubuntu-20.04
timeout-minutes: 120 # guardrails timeout for the whole job
env:
PR_TITLE: ${{ github.event.pull_request.title }}
steps: