Skip to content

Commit

Permalink
Flaky Test Workflow (#8055)
Browse files Browse the repository at this point in the history
* add permissions

* replace db setup

* try with bash instead of just pytest flags

* fix test command

* remove spaces

* remove force-flaky flag

* add starting values

* add mac and windows postgres install

* define use bash

* fix typo

* update output report

* tweak last if condition

* clarify failures/successful runs

* print running success and failure tally

* just output pytest instead of capturing it

* set shell to not exit immediately on exit code

* add formatting around results for easier scanning

* more output formatting

* add matrix to unlock parallel runners

* increase to ten batches

* update debug

* add comment

* clean up comments
  • Loading branch information
emmyoop authored Jul 11, 2023
1 parent 7740bd6 commit d8f38ca
Showing 1 changed file with 74 additions and 29 deletions.
103 changes: 74 additions & 29 deletions .github/workflows/test-repeater.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# **what?**
# This workflow will test a single test a given number of times to determine if it's flaky or not. You can test with any supported OS/Python combination.

# This workflow will run all test(s) at the input path a given number of times to determine whether they are flaky. You can test with any supported OS/Python combination.
# The runs are split into 10 parallel batches so more test iterations complete faster.

# **why?**
# Testing if a test is flaky and if a previously flaky test has been fixed. This allows easy testing on supported python versions and OS combinations.
Expand Down Expand Up @@ -38,29 +38,45 @@ on:
- 'ubuntu-latest'
- 'macos-latest'
- 'windows-latest'
num_runs:
description: 'Max number of times to run the test'
num_runs_per_batch:
description: 'Max number of times to run the test per batch. We always run 10 batches.'
type: number
required: true
default: '100'
default: '50'

jobs:
pytest:
runs-on: ${{ inputs.os }}
env:
DBT_TEST_USER_1: dbt_test_user_1
DBT_TEST_USER_2: dbt_test_user_2
DBT_TEST_USER_3: dbt_test_user_3
permissions: read-all

defaults:
run:
shell: bash

jobs:
debug:
runs-on: ubuntu-latest
steps:
- name: "[DEBUG] Output Inputs"
run: |
echo "Branch: ${{ inputs.branch }}"
echo "test_path: ${{ inputs.test_path }}"
echo "python_version: ${{ inputs.python_version }}"
echo "os: ${{ inputs.os }}"
echo "num_runs: ${{ inputs.num_runs }}"
echo "num_runs_per_batch: ${{ inputs.num_runs_per_batch }}"
pytest:
runs-on: ${{ inputs.os }}
strategy:
# run all batches, even if one fails. This informs how flaky the test may be.
fail-fast: false
# using a matrix to speed up the jobs since the matrix will run in parallel when runners are available
matrix:
batch: ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
env:
PYTEST_ADDOPTS: "-v --color=yes -n4 --csv integration_results.csv"
DBT_TEST_USER_1: dbt_test_user_1
DBT_TEST_USER_2: dbt_test_user_2
DBT_TEST_USER_3: dbt_test_user_3

steps:
- name: "Checkout code"
uses: actions/checkout@v3
with:
Expand All @@ -76,30 +92,59 @@ jobs:

- name: "Set up postgres (linux)"
if: inputs.os == 'ubuntu-latest'
uses: ./.github/actions/setup-postgres-linux
run: make setup-db

- name: Set up "postgres (macos)"
# mac and windows don't use make due to limitations with docker with those runners in GitHub
- name: "Set up postgres (macos)"
if: inputs.os == 'macos-latest'
uses: ./.github/actions/setup-postgres-macos

- name: "Set up postgres (windows)"
if: inputs.os == 'windows-latest'
uses: ./.github/actions/setup-postgres-windows

- name: Run test
id: pytest
- name: "Test Command"
id: command
run: |
echo "Running test ${{ inputs.test_path }} ${{ inputs.num_runs }} times with Python ${{inputs.python_version }} on ${{ inputs.os }} for branch/commit ${{ inputs.branch }}"
python -m pytest ${{ inputs.test_path }} --force-flaky --min-passes=${{ inputs.num_runs }} --max-runs=${{ inputs.num_runs }}
test_command="python -m pytest ${{ inputs.test_path }}"
echo "test_command=$test_command" >> $GITHUB_OUTPUT
- uses: actions/upload-artifact@v3
if: always()
with:
name: logs_${{ inputs.python_version }}_${{ inputs.os }}_${{ github.run_id }}
path: ./logs
- name: "Run test ${{ inputs.num_runs_per_batch }} times"
id: pytest
run: |
set +e
for ((i=1; i<=${{ inputs.num_runs_per_batch }}; i++))
do
echo "Running pytest iteration $i..."
python -m pytest ${{ inputs.test_path }}
exit_code=$?
if [[ $exit_code -eq 0 ]]; then
success=$((success + 1))
echo "Iteration $i: Success"
else
failure=$((failure + 1))
echo "Iteration $i: Failure"
fi
echo
echo "==========================="
echo "Successful runs: $success"
echo "Failed runs: $failure"
echo "==========================="
echo
done
echo "failure=$failure" >> $GITHUB_OUTPUT
- name: "Success and Failure Summary: ${{ inputs.os }}/Python ${{ inputs.python_version }}"
run: |
echo "Batch: ${{ matrix.batch }}"
echo "Successful runs: ${{ steps.pytest.outputs.success }}"
echo "Failed runs: ${{ steps.pytest.outputs.failure }}"
- uses: actions/upload-artifact@v3
if: always()
with:
name: integration_results_${{ inputs.python_version }}_${{ inputs.os }}_${{ github.run_id }}.csv
path: integration_results.csv
- name: "Error for Failures"
if: ${{ steps.pytest.outputs.failure }}
run: |
echo "Batch ${{ matrix.batch }} failed ${{ steps.pytest.outputs.failure }} of ${{ inputs.num_runs_per_batch }} tests"
exit 1

0 comments on commit d8f38ca

Please sign in to comment.