# wfe/workflows.yaml

# workflows.yaml -- WFE self-hosting CI pipeline (Kubernetes-native)
#
# Designed to run on every push via wfe-server webhook → ci workflow.
# Every step runs as a Kubernetes Job in the wfe namespace; every service
# (postgres, valkey, opensearch) is a K8s Pod + Service in the workflow's
# scoped namespace.
#
# === Prerequisites ===
#
# 1. wfe-server deployed to the K8s cluster (see ../sbbb/base/wfe/)
#    Reads webhooks at /webhooks/gitea, persists to postgres, locks via valkey.
#
# 2. wfe-credentials Secret in the wfe namespace (via Vault Secrets Operator).
#    Required keys:
#      sccache-s3-endpoint:        S3-compatible endpoint for sccache
#      sccache-bucket:             S3 bucket name
#      sccache-region:             S3 region
#      aws-access-key-id:          S3 credentials for sccache
#      aws-secret-access-key:      S3 credentials for sccache
#      cargo-registry-token:       sunbeam registry publish token
#      gitea-token:                tea CLI token for releases
#      buildkit-ca-cert:           PEM-encoded CA cert (mTLS to buildkitd)
#      buildkit-client-cert:       PEM client cert
#      buildkit-client-key:        PEM client key
#
# 3. wfe-ci image at src.sunbeam.pt/studio/wfe-ci:latest
#    Built from Dockerfile.ci -- contains rust, nextest, llvm-cov, sccache,
#    buildctl, kubectl, tea, git.
#
# 4. buildkitd running in the build namespace at buildkitd.build.svc:1234
#    Uses mTLS; client cert mounted from wfe-credentials secret.

# --- Shared Templates ---

_templates:
  # Environment handed to every step that runs in the prebuilt wfe-ci image.
  # The ${WFE_*} placeholders carry the wfe-credentials values, which is what
  # lets sccache reach the shared S3 cache.
  ci_env: &ci_env
    # -- sccache / S3 backend --
    SCCACHE_BUCKET: "${WFE_SCCACHE_BUCKET}"
    SCCACHE_REGION: "${WFE_SCCACHE_REGION}"
    SCCACHE_ENDPOINT: "${WFE_SCCACHE_S3_ENDPOINT}"
    SCCACHE_S3_USE_SSL: "true"
    AWS_ACCESS_KEY_ID: "${WFE_AWS_ACCESS_KEY_ID}"
    AWS_SECRET_ACCESS_KEY: "${WFE_AWS_SECRET_ACCESS_KEY}"
    # -- cargo / rustc --
    CARGO_HOME: /workspace/.cargo
    RUSTC_WRAPPER: /usr/local/cargo/bin/sccache
    CARGO_INCREMENTAL: "0"
    # -- step-specific secrets (publish, image, release) --
    # These live in the shared map on purpose: a YAML 1.1 merge key (`<<`) is
    # shallow, so a step that declared its own `env:` would replace this map
    # wholesale instead of extending it. Keeping everything here keeps the
    # *ci_env / *ci_config anchors flat.
    GITEA_TOKEN: "${WFE_GITEA_TOKEN}"
    TEA_TOKEN: "${WFE_GITEA_TOKEN}"
    CARGO_REGISTRIES_SUNBEAM_TOKEN: "${WFE_CARGO_REGISTRY_TOKEN}"
    BUILDKIT_CA_CERT: "${WFE_BUILDKIT_CA_CERT}"
    BUILDKIT_CLIENT_CERT: "${WFE_BUILDKIT_CLIENT_CERT}"
    BUILDKIT_CLIENT_KEY: "${WFE_BUILDKIT_CLIENT_KEY}"

  # Baseline step config for short jobs: 2 CPU, 4Gi memory, 30 minute timeout.
  ci_config: &ci_config
    image: src.sunbeam.pt/studio/wfe-ci:latest
    cpu: "2"
    memory: 4Gi
    timeout: 30m
    env: *ci_env

  # Step config for heavy jobs: 4 CPU, 8Gi memory, 60 minute timeout.
  # Same image and environment as ci_config.
  ci_long_config: &ci_long_config
    image: src.sunbeam.pt/studio/wfe-ci:latest
    cpu: "4"
    memory: 8Gi
    timeout: 60m
    env: *ci_env

# --- Workflows ---

workflows:

  # === checkout: clone the repo into a shared workspace ===

  - id: checkout
    name: Checkout
    version: 1
    inputs:
      repo_url: string
      commit_sha: string
    outputs:
      checkout_ok: bool
      commit: string
    steps:
      # Single step: ensure /workspace/wfe is a clone of the repository, move
      # it to the requested commit, and report the resolved SHA.
      # The script reads REPO_URL and COMMIT_SHA from its environment
      # (presumably the workflow inputs above -- confirm against the engine).
      - name: clone
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace
            # Reuse a clone that is already present; otherwise create one.
            [ -d wfe ] || git clone "$REPO_URL" wfe
            cd wfe
            git fetch --all
            git checkout "$COMMIT_SHA"
            # Report the full SHA that HEAD actually resolved to.
            head_sha=$(git rev-parse HEAD)
            echo "Checked out: $head_sha"
            echo "##wfe[output checkout_ok=true]"
            echo "##wfe[output commit=$head_sha]"
        outputs:
          - name: checkout_ok
          - name: commit

  # === lint: fmt + clippy ===

  - id: lint
    name: Lint
    version: 1
    outputs:
      fmt_ok: bool
      clippy_ok: bool
    steps:
      # Formatting gate: fails if rustfmt would change any file.
      - name: fmt-check
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            # --check reports differences instead of rewriting files.
            cargo fmt --all -- --check
            echo "##wfe[output fmt_ok=true]"
        outputs:
          - name: fmt_ok

      # Lint gate: every clippy warning is promoted to an error.
      - name: clippy
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            # All workspace members, all features; -D warnings denies warnings.
            cargo clippy --workspace --all-features -- -D warnings
            echo "##wfe[output clippy_ok=true]"
        outputs:
          - name: clippy_ok

  # === test-unit: pure unit tests, no external dependencies ===

  - id: test-unit
    name: Unit Tests
    version: 1
    outputs:
      core_ok: bool
      yaml_ok: bool
      deno_ok: bool
      kubernetes_ok: bool
      rustlang_ok: bool
    steps:
      # Every step runs nextest with the `ci` profile against one crate (or a
      # pair of crates) and emits a single *_ok output on success.

      # wfe-core and wfe together.
      - name: core-tests
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            cargo nextest run --profile ci --package wfe-core --package wfe
            echo "##wfe[output core_ok=true]"
        outputs:
          - name: core_ok

      # wfe-yaml with its optional executor features switched on.
      - name: yaml-tests
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            cargo nextest run --profile ci --package wfe-yaml --features deno,buildkit,containerd,rustlang,kubernetes
            echo "##wfe[output yaml_ok=true]"
        outputs:
          - name: yaml_ok

      - name: deno-tests
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            cargo nextest run --profile ci --package wfe-deno
            echo "##wfe[output deno_ok=true]"
        outputs:
          - name: deno_ok

      # Library tests only (--lib); the `integration` test target is run by
      # the separate test-kubernetes workflow.
      - name: kubernetes-unit-tests
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            cargo nextest run --profile ci --package wfe-kubernetes --lib
            echo "##wfe[output kubernetes_ok=true]"
        outputs:
          - name: kubernetes_ok

      - name: rustlang-tests
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            cargo nextest run --profile ci --package wfe-rustlang
            echo "##wfe[output rustlang_ok=true]"
        outputs:
          - name: rustlang_ok

  # === test-integration: postgres + valkey + opensearch via K8s services ===
  #
  # The wfe ServiceProvider creates Pods + K8s Services in the workflow's
  # scoped namespace. Step containers reach services via in-cluster DNS:
  # postgres → postgres.<namespace>.svc.cluster.local

  - id: test-integration
    name: Integration Tests
    version: 1
    outputs:
      postgres_ok: bool
      valkey_ok: bool
      opensearch_ok: bool

    # Backing services started for this workflow. Each entry declares the
    # image, exposed port, a readiness probe and resource sizes.
    services:
      postgres:
        image: postgres:17
        ports: [5432]
        env:
          POSTGRES_USER: wfe
          POSTGRES_PASSWORD: wfe
          POSTGRES_DB: wfe_test
        readiness:
          exec: ["pg_isready", "-U", "wfe"]
          interval: 2s
          timeout: 60s
          retries: 30
        memory: 512Mi
        cpu: 500m

      valkey:
        image: valkey/valkey:8
        ports: [6379]
        readiness:
          tcp: 6379
          interval: 2s
          timeout: 30s
          retries: 15
        memory: 256Mi
        cpu: 250m

      opensearch:
        image: opensearchproject/opensearch:2
        ports: [9200]
        env:
          discovery.type: single-node
          DISABLE_SECURITY_PLUGIN: "true"
          OPENSEARCH_INITIAL_ADMIN_PASSWORD: admin
          # OpenSearch reads OPENSEARCH_JAVA_OPTS; ES_JAVA_OPTS is the
          # Elasticsearch name and is not picked up, which left the JVM on its
          # default heap instead of the 512m cap intended for this 1536Mi pod.
          OPENSEARCH_JAVA_OPTS: "-Xms512m -Xmx512m"
        readiness:
          http: { port: 9200, path: / }
          interval: 5s
          timeout: 120s
          retries: 24
        memory: 1536Mi
        cpu: "1"

    # One step per backend crate. Each exports the connection URL the tests
    # read, runs nextest with the `ci` profile, and emits its *_ok output.
    # NOTE(review): the URLs use the bare service name (postgres, valkey,
    # opensearch); that only resolves if the step pod shares the services'
    # namespace -- confirm against the ServiceProvider.
    steps:
      - name: postgres-tests
        type: kubernetes
        outputs:
          - name: postgres_ok
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            export DATABASE_URL="postgres://wfe:wfe@postgres:5432/wfe_test"
            cargo nextest run -p wfe-postgres -P ci
            echo "##wfe[output postgres_ok=true]"

      - name: valkey-tests
        type: kubernetes
        outputs:
          - name: valkey_ok
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            export VALKEY_URL="redis://valkey:6379"
            cargo nextest run -p wfe-valkey -P ci
            echo "##wfe[output valkey_ok=true]"

      - name: opensearch-tests
        type: kubernetes
        outputs:
          - name: opensearch_ok
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            export OPENSEARCH_URL="http://opensearch:9200"
            cargo nextest run -p wfe-opensearch -P ci
            echo "##wfe[output opensearch_ok=true]"

  # === test-kubernetes: K8s executor + service provisioner ===
  #
  # These tests need cluster access to create namespaces, pods, jobs, services.
  # The wfe ServiceAccount must have RBAC for the necessary verbs.

  - id: test-kubernetes
    name: Kubernetes Tests
    version: 1
    outputs:
      k8s_integration_ok: bool
    steps:
      # Runs the wfe-kubernetes `integration` test target with the `ci`
      # nextest profile.
      - name: k8s-integration-tests
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            # Cluster credentials come from the pod's ServiceAccount
            # (in-cluster auth), so nothing is configured here.
            cd /workspace/wfe
            cargo nextest run --profile ci --package wfe-kubernetes --test integration
            echo "##wfe[output k8s_integration_ok=true]"
        outputs:
          - name: k8s_integration_ok

  # === test (orchestrator) ===

  - id: test
    name: Tests
    version: 1
    outputs:
      all_passed: bool
    steps:
      # The three run-* steps each delegate to version 1 of one test
      # sub-workflow and list that sub-workflow's outputs.

      - name: run-unit
        type: workflow
        config:
          workflow: test-unit
          version: 1
        outputs:
          - name: core_ok
          - name: yaml_ok
          - name: deno_ok
          - name: kubernetes_ok
          - name: rustlang_ok

      - name: run-integration
        type: workflow
        config:
          workflow: test-integration
          version: 1
        outputs:
          - name: postgres_ok
          - name: valkey_ok
          - name: opensearch_ok

      - name: run-kubernetes
        type: workflow
        config:
          workflow: test-kubernetes
          version: 1
        outputs:
          - name: k8s_integration_ok

      # Final marker step: does no testing itself, it only emits the
      # workflow-level all_passed output once it is reached.
      - name: mark-passed
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            echo "All test workflows completed"
            echo "##wfe[output all_passed=true]"
        outputs:
          - name: all_passed

  # === cover: coverage report with threshold gate ===

  - id: cover
    name: Coverage
    version: 1
    inputs:
      # Minimum line coverage in percent; the gate falls back to 85 when the
      # input is absent.
      coverage_threshold: number?
    outputs:
      line_coverage: number
      meets_threshold: bool

    # Same backing services as the integration tests, so the workspace-wide
    # coverage run can exercise the backend crates.
    services:
      postgres:
        image: postgres:17
        ports: [5432]
        env:
          POSTGRES_USER: wfe
          POSTGRES_PASSWORD: wfe
          POSTGRES_DB: wfe_test
        readiness:
          exec: ["pg_isready", "-U", "wfe"]
          timeout: 60s
        memory: 512Mi

      valkey:
        image: valkey/valkey:8
        ports: [6379]
        readiness:
          tcp: 6379
        memory: 256Mi

      opensearch:
        image: opensearchproject/opensearch:2
        ports: [9200]
        env:
          discovery.type: single-node
          DISABLE_SECURITY_PLUGIN: "true"
          # OpenSearch reads OPENSEARCH_JAVA_OPTS; ES_JAVA_OPTS is the
          # Elasticsearch name and is not picked up, so the 512m heap cap was
          # never applied.
          OPENSEARCH_JAVA_OPTS: "-Xms512m -Xmx512m"
        readiness:
          http: { port: 9200, path: / }
          timeout: 120s
        memory: 1536Mi

    steps:
      # Step 1: run the whole workspace under llvm-cov and save the JSON
      # report. The report goes to /workspace (next to, not inside, the git
      # checkout) because /tmp is local to this step's pod and would not be
      # visible to the following step.
      - name: run-coverage
        type: kubernetes
        outputs:
          - name: coverage_json
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            export DATABASE_URL="postgres://wfe:wfe@postgres:5432/wfe_test"
            export VALKEY_URL="redis://valkey:6379"
            export OPENSEARCH_URL="http://opensearch:9200"
            cargo llvm-cov nextest --workspace -P cover --json > /workspace/wfe-coverage.json
            echo "##wfe[output coverage_json=/workspace/wfe-coverage.json]"

      # Step 2: read the report, publish the line-coverage figure, and fail
      # the step when it is below the threshold.
      - name: assert-threshold
        type: deno
        outputs:
          - name: line_coverage
          - name: meets_threshold
        config:
          script: |
            const data = inputs();
            // `??` rather than `||`: an explicit threshold of 0 must be
            // honoured instead of being replaced by the default.
            const threshold = data.coverage_threshold ?? 85;

            const text = await readFile("/workspace/wfe-coverage.json");
            const report = JSON.parse(text);

            const lines = report.data[0].totals.lines;
            // An empty report would give 0/0 = NaN, and NaN compares false
            // against any threshold, silently skipping the gate. Treat it
            // as 0% so the gate fails loudly instead.
            const percent = lines.count > 0 ? (lines.covered / lines.count) * 100 : 0;
            // Round to one decimal once; the rounded value is what gets
            // logged, published and compared.
            const lineCov = percent.toFixed(1);
            const lineCovValue = parseFloat(lineCov);
            const meets = lineCovValue >= threshold;

            log(`Line coverage: ${lineCov}% (threshold: ${threshold}%)`);

            output("line_coverage", lineCovValue);
            output("meets_threshold", meets);

            if (!meets) {
              throw new Error(`Coverage ${lineCov}% is below threshold ${threshold}%`);
            }
          permissions:
            read: ["/workspace"]

  # === tag: read version, create git tag (only on mainline) ===

  - id: tag
    name: Tag Release
    version: 1
    outputs:
      version: string
      tag_created: bool
      tag_already_existed: bool
    steps:
      # Step 1: take the version from the first `version` line of the root
      # Cargo.toml and publish it as the `version` output.
      - name: read-version
        type: kubernetes
        outputs:
          - name: version
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            VERSION=$(grep -m1 '^version' Cargo.toml | sed -E 's/.*"([^"]+)".*/\1/')
            echo "Version: $VERSION"
            echo "##wfe[output version=$VERSION]"

      # Step 2: report whether tag v<version> already exists.
      # The script reads VERSION from its environment (presumably the output
      # of the previous step -- confirm against the engine).
      - name: check-tag-exists
        type: kubernetes
        outputs:
          - name: tag_already_existed
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            git fetch --tags
            TAG="v${VERSION}"
            # Exact ref lookup. The previous `git tag -l | grep -q "$TAG"`
            # treated the tag as a regex (the dots in a version match any
            # character) and, under pipefail, could report "missing" if grep
            # closed the pipe before git finished writing.
            if git rev-parse -q --verify "refs/tags/$TAG" >/dev/null; then
              echo "Tag $TAG already exists"
              echo "##wfe[output tag_already_existed=true]"
            else
              echo "Tag $TAG does not exist"
              echo "##wfe[output tag_already_existed=false]"
            fi

      # Step 3: create and push an annotated tag, only when step 2 reported
      # that the tag does not exist yet.
      - name: create-tag
        type: kubernetes
        when:
          field: .outputs.tag_already_existed
          equals: false
        outputs:
          - name: tag_created
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            TAG="v${VERSION}"
            # Annotated tags need a committer identity.
            git config user.email "wfe-ci@sunbeam.pt"
            git config user.name "wfe-ci"
            git tag -a "$TAG" -m "$TAG"
            git push origin "$TAG"
            echo "##wfe[output tag_created=true]"

  # === publish: publish all crates to sunbeam registry ===

  - id: publish
    name: Publish Crates
    version: 1
    outputs:
      all_published: bool
    # Crates are published in four tiers, in step order. Within a tier each
    # crate is published in turn:
    #   - success                          -> continue
    #   - registry says the version exists -> tolerated (keeps re-runs and
    #                                         retries idempotent)
    #   - any other failure                -> the step fails, so
    #                                         error_behavior can retry it
    # Previously every failure was reported as "Already published", which hid
    # auth/network/build errors and meant the retry policy could never fire.
    steps:
      - name: publish-tier-1
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            log=$(mktemp)
            trap 'rm -f "$log"' EXIT
            for crate in wfe-core wfe-containerd-protos wfe-buildkit-protos wfe-server-protos; do
              echo "--- Publishing $crate ---"
              # tee keeps the live log; pipefail makes the pipeline report
              # cargo's exit status rather than tee's.
              if cargo publish -p "$crate" --registry sunbeam 2>&1 | tee "$log"; then
                continue
              fi
              # Only a duplicate-version rejection is benign.
              if grep -qiE 'already (uploaded|exists)' "$log"; then
                echo "Already published: $crate"
              else
                echo "Publish failed: $crate" >&2
                exit 1
              fi
            done
        error_behavior:
          type: retry
          interval: 10s
          max_retries: 2

      - name: publish-tier-2
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            log=$(mktemp)
            trap 'rm -f "$log"' EXIT
            for crate in wfe-sqlite wfe-postgres wfe-opensearch wfe-valkey \
                         wfe-buildkit wfe-containerd wfe-rustlang wfe-kubernetes; do
              echo "--- Publishing $crate ---"
              if cargo publish -p "$crate" --registry sunbeam 2>&1 | tee "$log"; then
                continue
              fi
              # Only a duplicate-version rejection is benign.
              if grep -qiE 'already (uploaded|exists)' "$log"; then
                echo "Already published: $crate"
              else
                echo "Publish failed: $crate" >&2
                exit 1
              fi
            done
        error_behavior:
          type: retry
          interval: 10s
          max_retries: 2

      - name: publish-tier-3
        type: kubernetes
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            # Pause before publishing this tier (presumably to let the
            # registry index catch up with the previous tier -- confirm).
            sleep 10
            log=$(mktemp)
            trap 'rm -f "$log"' EXIT
            for crate in wfe wfe-yaml; do
              echo "--- Publishing $crate ---"
              if cargo publish -p "$crate" --registry sunbeam 2>&1 | tee "$log"; then
                continue
              fi
              # Only a duplicate-version rejection is benign.
              if grep -qiE 'already (uploaded|exists)' "$log"; then
                echo "Already published: $crate"
              else
                echo "Publish failed: $crate" >&2
                exit 1
              fi
            done
        error_behavior:
          type: retry
          interval: 10s
          max_retries: 2

      - name: publish-tier-4
        type: kubernetes
        outputs:
          - name: all_published
        config:
          <<: *ci_long_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            # Pause before publishing this tier (presumably to let the
            # registry index catch up with the previous tier -- confirm).
            sleep 10
            log=$(mktemp)
            trap 'rm -f "$log"' EXIT
            for crate in wfe-server wfe-deno; do
              echo "--- Publishing $crate ---"
              if cargo publish -p "$crate" --registry sunbeam 2>&1 | tee "$log"; then
                continue
              fi
              # Only a duplicate-version rejection is benign.
              if grep -qiE 'already (uploaded|exists)' "$log"; then
                echo "Already published: $crate"
              else
                echo "Publish failed: $crate" >&2
                exit 1
              fi
            done
            # Reached only when every crate in this tier was published or
            # already present.
            echo "##wfe[output all_published=true]"
        error_behavior:
          type: retry
          interval: 10s
          max_retries: 2

  # === image: build wfe-server Docker image via in-cluster buildkitd ===
  #
  # Connects to buildkitd.build.svc:1234 over mTLS using certs mounted from
  # the wfe-credentials secret. The wfe-buildkit step type handles all the
  # client-side details.

  - id: image
    name: Build Image
    version: 1
    inputs:
      version: string
    outputs:
      image_pushed: bool
      image_digest: string
    steps:
      # Step 1: materialise the mTLS material from the environment as PEM
      # files under /workspace/buildkit-certs for the build step to read.
      - name: write-buildkit-certs
        type: kubernetes
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            mkdir -p /workspace/buildkit-certs
            echo "$BUILDKIT_CA_CERT"     > /workspace/buildkit-certs/ca.pem
            echo "$BUILDKIT_CLIENT_CERT" > /workspace/buildkit-certs/client.pem
            # Create the private key with owner-only permissions from the
            # start. Writing it with the default umask and tightening it
            # afterwards left it readable by others for a moment. Removing a
            # stale file first ensures the new umask actually applies.
            rm -f /workspace/buildkit-certs/client-key.pem
            ( umask 077; echo "$BUILDKIT_CLIENT_KEY" > /workspace/buildkit-certs/client-key.pem )
            chmod 600 /workspace/buildkit-certs/client-key.pem

      # Step 2: build the repository's Dockerfile and push it under both the
      # version tag and `latest`, talking to buildkitd with the files above.
      - name: build-and-push
        type: buildkit
        outputs:
          - name: image_pushed
          - name: image_digest
        config:
          dockerfile: /workspace/wfe/Dockerfile
          context: /workspace/wfe
          tags:
            - "src.sunbeam.pt/studio/wfe:${VERSION}"
            - "src.sunbeam.pt/studio/wfe:latest"
          push: true
          buildkit_addr: tcp://buildkitd.build.svc:1234
          tls:
            ca: /workspace/buildkit-certs/ca.pem
            cert: /workspace/buildkit-certs/client.pem
            key: /workspace/buildkit-certs/client-key.pem
          timeout: 30m

  # === release: create Gitea release with changelog notes ===

  - id: release
    name: Gitea Release
    version: 1
    inputs:
      version: string
    outputs:
      release_created: bool
      release_url: string
    steps:
      # Step 1: pull the CHANGELOG.md section for this version.
      # A section starts at a "## [" heading containing "[<version>]" and
      # ends at the next "## [" heading; the heading lines themselves are
      # not included. The result is empty when no section matches.
      - name: extract-changelog
        type: deno
        outputs:
          - name: notes
        config:
          script: |
            const data = inputs();
            const version = data.version;
            const text = await readFile("/workspace/wfe/CHANGELOG.md");

            const lines = text.split("\n");
            const notes = [];
            let inSection = false;
            for (const line of lines) {
              if (line.startsWith("## [")) {
                // A second heading while collecting ends the section.
                if (inSection) break;
                if (line.includes(`[${version}]`)) inSection = true;
                continue;
              }
              if (inSection) notes.push(line);
            }
            const body = notes.join("\n").trim();
            log(`Extracted ${body.length} chars of changelog notes`);
            output("notes", body);
          permissions:
            read: ["/workspace/wfe"]

      # Step 2: create the release for tag v<version> with the tea CLI.
      # A failed `tea release create` is reported as release_created=false
      # rather than failing the step; release_url is emitted either way.
      - name: create-gitea-release
        type: kubernetes
        outputs:
          - name: release_created
          - name: release_url
        config:
          <<: *ci_config
          run: |
            set -euo pipefail
            cd /workspace/wfe
            TAG="v${VERSION}"

            # Register the login from the environment token. Best effort:
            # failures here are ignored.
            tea login add --name sunbeam --url https://src.sunbeam.pt --token "$TEA_TOKEN" 2>/dev/null || true

            # ${NOTES:-} instead of $NOTES: under `set -u` a missing NOTES
            # variable (e.g. empty changelog section not exported) would
            # abort the step before tea is even invoked.
            if tea release create --tag "$TAG" --title "$TAG" --note "${NOTES:-}" 2>&1; then
              echo "##wfe[output release_created=true]"
            else
              echo "Release may already exist"
              echo "##wfe[output release_created=false]"
            fi
            echo "##wfe[output release_url=https://src.sunbeam.pt/studio/wfe/releases/tag/${TAG}]"

  # === ci: top-level orchestrator -- runs on every push ===
  #
  # Triggered by Gitea webhook → /webhooks/gitea on wfe-server.
  # The webhook handler maps push events to this workflow with:
  #   inputs: { repo_url, commit_sha, branch }

  # Top-level pipeline: checkout -> lint -> tests -> coverage, then (gated)
  # tag -> publish / image / release. Every step delegates to version 1 of
  # another workflow in this file.
  - id: ci
    name: Continuous Integration
    version: 1
    inputs:
      repo_url: string
      commit_sha: string
      branch: string?
      coverage_threshold: number?
    outputs:
      version: string
      all_tests_passed: bool
      coverage: number
      published: bool
      image_pushed: bool
      released: bool

    steps:
      - name: run-checkout
        type: workflow
        outputs:
          - name: commit
        config:
          workflow: checkout
          version: 1

      - name: run-lint
        type: workflow
        outputs:
          - name: fmt_ok
          - name: clippy_ok
        config:
          workflow: lint
          version: 1

      # NOTE(review): the `test` workflow declares its output as `all_passed`,
      # but this step lists `all_tests_passed` -- confirm how the engine maps
      # sub-workflow outputs onto step outputs.
      - name: run-tests
        type: workflow
        outputs:
          - name: all_tests_passed
        config:
          workflow: test
          version: 1

      # NOTE(review): the `cover` workflow declares `line_coverage` and
      # `meets_threshold`; this step lists `coverage` -- confirm the mapping.
      - name: run-coverage
        type: workflow
        outputs:
          - name: coverage
        config:
          workflow: cover
          version: 1

      # Below this line: only run on mainline (release branch).

      # Gated on the `branch` input being exactly `mainline`.
      - name: run-tag
        type: workflow
        when:
          field: .inputs.branch
          equals: mainline
        outputs:
          - name: version
          - name: tag_created
        config:
          workflow: tag
          version: 1

      # The remaining steps are gated on `tag_created` being true.

      # NOTE(review): the `publish` workflow declares `all_published`; this
      # step lists `published` -- confirm the mapping.
      - name: run-publish
        type: workflow
        when:
          field: .outputs.tag_created
          equals: true
        outputs:
          - name: published
        config:
          workflow: publish
          version: 1

      - name: run-image
        type: workflow
        when:
          field: .outputs.tag_created
          equals: true
        outputs:
          - name: image_pushed
        config:
          workflow: image
          version: 1

      # NOTE(review): the `release` workflow declares `release_created` and
      # `release_url`; this step lists `released` -- confirm the mapping.
      - name: run-release
        type: workflow
        when:
          field: .outputs.tag_created
          equals: true
        outputs:
          - name: released
          - name: release_url
        config:
          workflow: release
          version: 1