Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,54 @@ jobs:
# also need to be updated and the special case for windows, removed.
FOCUS="[^TestContainerdRestart$]" make cri-integration

- name: Stage CRI integration containerd logs into workspace
if: always()
shell: pwsh
run: |
$dest = Join-Path "${{ github.workspace }}" "ci-debug-logs"
New-Item -ItemType Directory -Force -Path $dest | Out-Null

# cri-integration.sh writes containerd.log to C:/Windows/Temp/test-integration
# and on failure tries to `mv` it to $GITHUB_WORKSPACE/report (which often fails
# on Windows with "Device or resource busy" because containerd still holds the
# file open). Look in both places. actions/upload-artifact@v7 refuses to upload
# files outside the workspace root, so stage everything inside the workspace.
$sources = @(
'C:/Windows/Temp/test-integration',
(Join-Path "${{ github.workspace }}" 'report')
)
foreach ($src in $sources) {
if (Test-Path $src) {
Write-Host "Copying logs from $src"
# Use robocopy for resilience against handles still being open.
# Exit codes 0-7 are success for robocopy; treat anything else as warning only.
$leaf = Split-Path -Leaf $src
$target = Join-Path $dest $leaf
New-Item -ItemType Directory -Force -Path $target | Out-Null
robocopy $src $target /E /R:3 /W:1 /NFL /NDL /NP | Out-Null
if ($LASTEXITCODE -ge 8) {
Write-Warning "robocopy $src returned $LASTEXITCODE (best-effort, continuing)"
}
# robocopy sets nonzero "success" codes; reset for downstream steps.
$global:LASTEXITCODE = 0
} else {
Write-Host "Source not found (skipping): $src"
}
}

Write-Host '::group::Staged files'
Get-ChildItem -Recurse -File $dest | Select-Object FullName, Length
Write-Host '::endgroup::'

- name: Upload CRI integration containerd.log (DEBUG-HCS / DEBUG-SHIM traces)
if: always()
uses: actions/upload-artifact@v7
with:
name: cri-integration-containerd-log
path: ci-debug-logs/**/*
if-no-files-found: warn
retention-days: 7

# Enable these tests once the required JobContainer images are updated.
#
# - name: Install containerd service
Expand Down
34 changes: 34 additions & 0 deletions cmd/containerd-shim-runhcs-v1/exec_hcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package main

import (
"context"
"fmt"
"sync"
"time"

Expand Down Expand Up @@ -455,10 +456,19 @@ func (he *hcsExec) waitForExit() {
trace.StringAttribute("tid", he.tid),
trace.StringAttribute("eid", he.id))

log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
}).Info("DEBUG-SHIM: hcsExec.waitForExit START")

err = he.p.Process.Wait()
if err != nil {
log.G(ctx).WithError(err).Error("failed process Wait")
}
log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
}).Info("DEBUG-SHIM: hcsExec.waitForExit Process.Wait() RETURNED")

// Issue the process cancellation to unblock the container wait as early as
// possible.
Expand All @@ -470,6 +480,12 @@ func (he *hcsExec) waitForExit() {
} else {
log.G(ctx).WithField("exitCode", code).Debug("exited")
}
log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
"exitCode": code,
"err": fmt.Sprintf("%v", err),
}).Info("DEBUG-SHIM: hcsExec.waitForExit got exit code, transitioning to Exited")

he.sl.Lock()
he.state = shimExecStateExited
Expand Down Expand Up @@ -517,22 +533,40 @@ func (he *hcsExec) waitForContainerExit() {
trace.StringAttribute("tid", he.tid),
trace.StringAttribute("eid", he.id))

log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
}).Info("DEBUG-SHIM: hcsExec.waitForContainerExit START")

// wait for container or process to exit and clean up resources
select {
case <-he.c.WaitChannel():
// Container exited first. We need to force the process into the exited
// state and clean up any resources
he.sl.Lock()
log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
"state": he.state,
}).Info("DEBUG-SHIM: hcsExec.waitForContainerExit CONTAINER WaitChannel fired (container exited first)")
switch he.state {
case shimExecStateCreated:
he.exitFromCreatedL(ctx, 1)
case shimExecStateRunning:
// Kill the process to unblock `he.waitForExit`.
log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
}).Info("DEBUG-SHIM: hcsExec.waitForContainerExit calling Process.Kill to unblock waitForExit")
_, _ = he.p.Process.Kill(ctx)
}
he.sl.Unlock()
case <-he.processDone:
// Process exited first. This is the normal case; do nothing because
// `he.waitForExit` will release any waiters.
log.G(ctx).WithFields(logrus.Fields{
"tid": he.tid,
"eid": he.id,
}).Info("DEBUG-SHIM: hcsExec.waitForContainerExit processDone fired (process exited first; normal path)")
}
}
163 changes: 0 additions & 163 deletions internal/hcs/callback.go

This file was deleted.

Loading
Loading