[jhoblitt@lsst-bastion01 ~]$ kubectl -n jenkins-prod get pods
|
NAME READY STATUS RESTARTS AGE
|
agent-ldfc-0 1/3 Terminating 1 22h
|
agent-ldfc-1 3/3 Running 0 22h
|
agent-ldfc-2 3/3 Running 0 22h
|
agent-ldfc-3 3/3 Running 0 22h
|
agent-ldfc-4 3/3 Running 0 22h
|
agent-ldfc-5 3/3 Running 0 22h
|
[jhoblitt@lsst-bastion01 ~]$ kubectl -n jenkins-prod describe pod agent-ldfc-0
|
Name: agent-ldfc-0
|
Namespace: jenkins-prod
|
Priority: 0
|
PriorityClassName: <none>
|
Node: lsst-kub016/141.142.181.185
|
Start Time: Sun, 23 Jun 2019 12:46:07 -0500
|
Labels: app.k8s.io/component=agent
|
app.k8s.io/instance=prod
|
app.k8s.io/managed-by=terraform
|
app.k8s.io/name=agent-ldfc
|
app.k8s.io/part-of=jenkins
|
app.k8s.io/version=1.0.0
|
controller-revision-hash=agent-ldfc-748c96c6f6
|
statefulset.kubernetes.io/pod-name=agent-ldfc-0
|
Annotations: kubernetes.io/psp: privileged
|
Status: Terminating (lasts 12h)
|
Termination Grace Period: 30s
|
IP: 10.47.128.37
|
Controlled By: StatefulSet/agent-ldfc
|
Containers:
|
dind:
|
Container ID: docker://f2559135a5a3b3223985c4c368bf31919b07d1c5d129b8fe389ba7e88145817a
|
Image: lsstsqre/dind:18.09.5
|
Image ID: docker-pullable://lsstsqre/dind@sha256:3ed62672a84eb9c1a3c214b4d8159d77263aaec159eecab13ab18d39d3ea4652
|
Port: <none>
|
Host Port: <none>
|
Command:
|
/usr/local/bin/dockerd
|
Args:
|
--host=tcp://localhost:2375
|
--mtu=1376
|
State: Running
|
Started: Sun, 23 Jun 2019 12:53:59 -0500
|
Ready: False
|
Restart Count: 0
|
Limits:
|
cpu: 32
|
memory: 96Gi
|
Requests:
|
cpu: 6
|
memory: 12Gi
|
Liveness: exec [wget --spider -q http://localhost:2375/_ping] delay=5s timeout=1s period=5s #success=1 #failure=2
|
Readiness: exec [wget --spider -q http://localhost:2375/_ping] delay=5s timeout=1s period=5s #success=1 #failure=2
|
Environment:
|
DOCKER_HOST: tcp://localhost:2375
|
Mounts:
|
/j from ws (rw)
|
/var/lib/docker from docker-graph-storage (rw)
|
/var/run/secrets/kubernetes.io/serviceaccount from default-token-nkqvn (ro)
|
docker-gc:
|
Container ID: docker://6f7bafaeb25a28ccb842a0532d6d2173e972407dcbc6ad7a1ef679778ff98cb8
|
Image: lsstsqre/docker-gc:latest
|
Image ID: docker-pullable://lsstsqre/docker-gc@sha256:83e4a8b52eef27ebf1b469e294ffe6905fc4e5de33b30d591811c8ec802c292c
|
Port: <none>
|
Host Port: <none>
|
Command:
|
sh
|
-c
|
while true; do /usr/local/bin/docker-gc; sleep $GRACE_PERIOD_SECONDS; done
|
State: Running
|
Started: Sun, 23 Jun 2019 12:54:14 -0500
|
Ready: True
|
Restart Count: 0
|
Limits:
|
cpu: 500m
|
memory: 512Mi
|
Requests:
|
cpu: 200m
|
memory: 100Mi
|
Environment:
|
DOCKER_HOST: tcp://localhost:2375
|
GRACE_PERIOD_SECONDS: 3600
|
MINIMUM_IMAGES_TO_SAVE: 5
|
REMOVE_VOLUMES: 1
|
FORCE_CONTAINER_REMOVAL: 1
|
FORCE_IMAGE_REMOVAL: 1
|
Mounts:
|
/var/run/secrets/kubernetes.io/serviceaccount from default-token-nkqvn (ro)
|
swarm:
|
Container ID: docker://2190d76765e077023b8d1dc284a68295aec7417a6082a401ad4d3fdbb9a96d81
|
Image: lsstsqre/jenkins-swarm-client:3.15-ldfc
|
Image ID: docker-pullable://lsstsqre/jenkins-swarm-client@sha256:8dfa62ed9c4323982e552c353b040701b8051c781df26f9bd77302ff58ea2f76
|
Port: <none>
|
Host Port: <none>
|
State: Running
|
Started: Sun, 23 Jun 2019 12:55:07 -0500
|
Ready: False
|
Restart Count: 1
|
Limits:
|
cpu: 2
|
memory: 3Gi
|
Requests:
|
cpu: 1
|
memory: 2Gi
|
Liveness: exec [wget --spider -q http://localhost:8080/metrics] delay=5s timeout=1s period=5s #success=1 #failure=2
|
Readiness: exec [wget --spider -q http://localhost:8080/metrics] delay=5s timeout=1s period=5s #success=1 #failure=2
|
Environment:
|
DOCKER_HOST: tcp://localhost:2375
|
JSWARM_MASTER_URL: https://ci.lsst.codes
|
JSWARM_MODE: normal
|
JSWARM_LABELS: docker ldfc
|
JSWARM_EXECUTORS: 1
|
JSWARM_AGENT_NAME: agent-ldfc-0 (v1:metadata.name)
|
JSWARM_DISABLE_CLIENTS_UNIQUE_ID: true
|
JSWARM_DELETE_EXISTING_CLIENTS: true
|
JSWARM_USERNAME: <set to the key 'JSWARM_USERNAME' in secret 'agent-ldfc'> Optional: false
|
JSWARM_PASSWORD: <set to the key 'JSWARM_PASSWORD' in secret 'agent-ldfc'> Optional: false
|
Mounts:
|
/j from ws (rw)
|
/var/run/secrets/kubernetes.io/serviceaccount from default-token-nkqvn (ro)
|
Conditions:
|
Type Status
|
Initialized True
|
Ready False
|
ContainersReady False
|
PodScheduled True
|
Volumes:
|
ws:
|
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
|
ClaimName: ws-agent-ldfc-0
|
ReadOnly: false
|
docker-graph-storage:
|
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
|
Medium:
|
SizeLimit: <unset>
|
default-token-nkqvn:
|
Type: Secret (a volume populated by a Secret)
|
SecretName: default-token-nkqvn
|
Optional: false
|
QoS Class: Burstable
|
Node-Selectors: <none>
|
Tolerations: node.kubernetes.io/not-ready:NoExecute for 300s
|
node.kubernetes.io/unreachable:NoExecute for 300s
|
Events: <none>
|
|
Attempting to delete the pod produced no change in its status after ~5 minutes. Force-deleting it moved the pod into the Pending state, but it is unschedulable because not enough usable nodes/capacity are online. IHS-2333 has been opened to report that lsst-kub016 is down.
NAME READY STATUS RESTARTS AGE
Namespace: jenkins-prod
PriorityClassName: <none>
Node: <none>
Labels: app.k8s.io/component=agent
app.k8s.io/instance=prod
app.k8s.io/managed-by=terraform
app.k8s.io/name=agent-ldfc
app.k8s.io/part-of=jenkins
controller-revision-hash=agent-ldfc-748c96c6f6
Annotations: kubernetes.io/psp: privileged
Status: Pending
IP:
Controlled By: StatefulSet/agent-ldfc
Containers:
dind:
Port: <none>
Host Port: <none>
Command:
/usr/local/bin/dockerd
Args:
Limits:
memory: 96Gi
Requests:
memory: 12Gi
Environment:
Mounts:
/j from ws (rw)
/var/lib/docker from docker-graph-storage (rw)
docker-gc:
Image: lsstsqre/docker-gc:latest
Port: <none>
Host Port: <none>
Command:
sh
-c
Limits:
cpu: 500m
memory: 512Mi
Requests:
cpu: 200m
memory: 100Mi
Environment:
Mounts:
swarm:
Port: <none>
Host Port: <none>
Limits:
memory: 3Gi
Requests:
memory: 2Gi
Environment:
JSWARM_MODE: normal
JSWARM_LABELS: docker ldfc
Mounts:
/j from ws (rw)
Conditions:
Type Status
PodScheduled False
Volumes:
ws:
Type: PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
docker-graph-storage:
Type: EmptyDir (a temporary directory that shares a pod's lifetime)
Medium:
SizeLimit: <unset>
Type: Secret (a volume populated by a Secret)
QoS Class: Burstable
Node-Selectors: <none>
Events:
Type Reason Age From Message
---- ------ ---- ---- -------