mirror of
https://github.com/moby/moby.git
synced 2026-01-11 18:51:37 +00:00
Shortened to minimum code because OCI v1 is essentially equivalent to Docker v2.2 format. Also fixed formatting issue with jq where it wouldn't accept newline character in chained jq commands. Co-authored-by: Tianon Gravi <admwiggin@gmail.com> Signed-off-by: Jimbo Jones <jjjimbo137@gmail.com> Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
449 lines
14 KiB
Bash
Executable File
#!/usr/bin/env bash
set -eo pipefail

# This script was developed for use in Moby's CI, and as such the use cases and
# usability are (intentionally) limited. You may find this script useful for
# educational purposes, for example, to learn how pulling images works "under
# the hood", and you may be able to use it for other purposes, but it should not
# be considered a "general purpose" tool for pulling images.
#
# The project maintainers accept contributions to this script within its intended
# scope, but may not accept contributions beyond that.
#
# For users who have a similar need but require more flexibility/functionality,
# refer to the discussion on GitHub, which mentions various alternatives that
# are more suitable for other uses: https://github.com/moby/moby/issues/40857

# hello-world latest ef872312fe1b 3 months ago 910 B
# hello-world latest ef872312fe1bbc5e05aae626791a47ee9b032efa8f3bda39cc0be7b56bfe59b9 3 months ago 910 B

# debian latest f6fab3b798be 10 weeks ago 85.1 MB
# debian latest f6fab3b798be3174f45aa1eb731f8182705555f89c9026d8c1ef230cbf8301dd 10 weeks ago 85.1 MB

# check if essential commands are in our PATH
# (the name is quoted so the lookup never word-splits or globs)
for cmd in curl jq; do
	if ! command -v "$cmd" &> /dev/null; then
		echo >&2 "error: \"$cmd\" not found!"
		exit 1
	fi
done

# Print the command-line synopsis followed by one example invocation (stdout;
# callers redirect to stderr themselves).
# Arguments:
#   $1 - optional exit status. When non-empty the whole script exits with it;
#        when empty or missing the function just returns.
usage() {
	echo "usage: $0 dir image[:tag][@digest] ..."
	echo " $0 /tmp/old-hello-world hello-world:latest@sha256:8be990ef2aeb16dbcb9271ddfe2610fa6658d13f6dfb8bc72074cc1ca36966a7"
	[ -z "$1" ] || exit "$1"
}

dir="$1" # dir for building tar in
# "shift" fails when no argument at all was given
shift || usage 1 >&2

# A non-empty target directory AND at least one image are both required;
# bail out with the usage text (exit status 2) if either one is missing.
if [ $# -eq 0 ] || [ -z "$dir" ]; then
	usage 2 >&2
fi
mkdir -p "$dir"

# hacky workarounds for Bash 3 support (no associative arrays)
images=()
# per-image tag lists are collected in "$dir/tags-<image>.tmp"; drop stale ones
rm -f "$dir"/tags-*.tmp
manifestJsonEntries=()
doNotGenerateManifestJson=
# repositories[busybox]='"latest": "...", "ubuntu-14.04": "..."'

# bash v4 on Windows CI requires CRLF separator... and linux doesn't seem to care either way
newlineIFS=$'\n'
# BASH_VERSINFO[0] is the shell's major version number (no parsing of
# $BASH_VERSION or extra processes needed)
major="${BASH_VERSINFO[0]}"
if [ "$major" -ge 4 ]; then
	newlineIFS=$'\r\n'
fi

# endpoints used for every token / manifest / blob request below
registryBase='https://registry-1.docker.io'
authBase='https://auth.docker.io'
authService='registry.docker.io'

# https://github.com/moby/moby/issues/33700
#
# Download one blob of an image from "$registryBase" into a local file.
#
# The first request carries the bearer token and does NOT follow redirects;
# the response headers are captured instead. If the registry answers with a
# 3xx, the "Location" target is fetched by a second request that is sent
# without the Authorization header.
#
# Globals:
#   registryBase (read)
# Arguments:
#   $1   - bearer token
#   $2   - image (repository) name, e.g. "library/debian"
#   $3   - blob digest, e.g. "sha256:..."
#   $4   - file to write the blob to
#   $5.. - extra arguments handed to every curl invocation (e.g. -s)
# Returns:
#   non-zero if the redirect has no Location header, if the registry answers
#   the first request with a 4xx/5xx status, or if curl itself fails.
fetch_blob() {
	local token="$1"
	shift
	local image="$1"
	shift
	local digest="$1"
	shift
	local targetFile="$1"
	shift
	local curlArgs=("$@")

	local curlHeaders
	curlHeaders="$(
		curl -S "${curlArgs[@]}" \
			-H "Authorization: Bearer $token" \
			"$registryBase/v2/$image/blobs/$digest" \
			-o "$targetFile" \
			-D-
	)"
	curlHeaders="$(echo "$curlHeaders" | tr -d '\r')"
	# the status line is "HTTP/1.1 307 ..." for HTTP/1.x but "HTTP/2 307" for
	# HTTP/2 (no minor version), so the ".minor" part has to be optional
	if grep -qE '^HTTP/[0-9]+(\.[0-9]+)? 3' <<< "$curlHeaders"; then
		rm -f "$targetFile"

		local blobRedirect
		blobRedirect="$(echo "$curlHeaders" | awk -F ': ' 'tolower($1) == "location" { print $2; exit }')"
		if [ -z "$blobRedirect" ]; then
			echo >&2 "error: failed fetching '$image' blob '$digest'"
			echo "$curlHeaders" | head -1 >&2
			return 1
		fi

		curl -fSL "${curlArgs[@]}" \
			"$blobRedirect" \
			-o "$targetFile"
	else
		# the first request runs without "-f", so an error answer would
		# otherwise be stored as if it were the blob; look at the last status
		# line seen and refuse 4xx/5xx
		local statusCode
		statusCode="$(echo "$curlHeaders" | awk '/^HTTP\/[0-9]+(\.[0-9]+)? [0-9]/ { code = $2 } END { print code }')"
		case "$statusCode" in
			4?? | 5??)
				rm -f "$targetFile"
				echo >&2 "error: failed fetching '$image' blob '$digest' (HTTP $statusCode)"
				return 1
				;;
		esac
	fi
}

# handle 'application/vnd.docker.distribution.manifest.v2+json' manifest
# (the caller also routes 'application/vnd.oci.image.manifest.v1+json' here)
#
# Downloads the image config and every layer named by the manifest into "$dir",
# writes a legacy "<layerId>/{VERSION,json,layer.tar}" tree for each layer and
# records one entry for the final "manifest.json".
#
# Globals:
#   read:    token, image, tag, dir, imageIdentifier, newlineIFS, authBase,
#            authService
#   written: imageId (ID of the top-most layer, consumed by the caller for
#            the old-style "repositories" file), manifestJsonEntries
# Arguments:
#   $1 - the manifest JSON document
# Exits the script with status 1 on an unknown layer media type.
handle_single_manifest_v2() {
	local manifestJson="$1"
	shift

	local configDigest
	configDigest="$(echo "$manifestJson" | jq --raw-output '.config.digest')"
	# kept in its own local on purpose: "imageId" must stay a global so that
	# the assignment further down is visible to the caller
	local configId="${configDigest#*:}" # strip off "sha256:"

	local configFile="$configId.json"
	fetch_blob "$token" "$image" "$configDigest" "$dir/$configFile" -s

	local layersFs
	layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.layers[]')"
	local IFS="$newlineIFS"
	local layers
	mapfile -t layers <<< "$layersFs"
	unset IFS

	echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
	local layerId=
	local layerFiles=()
	local i
	for i in "${!layers[@]}"; do
		local layerMeta="${layers[$i]}"

		local layerMediaType
		layerMediaType="$(echo "$layerMeta" | jq --raw-output '.mediaType')"
		local layerDigest
		layerDigest="$(echo "$layerMeta" | jq --raw-output '.digest')"

		# save the previous layer's ID
		local parentId="$layerId"
		# create a new fake layer ID based on this layer's digest and the previous layer's fake ID
		layerId="$(echo "$parentId"$'\n'"$layerDigest" | sha256sum | cut -d' ' -f1)"
		# this accounts for the possibility that an image contains the same layer twice (and thus has a duplicate digest value)

		mkdir -p "$dir/$layerId"
		echo '1.0' > "$dir/$layerId/VERSION"

		if [ ! -s "$dir/$layerId/json" ]; then
			local parentJson
			parentJson="$(printf ', parent: "%s"' "$parentId")"
			local addJson
			addJson="$(printf '{ id: "%s"%s }' "$layerId" "${parentId:+$parentJson}")"
			# this starter JSON is taken directly from Docker's own "docker save" output for unimportant layers
			# (the here-document below relies on "<<-": it must stay indented with tabs)
			jq "$addJson + ." > "$dir/$layerId/json" <<- 'EOJSON'
				{
					"created": "0001-01-01T00:00:00Z",
					"container_config": {
						"Hostname": "",
						"Domainname": "",
						"User": "",
						"AttachStdin": false,
						"AttachStdout": false,
						"AttachStderr": false,
						"Tty": false,
						"OpenStdin": false,
						"StdinOnce": false,
						"Env": null,
						"Cmd": null,
						"Image": "",
						"Volumes": null,
						"WorkingDir": "",
						"Entrypoint": null,
						"OnBuild": null,
						"Labels": null
					}
				}
			EOJSON
		fi

		case "$layerMediaType" in
			application/vnd.oci.image.layer.v1.tar+gzip | application/vnd.docker.image.rootfs.diff.tar.gzip)
				local layerTar="$layerId/layer.tar"
				layerFiles=("${layerFiles[@]}" "$layerTar")
				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerTar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				# a fresh token is requested for every layer download
				local token
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$token" "$image" "$layerDigest" "$dir/$layerTar" --progress-bar
				;;

			*)
				echo >&2 "error: unknown layer mediaType ($imageIdentifier, $layerDigest): '$layerMediaType'"
				exit 1
				;;
		esac
	done

	# change "$imageId" to be the ID of the last layer we added (needed for old-style "repositories" file which is created later -- specifically for older Docker daemons)
	imageId="$layerId"

	# munge the top layer image manifest to have the appropriate image configuration for older daemons
	local imageOldConfig
	imageOldConfig="$(jq --raw-output --compact-output '{ id: .id } + if .parent then { parent: .parent } else {} end' "$dir/$imageId/json")"
	jq --raw-output "$imageOldConfig + del(.history, .rootfs)" "$dir/$configFile" > "$dir/$imageId/json"

	local manifestJsonEntry
	manifestJsonEntry="$(
		echo '{}' | jq --raw-output --compact-output '. + {
				Config: "'"$configFile"'",
				RepoTags: ["'"${image#library\/}:$tag"'"],
				Layers: '"$(echo '[]' | jq --raw-output --compact-output ".$(for layerFile in "${layerFiles[@]}"; do echo " + [ \"$layerFile\" ]"; done)")"'
			}'
	)"
	manifestJsonEntries=("${manifestJsonEntries[@]}" "$manifestJsonEntry")
}

# Print the CPU architecture name used to pick an entry from a multi-arch
# manifest list. Sources are tried in this order:
#   1. $TARGETARCH, when set and non-empty
#   2. "go env GOARCH", when a "go" command is available
#   3. "dpkg --print-architecture" (armel/armhf -> arm, *64el -> *64le)
#   4. "uname -m" (x86_64 -> amd64, arm/armvN* -> arm, aarch64 -> arm64)
#   5. a warning on stderr plus the fallback "amd64"
# On a mips machine identified only through uname the function prints an
# explanation to stderr and exits with status 1.
get_target_arch() {
	if [ -n "${TARGETARCH:-}" ]; then
		echo "${TARGETARCH}"
		return 0
	fi

	# "command -v" with both streams silenced: a missing tool is an expected
	# situation here and must not produce "not found" noise on stderr
	if command -v go > /dev/null 2>&1; then
		go env GOARCH
		return 0
	fi

	if command -v dpkg > /dev/null 2>&1; then
		local debArch
		debArch="$(dpkg --print-architecture)"
		case "${debArch}" in
			armel | armhf)
				echo "arm"
				return 0
				;;
			*64el)
				# e.g. "mips64el" becomes "mips64le"
				echo "${debArch%el}le"
				return 0
				;;
			*)
				echo "${debArch}"
				return 0
				;;
		esac
	fi

	if command -v uname > /dev/null 2>&1; then
		local uArch
		uArch="$(uname -m)"
		case "${uArch}" in
			x86_64)
				echo amd64
				return 0
				;;
			arm | armv[0-9]*)
				echo arm
				return 0
				;;
			aarch64)
				echo arm64
				return 0
				;;
			mips*)
				echo >&2 "I see you are running on mips but I don't know how to determine endianness yet, so I cannot select a correct arch to fetch."
				echo >&2 "Consider installing \"go\" on the system which I can use to determine the correct arch or specify it explicitly by setting TARGETARCH"
				exit 1
				;;
			*)
				echo "${uArch}"
				return 0
				;;
		esac

	fi

	# default value
	echo >&2 "Unable to determine CPU arch, falling back to amd64. You can specify a target arch by setting TARGETARCH"
	echo amd64
}

# Print the requested platform variant: the value of $TARGETVARIANT, or an
# empty line when that variable is unset or empty.
get_target_variant() {
	echo "${TARGETVARIANT:-}"
}

# Split one "image[:tag][@digest]" argument into the globals "image", "tag"
# and "digest".
#   - a missing tag defaults to "latest"
#   - a missing digest defaults to the tag (the manifest is then requested by
#     tag)
# Arguments:
#   $1 - the image reference as given on the command line
parse_image_spec() {
	local spec="$1"
	image="${spec%%[:@]*}"
	# whatever follows the name: "", ":tag", "@digest" or ":tag@digest"
	local rest="${spec:${#image}}"
	digest=
	if [[ "$rest" == *@* ]]; then
		# cut at the first "@" only, so the ":" inside "sha256:..." survives
		digest="${rest#*@}"
		rest="${rest%%@*}"
	fi
	tag="${rest#:}"
	: "${tag:=latest}"
	: "${digest:=$tag}"
}

# Main loop: fetch every image named on the command line into "$dir".
while [ $# -gt 0 ]; do
	parse_image_spec "$1"
	shift

	# add prefix library if passed official image
	if [[ "$image" != *"/"* ]]; then
		image="library/$image"
	fi

	imageFile="${image//\//_}" # "/" can't be in filenames :)

	token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"

	manifestJson="$(
		curl -fsSL \
			-H "Authorization: Bearer $token" \
			-H 'Accept: application/vnd.oci.image.manifest.v1+json' \
			-H 'Accept: application/vnd.oci.image.index.v1+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
			-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
			"$registryBase/v2/$image/manifests/$digest"
	)"
	# anything that does not look like a JSON object is treated as an error
	if [ "${manifestJson:0:1}" != '{' ]; then
		echo >&2 "error: /v2/$image/manifests/$digest returned something unexpected:"
		echo >&2 " $manifestJson"
		exit 1
	fi

	imageIdentifier="$image:$tag@$digest"

	schemaVersion="$(echo "$manifestJson" | jq --raw-output '.schemaVersion')"
	case "$schemaVersion" in
		2)
			mediaType="$(echo "$manifestJson" | jq --raw-output '.mediaType')"

			case "$mediaType" in
				application/vnd.oci.image.manifest.v1+json | application/vnd.docker.distribution.manifest.v2+json)
					handle_single_manifest_v2 "$manifestJson"
					;;
				application/vnd.oci.image.index.v1+json | application/vnd.docker.distribution.manifest.list.v2+json)
					layersFs="$(echo "$manifestJson" | jq --raw-output --compact-output '.manifests[]')"
					IFS="$newlineIFS"
					mapfile -t layers <<< "$layersFs"
					unset IFS

					found=""
					targetArch="$(get_target_arch)"
					targetVariant="$(get_target_variant)"
					# parse first level multi-arch manifest
					for i in "${!layers[@]}"; do
						layerMeta="${layers[$i]}"
						maniArch="$(echo "$layerMeta" | jq --raw-output '.platform.architecture')"
						maniVariant="$(echo "$layerMeta" | jq --raw-output '.platform.variant')"
						# the variant only has to match when one was requested
						if [[ "$maniArch" = "${targetArch}" ]] && [[ -z "${targetVariant}" || "$maniVariant" = "${targetVariant}" ]]; then
							digest="$(echo "$layerMeta" | jq --raw-output '.digest')"
							# get second level single manifest
							submanifestJson="$(
								curl -fsSL \
									-H "Authorization: Bearer $token" \
									-H 'Accept: application/vnd.oci.image.manifest.v1+json' \
									-H 'Accept: application/vnd.oci.image.index.v1+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.v2+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.list.v2+json' \
									-H 'Accept: application/vnd.docker.distribution.manifest.v1+json' \
									"$registryBase/v2/$image/manifests/$digest"
							)"
							handle_single_manifest_v2 "$submanifestJson"
							found="found"
							break
						fi
					done
					if [ -z "$found" ]; then
						echo >&2 "error: manifest for ${targetArch}${targetVariant:+/${targetVariant}} is not found"
						exit 1
					fi
					;;
				*)
					echo >&2 "error: unknown manifest mediaType ($imageIdentifier): '$mediaType'"
					exit 1
					;;
			esac
			;;

		1)
			if [ -z "$doNotGenerateManifestJson" ]; then
				echo >&2 "warning: '$imageIdentifier' uses schemaVersion '$schemaVersion'"
				echo >&2 " this script cannot (currently) recreate the 'image config' to put in a 'manifest.json' (thus any schemaVersion 2+ images will be imported in the old way, and their 'docker history' will suffer)"
				echo >&2
				doNotGenerateManifestJson=1
			fi

			layersFs="$(echo "$manifestJson" | jq --raw-output '.fsLayers | .[] | .blobSum')"
			IFS="$newlineIFS"
			mapfile -t layers <<< "$layersFs"
			unset IFS

			history="$(echo "$manifestJson" | jq '.history | [.[] | .v1Compatibility]')"
			imageId="$(echo "$history" | jq --raw-output '.[0]' | jq --raw-output '.id')"

			echo "Downloading '$imageIdentifier' (${#layers[@]} layers)..."
			for i in "${!layers[@]}"; do
				imageJson="$(echo "$history" | jq --raw-output ".[${i}]")"
				layerId="$(echo "$imageJson" | jq --raw-output '.id')"
				imageLayer="${layers[$i]}"

				mkdir -p "$dir/$layerId"
				echo '1.0' > "$dir/$layerId/VERSION"

				echo "$imageJson" > "$dir/$layerId/json"

				# TODO figure out why "-C -" doesn't work here
				# "curl: (33) HTTP server doesn't seem to support byte ranges. Cannot resume."
				# "HTTP/1.1 416 Requested Range Not Satisfiable"
				if [ -f "$dir/$layerId/layer.tar" ]; then
					# TODO hackpatch for no -C support :'(
					echo "skipping existing ${layerId:0:12}"
					continue
				fi
				token="$(curl -fsSL "$authBase/token?service=$authService&scope=repository:$image:pull" | jq --raw-output '.token')"
				fetch_blob "$token" "$image" "$imageLayer" "$dir/$layerId/layer.tar" --progress-bar
			done
			;;

		*)
			echo >&2 "error: unknown manifest schemaVersion ($imageIdentifier): '$schemaVersion'"
			exit 1
			;;
	esac

	echo

	# append '"tag": "imageId"' to this image's tag list; the first tag of an
	# image also registers the image itself
	if [ -s "$dir/tags-$imageFile.tmp" ]; then
		echo -n ', ' >> "$dir/tags-$imageFile.tmp"
	else
		images=("${images[@]}" "$image")
	fi
	echo -n '"'"$tag"'": "'"$imageId"'"' >> "$dir/tags-$imageFile.tmp"
done

# Assemble the old-style "repositories" file: one entry per image (without the
# "library/" prefix), each built from the tag list collected in
# "$dir/tags-<image>.tmp" while downloading.
echo -n '{' > "$dir/repositories"
firstImage=1
for image in "${images[@]}"; do
	imageFile="${image//\//_}" # "/" can't be in filenames :)
	image="${image#library\/}"

	# entries after the first one are separated by a comma
	[ "$firstImage" ] || echo -n ',' >> "$dir/repositories"
	firstImage=
	echo -n $'\n\t' >> "$dir/repositories"
	echo -n '"'"$image"'": { '"$(cat "$dir/tags-$imageFile.tmp")"' }' >> "$dir/repositories"
done
echo -n $'\n}\n' >> "$dir/repositories"

rm -f "$dir"/tags-*.tmp

# "manifest.json" is only written when no schemaVersion 1 image was seen and at
# least one entry was collected; otherwise a leftover file is removed.
if [ -z "$doNotGenerateManifestJson" ] && [ "${#manifestJsonEntries[@]}" -gt 0 ]; then
	echo '[]' | jq --raw-output ".$(for entry in "${manifestJsonEntries[@]}"; do echo " + [ $entry ]"; done)" > "$dir/manifest.json"
else
	rm -f "$dir/manifest.json"
fi

echo "Download of images into '$dir' complete."
echo "Use something like the following to load the result into a Docker daemon:"
echo " tar -cC '$dir' . | docker load"