summaryrefslogtreecommitdiff
path: root/registry-scanner/pkg/registry/client.go
blob: 7a2694606428747b180e2d7b6dbf5a89e001f595 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
package registry

import (
	"context"
	"crypto/sha256"
	"fmt"
	"time"

	"github.com/argoproj/pkg/json"

	"github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/log"
	"github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/options"
	"github.com/argoproj-labs/argocd-image-updater/registry-scanner/pkg/tag"

	"github.com/distribution/distribution/v3"
	"github.com/distribution/distribution/v3/manifest/manifestlist"
	"github.com/distribution/distribution/v3/manifest/ocischema"
	"github.com/distribution/distribution/v3/manifest/schema1" //nolint:staticcheck
	"github.com/distribution/distribution/v3/manifest/schema2"
	"github.com/distribution/distribution/v3/reference"
	"github.com/distribution/distribution/v3/registry/client"
	"github.com/distribution/distribution/v3/registry/client/auth"
	"github.com/distribution/distribution/v3/registry/client/auth/challenge"
	"github.com/distribution/distribution/v3/registry/client/transport"

	"github.com/opencontainers/go-digest"
	ociv1 "github.com/opencontainers/image-spec/specs-go/v1"

	"go.uber.org/ratelimit"

	"net/http"
	"net/url"
	"strings"
)

// TODO: Check image's architecture and OS

// knownMediaTypes is the list of manifest media types we can process. It is
// handed to the registry through distribution.WithManifestMediaTypes whenever
// a manifest is fetched (see ManifestForTag and ManifestForDigest), and it
// covers OCI manifests and indexes, schema V1 signed manifests, schema V2
// manifests and manifest lists.
var knownMediaTypes = []string{
	ocischema.SchemaVersion.MediaType,
	schema1.MediaTypeSignedManifest, //nolint:staticcheck
	schema2.SchemaVersion.MediaType,
	manifestlist.SchemaVersion.MediaType,
	ociv1.MediaTypeImageIndex,
}

// RegistryClient defines the methods we need for querying container registries.
// The implementation in this file is registryClient, created via NewClient.
type RegistryClient interface {
	// NewRepository binds the client to the repository with the given name.
	// In the registryClient implementation it must be called before the
	// query methods below, because it creates the underlying repository
	// client they rely on.
	NewRepository(nameInRepository string) error
	// Tags lists all tags of the repository.
	Tags() ([]string, error)
	// ManifestForTag fetches the manifest the given tag points to.
	ManifestForTag(tagStr string) (distribution.Manifest, error)
	// ManifestForDigest fetches the manifest with the given digest.
	ManifestForDigest(dgst digest.Digest) (distribution.Manifest, error)
	// TagMetadata derives tag information from a previously fetched
	// manifest, taking the given manifest options into account.
	TagMetadata(manifest distribution.Manifest, opts *options.ManifestOptions) (*tag.TagInfo, error)
}

// NewRegistryClient is the signature of a constructor for RegistryClient
// implementations. NewClient in this file has this signature; its arguments
// are the registry endpoint, a username and a password.
type NewRegistryClient func(*RegistryEndpoint, string, string) (RegistryClient, error)

// registryClient is the helper type implementing RegistryClient on top of the
// distribution registry client.
type registryClient struct {
	// regClient is the repository client. NewClient leaves it unset; it is
	// assigned by NewRepository.
	regClient distribution.Repository
	// endpoint describes the registry this client talks to.
	endpoint  *RegistryEndpoint
	// creds are the credentials handed to the auth handlers.
	creds     credentials
}

// credentials carries the username/password pair and any refresh tokens used
// to authenticate against a registry. It provides the credential-store
// methods consumed by the distribution v3 auth handlers (it is passed to
// auth.NewTokenHandler and auth.NewBasicHandler).
type credentials struct {
	username string
	password string
	// refreshTokens maps a service name to its refresh token. It may be nil,
	// in which case tokens are neither stored nor returned.
	refreshTokens map[string]string
}

// Basic returns the configured username and password. The URL is not
// consulted: the same pair is returned for every endpoint.
func (cr credentials) Basic(url *url.URL) (string, string) {
	user, pass := cr.username, cr.password
	return user, pass
}

// RefreshToken returns the refresh token stored for service, or the empty
// string when none is known. The URL is not consulted.
func (cr credentials) RefreshToken(url *url.URL, service string) string {
	if token, found := cr.refreshTokens[service]; found {
		return token
	}
	return ""
}

// SetRefreshToken remembers token for service. When the credentials were
// created without a token map the call is a silent no-op, so the token is
// dropped. The realm is not consulted.
func (cr credentials) SetRefreshToken(realm *url.URL, service, token string) {
	if cr.refreshTokens == nil {
		return
	}
	cr.refreshTokens[service] = token
}

// rateLimitTransport encapsulates our custom HTTP round tripper with rate
// limiter from the endpoint. It is constructed in NewRepository.
type rateLimitTransport struct {
	// limiter is taken once per request before it is forwarded.
	limiter   ratelimit.Limiter
	// transport is the wrapped round tripper that performs the request.
	transport http.RoundTripper
	// endpoint is the registry endpoint the limiter belongs to. RoundTrip
	// does not read it.
	endpoint  *RegistryEndpoint
}

// RoundTrip implements http.RoundTripper. Every request first takes a slot
// from the rate limiter, is then logged at trace level, and is finally
// forwarded to the wrapped transport, whose result is returned unchanged.
func (rlt *rateLimitTransport) RoundTrip(r *http.Request) (*http.Response, error) {
	// Consume a slot from the limiter before touching the network.
	rlt.limiter.Take()
	log.Tracef("Performing HTTP %s %s", r.Method, r.URL)
	return rlt.transport.RoundTrip(r)
}

// NewRepository binds the client to the repository nameInRepository. It pings
// the registry to collect its auth challenges, builds an authorizing transport
// (token and basic auth, "pull" scope) wrapped in the endpoint's rate limiter,
// and stores the resulting repository client on clt.
//
// An error is returned when the ping fails, when nameInRepository is not a
// valid repository name, or when the repository client cannot be created.
func (clt *registryClient) NewRepository(nameInRepository string) error {
	// The ping fills the challenge manager; the authorizer below depends on it.
	challenges := challenge.NewSimpleManager()
	if _, err := ping(challenges, clt.endpoint, ""); err != nil {
		return err
	}

	baseTransport := clt.endpoint.GetTransport()
	tokenHandler := auth.NewTokenHandler(clt.endpoint.GetTransport(), clt.creds, nameInRepository, "pull")
	basicHandler := auth.NewBasicHandler(clt.creds)
	authorizer := auth.NewAuthorizer(challenges, tokenHandler, basicHandler)

	limited := &rateLimitTransport{
		limiter:   clt.endpoint.Limiter,
		transport: transport.NewTransport(baseTransport, authorizer),
		endpoint:  clt.endpoint,
	}

	named, err := reference.WithName(nameInRepository)
	if err != nil {
		return err
	}

	apiURL := strings.TrimSuffix(clt.endpoint.RegistryAPI, "/")
	clt.regClient, err = client.NewRepository(named, apiURL, limited)
	return err
}

// NewClient creates a RegistryClient for endpoint. An empty username or
// password falls back to the corresponding value configured on the endpoint.
// The returned client is not yet bound to a repository; call NewRepository
// before querying. The returned error is always nil.
func NewClient(endpoint *RegistryEndpoint, username, password string) (RegistryClient, error) {
	if username == "" {
		username = endpoint.Username
	}
	if password == "" {
		password = endpoint.Password
	}

	clt := &registryClient{
		endpoint: endpoint,
		creds: credentials{
			username: username,
			password: password,
		},
	}
	return clt, nil
}

// Tags lists every tag of the repository the client was bound to by
// NewRepository. On failure the tag list is nil and the registry error is
// returned as is.
func (clt *registryClient) Tags() ([]string, error) {
	ctx := context.Background()

	allTags, err := clt.regClient.Tags(ctx).All(ctx)
	if err != nil {
		return nil, err
	}
	return allTags, nil
}

// ManifestForTag returns the manifest that tagStr points to in the repository,
// restricted to the media types listed in knownMediaTypes.
func (clt *registryClient) ManifestForTag(tagStr string) (distribution.Manifest, error) {
	ctx := context.Background()

	manifests, err := clt.regClient.Manifests(ctx)
	if err != nil {
		return nil, err
	}

	// NOTE(review): the digest argument is computed from the tag string itself
	// and looks like a placeholder; the lookup is presumably driven by the
	// WithTag option - confirm against the distribution client.
	placeholder := digest.FromString(tagStr)
	fetchOpts := []distribution.ManifestServiceOption{
		distribution.WithTag(tagStr),
		distribution.WithManifestMediaTypes(knownMediaTypes),
	}

	fetched, err := manifests.Get(ctx, placeholder, fetchOpts...)
	if err != nil {
		return nil, err
	}
	return fetched, nil
}

// ManifestForDigest returns the manifest identified by dgst in the repository,
// restricted to the media types listed in knownMediaTypes.
func (clt *registryClient) ManifestForDigest(dgst digest.Digest) (distribution.Manifest, error) {
	ctx := context.Background()

	manifests, err := clt.regClient.Manifests(ctx)
	if err != nil {
		return nil, err
	}

	mediaTypes := distribution.WithManifestMediaTypes(knownMediaTypes)
	fetched, err := manifests.Get(ctx, dgst, mediaTypes)
	if err != nil {
		return nil, err
	}
	return fetched, nil
}

// TagMetadata retrieves metadata for a given manifest of given repository.
//
// The following manifest types, as returned by the registry, are supported:
// schema V1 (legacy, might go away), schema V2 and OCI image manifests, plus
// manifest lists and OCI image indexes (e.g. for multi-arch images). Any
// other type yields an error.
//
// The returned TagInfo carries the SHA256 of the manifest payload and, where
// it could be determined, the image creation time. A nil TagInfo together
// with a nil error means the manifest does not match any platform requested
// in opts and should be skipped by the caller.
func (clt *registryClient) TagMetadata(manifest distribution.Manifest, opts *options.ManifestOptions) (*tag.TagInfo, error) {
	ti := &tag.TagInfo{}
	logCtx := opts.Logger()

	switch deserialized := manifest.(type) {

	case *schema1.SignedManifest: //nolint:staticcheck
		var man schema1.Manifest = deserialized.Manifest //nolint:staticcheck
		if len(man.History) == 0 {
			return nil, fmt.Errorf("no history information found in schema V1")
		}

		_, mBytes, err := manifest.Payload()
		if err != nil {
			return nil, err
		}
		ti.Digest = sha256.Sum256(mBytes)
		logCtx.Tracef("v1 SHA digest is %s", ti.EncodedDigest())

		// V1 manifests embed the image configuration as JSON in their first
		// history entry.
		var info struct {
			Arch    string `json:"architecture"`
			Created string `json:"created"`
			OS      string `json:"os"`
			Variant string `json:"variant"`
		}
		if err := json.Unmarshal([]byte(man.History[0].V1Compatibility), &info); err != nil {
			return nil, err
		}
		// NOTE(review): the variant is not part of the platform match for V1
		// manifests, although it is included in the log line below.
		if !opts.WantsPlatform(info.OS, info.Arch, "") {
			logCtx.Debugf("ignoring v1 manifest %v. Manifest platform: %s, requested: %s",
				ti.EncodedDigest(), options.PlatformKey(info.OS, info.Arch, info.Variant), strings.Join(opts.Platforms(), ","))
			return nil, nil
		}
		if ti.CreatedAt, err = time.Parse(time.RFC3339Nano, info.Created); err != nil {
			return nil, err
		}
		return ti, nil

	case *manifestlist.DeserializedManifestList:
		// List must contain at least one image manifest
		if len(deserialized.Manifests) == 0 {
			return nil, fmt.Errorf("empty manifestlist not supported")
		}

		// We use the SHA from the manifest list to let the container engine
		// decide which image to pull, in case of multi-arch clusters.
		_, mBytes, err := deserialized.Payload()
		if err != nil {
			return nil, fmt.Errorf("could not retrieve manifestlist payload: %w", err)
		}
		ti.Digest = sha256.Sum256(mBytes)
		logCtx.Tracef("SHA256 of manifest parent is %v", ti.EncodedDigest())

		return TagInfoFromReferences(clt, opts, logCtx, ti, deserialized.References())

	case *ocischema.DeserializedImageIndex:
		// Index must contain at least one image manifest
		if len(deserialized.Manifests) == 0 {
			return nil, fmt.Errorf("empty index not supported")
		}

		// We use the SHA from the manifest index to let the container engine
		// decide which image to pull, in case of multi-arch clusters.
		_, mBytes, err := deserialized.Payload()
		if err != nil {
			return nil, fmt.Errorf("could not retrieve index payload: %w", err)
		}
		ti.Digest = sha256.Sum256(mBytes)
		logCtx.Tracef("SHA256 of manifest parent is %v", ti.EncodedDigest())

		return TagInfoFromReferences(clt, opts, logCtx, ti, deserialized.References())

	case *schema2.DeserializedManifest:
		logCtx.Tracef("Manifest digest is %v", deserialized.Manifest.Config.Digest.Encoded())
		return clt.tagInfoFromImageConfig(manifest, deserialized.Manifest.Config.Digest, "v2", opts, logCtx, ti)

	case *ocischema.DeserializedManifest:
		return clt.tagInfoFromImageConfig(manifest, deserialized.Manifest.Config.Digest, "OCI", opts, logCtx, ti)

	default:
		return nil, fmt.Errorf("invalid manifest type %T", manifest)
	}
}

// tagInfoFromImageConfig fills ti for a single-image manifest (schema V2 or
// OCI) whose platform and creation time live in a separate config blob.
//
// It stores the SHA256 of the manifest payload in ti, fetches the blob
// identified by configDigest from the registry and reads platform and creation
// time from it. kind ("v2" or "OCI") is only used in log messages. Like
// TagMetadata, it returns nil, nil when the platform is not wanted by opts.
func (clt *registryClient) tagInfoFromImageConfig(manifest distribution.Manifest, configDigest digest.Digest, kind string, opts *options.ManifestOptions, logCtx *log.LogContext, ti *tag.TagInfo) (*tag.TagInfo, error) {
	_, mBytes, err := manifest.Payload()
	if err != nil {
		return nil, err
	}
	ti.Digest = sha256.Sum256(mBytes)
	logCtx.Tracef("%s SHA digest is %s", kind, ti.EncodedDigest())

	// The data we require is in the image config blob, which has to be
	// fetched from the registry separately.
	ctx := context.Background()
	configBlob, err := clt.regClient.Blobs(ctx).Get(ctx, configDigest)
	if err != nil {
		return nil, err
	}

	var info struct {
		Arch    string `json:"architecture"`
		Created string `json:"created"`
		OS      string `json:"os"`
		Variant string `json:"variant"`
	}
	if err := json.Unmarshal(configBlob, &info); err != nil {
		return nil, err
	}

	if !opts.WantsPlatform(info.OS, info.Arch, info.Variant) {
		logCtx.Debugf("ignoring %s manifest %v. Manifest platform: %s, requested: %s",
			kind, ti.EncodedDigest(), options.PlatformKey(info.OS, info.Arch, info.Variant), strings.Join(opts.Platforms(), ","))
		return nil, nil
	}

	if ti.CreatedAt, err = time.Parse(time.RFC3339Nano, info.Created); err != nil {
		return nil, err
	}
	return ti, nil
}

// TagInfoFromReferences is a helper to complete the tag info ti of a manifest
// list or image index from the manifests it references.
//
// Only references whose platform is wanted by opts are considered; if there is
// none, nil, nil is returned. If opts does not ask for metadata, ti is
// returned as is. Otherwise every matching reference is fetched and inspected,
// and ti.CreatedAt is set to the most recent creation time found among them.
//
// An error is returned when a referenced manifest or its metadata cannot be
// fetched; the underlying error is wrapped and can be inspected with
// errors.Is / errors.As.
func TagInfoFromReferences(client *registryClient, opts *options.ManifestOptions, logCtx *log.LogContext, ti *tag.TagInfo, references []distribution.Descriptor) (*tag.TagInfo, error) {
	var ml []distribution.Descriptor
	platforms := make([]string, 0, len(references))

	for _, ref := range references {
		// References without platform information are matched with empty
		// OS, architecture and variant.
		var refOS, refArch, refVariant string
		if ref.Platform != nil {
			refOS = ref.Platform.OS
			refArch = ref.Platform.Architecture
			refVariant = ref.Platform.Variant
		}
		platform1 := options.PlatformKey(refOS, refArch, refVariant)
		platforms = append(platforms, platform1)
		logCtx.Tracef("Found %s", platform1)
		if !opts.WantsPlatform(refOS, refArch, refVariant) {
			logCtx.Tracef("Ignoring referenced manifest %v because platform %s does not match any of: %s",
				ref.Digest,
				platform1,
				strings.Join(opts.Platforms(), ","))
			continue
		}
		ml = append(ml, ref)
	}

	// We need at least one reference that matches requested platforms
	if len(ml) == 0 {
		logCtx.Debugf("Manifest list did not contain any usable reference. Platforms requested: (%s), platforms included: (%s)",
			strings.Join(opts.Platforms(), ","), strings.Join(platforms, ","))
		return nil, nil
	}

	// For some strategies, we do not need to fetch metadata for further
	// processing.
	if !opts.WantsMetadata() {
		return ti, nil
	}

	// Loop through all referenced manifests to get their metadata. We only
	// consider manifests for platforms we are interested in.
	for _, ref := range ml {
		logCtx.Tracef("Inspecting metadata of reference: %v", ref.Digest)

		man, err := client.ManifestForDigest(ref.Digest)
		if err != nil {
			return nil, fmt.Errorf("could not fetch manifest %v: %w", ref.Digest, err)
		}

		cti, err := client.TagMetadata(man, opts)
		if err != nil {
			return nil, fmt.Errorf("could not fetch metadata for manifest %v: %w", ref.Digest, err)
		}
		if cti == nil {
			logCtx.Warnf("returned metadata for manifest %v is nil, this should not happen.", ref.Digest)
			continue
		}

		// We save the timestamp of the most recent pushed manifest for any
		// given reference. This is important for the latest update strategy
		// to be able to handle multi-arch images. The latest strategy will
		// consider the most recent reference from an image index.
		if cti.CreatedAt.After(ti.CreatedAt) {
			ti.CreatedAt = cti.CreatedAt
		}
	}

	return ti, nil
}

// ping issues a GET against the registry's /v2/ API root and feeds the
// response into manager, which initializes the challenge list. Without this,
// tokenHandler and AuthorizationHandler won't work.
//
// Only HTTP 200 and 401 are accepted as valid answers; any other status is
// reported as an error. On success the API versions advertised in the response
// header named versionHeader are returned.
func ping(manager challenge.Manager, endpoint *RegistryEndpoint, versionHeader string) ([]auth.APIVersion, error) {
	httpc := &http.Client{Transport: endpoint.GetTransport()}

	// Trim a trailing slash the same way NewRepository does, so that an
	// endpoint configured as "https://host/" is pinged at /v2/ and not at
	// //v2/, i.e. at the same API root later repository requests use.
	pingURL := strings.TrimSuffix(endpoint.RegistryAPI, "/") + "/v2/"

	resp, err := httpc.Get(pingURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Let's consider only HTTP 200 and 401 valid responses for the initial request
	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusUnauthorized {
		return nil, fmt.Errorf("endpoint %s does not seem to be a valid v2 Docker Registry API (received HTTP code %d for GET %s)", endpoint.RegistryAPI, resp.StatusCode, pingURL)
	}

	if err := manager.AddResponse(resp); err != nil {
		return nil, err
	}

	return auth.APIVersions(resp, versionHeader), nil
}