summaryrefslogtreecommitdiff
path: root/data/datasource_git.go
diff options
context:
space:
mode:
authorDave Henderson <dhenderson@gmail.com>2024-01-22 09:06:33 -0500
committerGitHub <noreply@github.com>2024-01-22 09:06:33 -0500
commit0ac3aa24bf2e4ada9c26fd9ef5b7f0ae8c6b6cfb (patch)
tree9a95f27eec1e77ef8bfefcb2810f7e41681627a5 /data/datasource_git.go
parentf837061f953bda1e8b42095c6dba0496de11d993 (diff)
Use go-fsimpl to read from datasources (#1336)
* Use go-fsimpl to read from datasources Signed-off-by: Dave Henderson <dhenderson@gmail.com> * trying to fix windows bug Signed-off-by: Dave Henderson <dhenderson@gmail.com> * attempts to fix some of the path madness Signed-off-by: Dave Henderson <dhenderson@gmail.com> * remove 'HOME' from expected env vars Signed-off-by: Dave Henderson <dhenderson@gmail.com> * more tweaks Signed-off-by: Dave Henderson <dhenderson@gmail.com> * lint fix Signed-off-by: Dave Henderson <dhenderson@gmail.com> --------- Signed-off-by: Dave Henderson <dhenderson@gmail.com>
Diffstat (limited to 'data/datasource_git.go')
-rw-r--r--data/datasource_git.go328
1 files changed, 0 insertions, 328 deletions
diff --git a/data/datasource_git.go b/data/datasource_git.go
deleted file mode 100644
index 3859d344..00000000
--- a/data/datasource_git.go
+++ /dev/null
@@ -1,328 +0,0 @@
-package data
-
-import (
- "bytes"
- "context"
- "encoding/json"
- "fmt"
- "io"
- "net/url"
- "os"
- "path"
- "path/filepath"
- "strings"
-
- "github.com/hairyhenderson/gomplate/v4/base64"
- "github.com/hairyhenderson/gomplate/v4/env"
- "github.com/rs/zerolog"
-
- "github.com/go-git/go-billy/v5"
- "github.com/go-git/go-billy/v5/memfs"
- "github.com/go-git/go-git/v5"
- "github.com/go-git/go-git/v5/plumbing"
- "github.com/go-git/go-git/v5/plumbing/transport"
- "github.com/go-git/go-git/v5/plumbing/transport/client"
- "github.com/go-git/go-git/v5/plumbing/transport/http"
- "github.com/go-git/go-git/v5/plumbing/transport/ssh"
- "github.com/go-git/go-git/v5/storage/memory"
-)
-
-func readGit(ctx context.Context, source *Source, args ...string) ([]byte, error) {
- g := gitsource{}
-
- u := source.URL
- repoURL, path, err := g.parseGitPath(u, args...)
- if err != nil {
- return nil, err
- }
-
- depth := 1
- if u.Scheme == "git+file" {
- // we can't do shallow clones for filesystem repos apparently
- depth = 0
- }
-
- fs, _, err := g.clone(ctx, repoURL, depth)
- if err != nil {
- return nil, err
- }
-
- mimeType, out, err := g.read(fs, path)
- if mimeType != "" {
- source.mediaType = mimeType
- }
- return out, err
-}
-
-type gitsource struct{}
-
-func (g gitsource) parseArgURL(arg string) (u *url.URL, err error) {
- if strings.HasPrefix(arg, "//") {
- u, err = url.Parse(arg[1:])
- u.Path = "/" + u.Path
- } else {
- u, err = url.Parse(arg)
- }
-
- if err != nil {
- return nil, fmt.Errorf("failed to parse arg %s: %w", arg, err)
- }
- return u, err
-}
-
-func (g gitsource) parseQuery(orig, arg *url.URL) string {
- q := orig.Query()
- pq := arg.Query()
- for k, vs := range pq {
- for _, v := range vs {
- q.Add(k, v)
- }
- }
- return q.Encode()
-}
-
-func (g gitsource) parseArgPath(u *url.URL, arg string) (repo, p string) {
- // if the source URL already specified a repo and subpath, the whole
- // arg is interpreted as subpath
- if strings.Contains(u.Path, "//") || strings.HasPrefix(arg, "//") {
- return "", arg
- }
-
- parts := strings.SplitN(arg, "//", 2)
- repo = parts[0]
- if len(parts) == 2 {
- p = "/" + parts[1]
- }
- return repo, p
-}
-
-// Massage the URL and args together to produce the repo to clone,
-// and the path to read.
-// The path is delimited from the repo by '//'
-func (g gitsource) parseGitPath(u *url.URL, args ...string) (out *url.URL, p string, err error) {
- if u == nil {
- return nil, "", fmt.Errorf("parseGitPath: no url provided (%v)", u)
- }
- // copy the input url so we can modify it
- out = cloneURL(u)
-
- parts := strings.SplitN(out.Path, "//", 2)
- switch len(parts) {
- case 1:
- p = "/"
- case 2:
- p = "/" + parts[1]
-
- i := strings.LastIndex(out.Path, p)
- out.Path = out.Path[:i-1]
- }
-
- if len(args) > 0 {
- argURL, uerr := g.parseArgURL(args[0])
- if uerr != nil {
- return nil, "", uerr
- }
- repo, argpath := g.parseArgPath(u, argURL.Path)
- out.Path = path.Join(out.Path, repo)
- p = path.Join(p, argpath)
-
- out.RawQuery = g.parseQuery(u, argURL)
-
- if argURL.Fragment != "" {
- out.Fragment = argURL.Fragment
- }
- }
- return out, p, err
-}
-
-//nolint:interfacer
-func cloneURL(u *url.URL) *url.URL {
- out, _ := url.Parse(u.String())
- return out
-}
-
-// refFromURL - extract the ref from the URL fragment if present
-func (g gitsource) refFromURL(u *url.URL) plumbing.ReferenceName {
- switch {
- case strings.HasPrefix(u.Fragment, "refs/"):
- return plumbing.ReferenceName(u.Fragment)
- case u.Fragment != "":
- return plumbing.NewBranchReferenceName(u.Fragment)
- default:
- return plumbing.ReferenceName("")
- }
-}
-
-// refFromRemoteHead - extract the ref from the remote HEAD, to work around
-// hard-coded 'master' default branch in go-git.
-// Should be unnecessary once https://github.com/go-git/go-git/issues/249 is
-// fixed.
-func (g gitsource) refFromRemoteHead(ctx context.Context, u *url.URL, auth transport.AuthMethod) (plumbing.ReferenceName, error) {
- e, err := transport.NewEndpoint(u.String())
- if err != nil {
- return "", err
- }
-
- cli, err := client.NewClient(e)
- if err != nil {
- return "", err
- }
-
- s, err := cli.NewUploadPackSession(e, auth)
- if err != nil {
- return "", err
- }
-
- info, err := s.AdvertisedReferencesContext(ctx)
- if err != nil {
- return "", err
- }
-
- refs, err := info.AllReferences()
- if err != nil {
- return "", err
- }
-
- headRef, ok := refs["HEAD"]
- if !ok {
- return "", fmt.Errorf("no HEAD ref found")
- }
-
- return headRef.Target(), nil
-}
-
-// clone a repo for later reading through http(s), git, or ssh. u must be the URL to the repo
-// itself, and must have any file path stripped
-func (g gitsource) clone(ctx context.Context, repoURL *url.URL, depth int) (billy.Filesystem, *git.Repository, error) {
- fs := memfs.New()
- storer := memory.NewStorage()
-
- // preserve repoURL by cloning it
- u := cloneURL(repoURL)
-
- auth, err := g.auth(u)
- if err != nil {
- return nil, nil, err
- }
-
- if strings.HasPrefix(u.Scheme, "git+") {
- scheme := u.Scheme[len("git+"):]
- u.Scheme = scheme
- }
-
- ref := g.refFromURL(u)
- u.Fragment = ""
- u.RawQuery = ""
-
- // attempt to get the ref from the remote so we don't default to master
- if ref == "" {
- ref, err = g.refFromRemoteHead(ctx, u, auth)
- if err != nil {
- zerolog.Ctx(ctx).Warn().
- Stringer("repoURL", u).
- Err(err).
- Msg("failed to get ref from remote, using default")
- }
- }
-
- opts := &git.CloneOptions{
- URL: u.String(),
- Auth: auth,
- Depth: depth,
- ReferenceName: ref,
- SingleBranch: true,
- Tags: git.NoTags,
- }
- repo, err := git.CloneContext(ctx, storer, fs, opts)
- if u.Scheme == "file" && err == transport.ErrRepositoryNotFound && !strings.HasSuffix(u.Path, ".git") {
- // maybe this has a `.git` subdirectory...
- u = cloneURL(repoURL)
- u.Path = path.Join(u.Path, ".git")
- return g.clone(ctx, u, depth)
- }
- if err != nil {
- return nil, nil, fmt.Errorf("git clone %s: %w", u, err)
- }
- return fs, repo, nil
-}
-
-// read - reads the provided path out of a git repo
-func (g gitsource) read(fsys billy.Filesystem, path string) (string, []byte, error) {
- fi, err := fsys.Stat(path)
- if err != nil {
- return "", nil, fmt.Errorf("can't stat %s: %w", path, err)
- }
- if fi.IsDir() || strings.HasSuffix(path, string(filepath.Separator)) {
- out, rerr := g.readDir(fsys, path)
- return jsonArrayMimetype, out, rerr
- }
-
- f, err := fsys.OpenFile(path, os.O_RDONLY, 0)
- if err != nil {
- return "", nil, fmt.Errorf("can't open %s: %w", path, err)
- }
-
- b, err := io.ReadAll(f)
- if err != nil {
- return "", nil, fmt.Errorf("can't read %s: %w", path, err)
- }
-
- return "", b, nil
-}
-
-func (g gitsource) readDir(fs billy.Filesystem, path string) ([]byte, error) {
- names, err := fs.ReadDir(path)
- if err != nil {
- return nil, fmt.Errorf("couldn't read dir %s: %w", path, err)
- }
- files := make([]string, len(names))
- for i, v := range names {
- files[i] = v.Name()
- }
-
- var buf bytes.Buffer
- enc := json.NewEncoder(&buf)
- if err := enc.Encode(files); err != nil {
- return nil, err
- }
- b := buf.Bytes()
- // chop off the newline added by the json encoder
- return b[:len(b)-1], nil
-}
-
-/*
-auth methods:
-- ssh named key (no password support)
- - GIT_SSH_KEY (base64-encoded) or GIT_SSH_KEY_FILE (base64-encoded, or not)
-
-- ssh agent auth (preferred)
-- http basic auth (for github, gitlab, bitbucket tokens)
-- http token auth (bearer token, somewhat unusual)
-*/
-func (g gitsource) auth(u *url.URL) (auth transport.AuthMethod, err error) {
- user := u.User.Username()
- switch u.Scheme {
- case "git+http", "git+https":
- if pass, ok := u.User.Password(); ok {
- auth = &http.BasicAuth{Username: user, Password: pass}
- } else if pass := env.Getenv("GIT_HTTP_PASSWORD"); pass != "" {
- auth = &http.BasicAuth{Username: user, Password: pass}
- } else if tok := env.Getenv("GIT_HTTP_TOKEN"); tok != "" {
- // note docs on TokenAuth - this is rarely to be used
- auth = &http.TokenAuth{Token: tok}
- }
- case "git+ssh":
- k := env.Getenv("GIT_SSH_KEY")
- if k != "" {
- var key []byte
- key, err = base64.Decode(k)
- if err != nil {
- key = []byte(k)
- }
- auth, err = ssh.NewPublicKeys(user, key, "")
- } else {
- auth, err = ssh.NewSSHAgentAuth(user)
- }
- }
- return auth, err
-}