Diffstat (limited to 'data/datasource.go')
| -rw-r--r-- | data/datasource.go | 401 |
1 file changed, 194 insertions, 207 deletions
diff --git a/data/datasource.go b/data/datasource.go
index fe3f0877..27260ac1 100644
--- a/data/datasource.go
+++ b/data/datasource.go
@@ -2,98 +2,43 @@ package data
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"io"
 	"io/fs"
-	"mime"
 	"net/http"
 	"net/url"
-	"path/filepath"
+	"runtime"
 	"sort"
 	"strings"
 
+	"github.com/hairyhenderson/go-fsimpl"
 	"github.com/hairyhenderson/gomplate/v4/internal/config"
 	"github.com/hairyhenderson/gomplate/v4/internal/datafs"
-	"github.com/hairyhenderson/gomplate/v4/libkv"
-	"github.com/hairyhenderson/gomplate/v4/vault"
+	"github.com/hairyhenderson/gomplate/v4/internal/parsers"
+	"github.com/hairyhenderson/gomplate/v4/internal/urlhelpers"
 )
 
-func regExtension(ext, typ string) {
-	err := mime.AddExtensionType(ext, typ)
-	if err != nil {
-		panic(err)
-	}
-}
-
-func init() {
-	// Add some types we want to be able to handle which can be missing by default
-	regExtension(".json", jsonMimetype)
-	regExtension(".yml", yamlMimetype)
-	regExtension(".yaml", yamlMimetype)
-	regExtension(".csv", csvMimetype)
-	regExtension(".toml", tomlMimetype)
-	regExtension(".env", envMimetype)
-	regExtension(".cue", cueMimetype)
-}
-
-// registerReaders registers the source-reader functions
-func (d *Data) registerReaders() {
-	d.sourceReaders = make(map[string]func(context.Context, *Source, ...string) ([]byte, error))
-
-	d.sourceReaders["aws+smp"] = readAWSSMP
-	d.sourceReaders["aws+sm"] = readAWSSecretsManager
-	d.sourceReaders["consul"] = readConsul
-	d.sourceReaders["consul+http"] = readConsul
-	d.sourceReaders["consul+https"] = readConsul
-	d.sourceReaders["env"] = readEnv
-	d.sourceReaders["file"] = readFile
-	d.sourceReaders["http"] = readHTTP
-	d.sourceReaders["https"] = readHTTP
-	d.sourceReaders["merge"] = d.readMerge
-	d.sourceReaders["stdin"] = readStdin
-	d.sourceReaders["vault"] = readVault
-	d.sourceReaders["vault+http"] = readVault
-	d.sourceReaders["vault+https"] = readVault
-	d.sourceReaders["s3"] = readBlob
-	d.sourceReaders["gs"] = readBlob
-	d.sourceReaders["git"] = readGit
-	d.sourceReaders["git+file"] = readGit
-	d.sourceReaders["git+http"] = readGit
-	d.sourceReaders["git+https"] = readGit
-	d.sourceReaders["git+ssh"] = readGit
-}
-
-// lookupReader - return the reader function for the given scheme. Empty scheme
-// will return the file reader.
-func (d *Data) lookupReader(scheme string) (func(context.Context, *Source, ...string) ([]byte, error), error) {
-	if d.sourceReaders == nil {
-		d.registerReaders()
-	}
-	if scheme == "" {
-		scheme = "file"
-	}
-
-	r, ok := d.sourceReaders[scheme]
-	if !ok {
-		return nil, fmt.Errorf("scheme %s not registered", scheme)
-	}
-	return r, nil
-}
-
 // Data -
 //
 // Deprecated: will be replaced in future
 type Data struct {
 	Ctx context.Context
 
+	// TODO: remove this before 4.0
 	Sources map[string]*Source
 
-	sourceReaders map[string]func(context.Context, *Source, ...string) ([]byte, error)
-	cache         map[string][]byte
+	cache map[string]*fileContent
 
 	// headers from the --datasource-header/-H option that don't reference datasources from the commandline
 	ExtraHeaders map[string]http.Header
 }
 
+type fileContent struct {
+	contentType string
+	b           []byte
+}
+
 // Cleanup - clean up datasources before shutting the process down - things
 // like Logging out happen here
 func (d *Data) Cleanup() {
@@ -119,7 +64,7 @@ func NewData(datasourceArgs, headerArgs []string) (*Data, error) {
 func FromConfig(ctx context.Context, cfg *config.Config) *Data {
 	// XXX: This is temporary, and will be replaced with something a bit cleaner
 	// when datasources are refactored
-	ctx = ContextWithStdin(ctx, cfg.Stdin)
+	ctx = datafs.ContextWithStdin(ctx, cfg.Stdin)
 
 	sources := map[string]*Source{}
 	for alias, d := range cfg.DataSources {
@@ -147,89 +92,17 @@ func FromConfig(ctx context.Context, cfg *config.Config) *Data {
 //
 // Deprecated: will be replaced in future
 type Source struct {
-	Alias             string
-	URL               *url.URL
-	Header            http.Header             // used for http[s]: URLs, nil otherwise
-	fs                fs.FS                   // used for file: URLs, nil otherwise
-	hc                *http.Client            // used for http[s]: URLs, nil otherwise
-	vc                *vault.Vault            // used for vault: URLs, nil otherwise
-	kv                *libkv.LibKV            // used for consul:, etcd:, zookeeper: URLs, nil otherwise
-	asmpg             awssmpGetter            // used for aws+smp:, nil otherwise
-	awsSecretsManager awsSecretsManagerGetter // used for aws+sm, nil otherwise
-	mediaType         string
-}
-
-func (s *Source) inherit(parent *Source) {
-	s.fs = parent.fs
-	s.hc = parent.hc
-	s.vc = parent.vc
-	s.kv = parent.kv
-	s.asmpg = parent.asmpg
+	Alias     string
+	URL       *url.URL
+	Header    http.Header // used for http[s]: URLs, nil otherwise
+	mediaType string
 }
 
+// Deprecated: no-op
 func (s *Source) cleanup() {
-	if s.vc != nil {
-		s.vc.Logout()
-	}
-	if s.kv != nil {
-		s.kv.Logout()
-	}
-}
-
-// mimeType returns the MIME type to use as a hint for parsing the datasource.
-// It's expected that the datasource will have already been read before
-// this function is called, and so the Source's Type property may be already set.
-//
-// The MIME type is determined by these rules:
-// 1. the 'type' URL query parameter is used if present
-// 2. otherwise, the Type property on the Source is used, if present
-// 3. otherwise, a MIME type is calculated from the file extension, if the extension is registered
-// 4. otherwise, the default type of 'text/plain' is used
-func (s *Source) mimeType(arg string) (mimeType string, err error) {
-	if len(arg) > 0 {
-		if strings.HasPrefix(arg, "//") {
-			arg = arg[1:]
-		}
-		if !strings.HasPrefix(arg, "/") {
-			arg = "/" + arg
-		}
-	}
-	argURL, err := url.Parse(arg)
-	if err != nil {
-		return "", fmt.Errorf("mimeType: couldn't parse arg %q: %w", arg, err)
-	}
-	mediatype := argURL.Query().Get("type")
-	if mediatype == "" {
-		mediatype = s.URL.Query().Get("type")
-	}
-
-	if mediatype == "" {
-		mediatype = s.mediaType
-	}
-
-	// make it so + doesn't need to be escaped
-	mediatype = strings.ReplaceAll(mediatype, " ", "+")
-
-	if mediatype == "" {
-		ext := filepath.Ext(argURL.Path)
-		mediatype = mime.TypeByExtension(ext)
-	}
-
-	if mediatype == "" {
-		ext := filepath.Ext(s.URL.Path)
-		mediatype = mime.TypeByExtension(ext)
-	}
-
-	if mediatype != "" {
-		t, _, err := mime.ParseMediaType(mediatype)
-		if err != nil {
-			return "", fmt.Errorf("MIME type was %q: %w", mediatype, err)
-		}
-		mediatype = t
-		return mediatype, nil
-	}
-
-	return textMimetype, nil
+	// if s.kv != nil {
+	// 	s.kv.Logout()
+	// }
 }
 
 // String is the method to format the flag's value, part of the flag.Value interface.
@@ -246,7 +119,7 @@ func (d *Data) DefineDatasource(alias, value string) (string, error) {
 	if d.DatasourceExists(alias) {
 		return "", nil
 	}
-	srcURL, err := datafs.ParseSourceURL(value)
+	srcURL, err := urlhelpers.ParseSourceURL(value)
 	if err != nil {
 		return "", err
 	}
@@ -288,73 +161,37 @@ func (d *Data) lookupSource(alias string) (*Source, error) {
 	return source, nil
 }
 
-func (d *Data) readDataSource(ctx context.Context, alias string, args ...string) (data, mimeType string, err error) {
+func (d *Data) readDataSource(ctx context.Context, alias string, args ...string) (*fileContent, error) {
 	source, err := d.lookupSource(alias)
 	if err != nil {
-		return "", "", err
+		return nil, err
 	}
-	b, err := d.readSource(ctx, source, args...)
+	fc, err := d.readSource(ctx, source, args...)
 	if err != nil {
-		return "", "", fmt.Errorf("couldn't read datasource '%s': %w", alias, err)
+		return nil, fmt.Errorf("couldn't read datasource '%s': %w", alias, err)
 	}
-	subpath := ""
-	if len(args) > 0 {
-		subpath = args[0]
-	}
-	mimeType, err = source.mimeType(subpath)
-	if err != nil {
-		return "", "", err
-	}
 
-	return string(b), mimeType, nil
+	return fc, nil
 }
 
 // Include -
 func (d *Data) Include(alias string, args ...string) (string, error) {
-	data, _, err := d.readDataSource(d.Ctx, alias, args...)
-	return data, err
+	fc, err := d.readDataSource(d.Ctx, alias, args...)
+	if err != nil {
+		return "", err
+	}
+
+	return string(fc.b), err
}
 
 // Datasource -
 func (d *Data) Datasource(alias string, args ...string) (interface{}, error) {
-	data, mimeType, err := d.readDataSource(d.Ctx, alias, args...)
+	fc, err := d.readDataSource(d.Ctx, alias, args...)
 	if err != nil {
 		return nil, err
 	}
-	return parseData(mimeType, data)
-}
-
-func parseData(mimeType, s string) (out interface{}, err error) {
-	switch mimeAlias(mimeType) {
-	case jsonMimetype:
-		out, err = JSON(s)
-		if err != nil {
-			// maybe it's a JSON array
-			out, err = JSONArray(s)
-		}
-	case jsonArrayMimetype:
-		out, err = JSONArray(s)
-	case yamlMimetype:
-		out, err = YAML(s)
-		if err != nil {
-			// maybe it's a YAML array
-			out, err = YAMLArray(s)
-		}
-	case csvMimetype:
-		out, err = CSV(s)
-	case tomlMimetype:
-		out, err = TOML(s)
-	case envMimetype:
-		out, err = dotEnv(s)
-	case textMimetype:
-		out = s
-	case cueMimetype:
-		out, err = CUE(s)
-	default:
-		return nil, fmt.Errorf("datasources of type %s not yet supported", mimeType)
-	}
-	return out, err
+
+	return parsers.ParseData(fc.contentType, string(fc.b))
 }
 
 // DatasourceReachable - Determines if the named datasource is reachable with
@@ -370,9 +207,9 @@ func (d *Data) DatasourceReachable(alias string, args ...string) bool {
 
 // readSource returns the (possibly cached) data from the given source,
 // as referenced by the given args
-func (d *Data) readSource(ctx context.Context, source *Source, args ...string) ([]byte, error) {
+func (d *Data) readSource(ctx context.Context, source *Source, args ...string) (*fileContent, error) {
 	if d.cache == nil {
-		d.cache = make(map[string][]byte)
+		d.cache = make(map[string]*fileContent)
 	}
 	cacheKey := source.Alias
 	for _, v := range args {
@@ -382,16 +219,107 @@ func (d *Data) readSource(ctx context.Context, source *Source, args ...string) (
 	if ok {
 		return cached, nil
 	}
-	r, err := d.lookupReader(source.URL.Scheme)
-	if err != nil {
-		return nil, fmt.Errorf("Datasource not yet supported")
+
+	arg := ""
+	if len(args) > 0 {
+		arg = args[0]
 	}
-	data, err := r(ctx, source, args...)
+
+	u, err := resolveURL(source.URL, arg)
 	if err != nil {
 		return nil, err
 	}
-	d.cache[cacheKey] = data
-	return data, nil
+
+	fc, err := d.readFileContent(ctx, u, source.Header)
+	if err != nil {
+		return nil, fmt.Errorf("reading %s: %w", u, err)
+	}
+	d.cache[cacheKey] = fc
+	return fc, nil
+}
+
+// readFileContent returns content from the given URL
+func (d Data) readFileContent(ctx context.Context, u *url.URL, hdr http.Header) (*fileContent, error) {
+	fsys, err := datafs.FSysForPath(ctx, u.String())
+	if err != nil {
+		return nil, fmt.Errorf("fsys for path %v: %w", u, err)
+	}
+
+	u, fname := datafs.SplitFSMuxURL(u)
+
+	// need to support absolute paths on local filesystem too
+	// TODO: this is a hack, probably fix this?
+	if u.Scheme == "file" && runtime.GOOS != "windows" {
+		fname = u.Path + fname
+	}
+
+	fsys = fsimpl.WithContextFS(ctx, fsys)
+	fsys = fsimpl.WithHeaderFS(hdr, fsys)
+
+	// convert d.Sources to a map[string]config.DataSources
+	// TODO: remove this when d.Sources is removed
+	ds := make(map[string]config.DataSource)
+	for k, v := range d.Sources {
+		ds[k] = config.DataSource{
+			URL:    v.URL,
+			Header: v.Header,
+		}
+	}
+
+	fsys = datafs.WithDataSourcesFS(ds, fsys)
+
+	f, err := fsys.Open(fname)
+	if err != nil {
+		return nil, fmt.Errorf("open (url: %q, name: %q): %w", u, fname, err)
+	}
+	defer f.Close()
+
+	fi, err := f.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("stat (url: %q, name: %q): %w", u, fname, err)
+	}
+
+	// possible type hint in the type query param. Contrary to spec, we allow
+	// unescaped '+' characters to make it simpler to provide types like
+	// "application/array+json"
+	mimeType := u.Query().Get("type")
+	mimeType = strings.ReplaceAll(mimeType, " ", "+")
+
+	if mimeType == "" {
+		mimeType = fsimpl.ContentType(fi)
+	}
+
+	var data []byte
+
+	if fi.IsDir() {
+		var dirents []fs.DirEntry
+		dirents, err = fs.ReadDir(fsys, fname)
+		if err != nil {
+			return nil, fmt.Errorf("readDir (url: %q, name: %s): %w", u, fname, err)
+		}
+
+		entries := make([]string, len(dirents))
+		for i, e := range dirents {
+			entries[i] = e.Name()
+		}
+		data, err = json.Marshal(entries)
+		if err != nil {
+			return nil, fmt.Errorf("json.Marshal: %w", err)
+		}
+
+		mimeType = jsonArrayMimetype
+	} else {
+		data, err = io.ReadAll(f)
+		if err != nil {
			return nil, fmt.Errorf("read (url: %q, name: %s): %w", u, fname, err)
+		}
+	}
+
+	if mimeType == "" {
+		// default to text/plain
+		mimeType = textMimetype
+	}
+
+	return &fileContent{contentType: mimeType, b: data}, nil
 }
 
 // Show all datasources -
@@ -403,3 +331,62 @@ func (d *Data) ListDatasources() []string {
 	sort.Strings(datasources)
 	return datasources
 }
+
+// resolveURL parses the relative URL rel against base, and returns the
+// resolved URL. Differs from url.ResolveReference in that query parameters are
+// added. In case of duplicates, params from rel are used.
+func resolveURL(base *url.URL, rel string) (*url.URL, error) {
+	// if there's an opaque part, there's no resolving to do - just return the
+	// base URL
+	if base.Opaque != "" {
+		return base, nil
+	}
+
+	// git URLs are special - they have double-slashes that separate a repo
+	// from a path in the repo. A missing double-slash means the path is the
+	// root.
+	switch base.Scheme {
+	case "git", "git+file", "git+http", "git+https", "git+ssh":
+		if strings.Contains(base.Path, "//") && strings.Contains(rel, "//") {
+			return nil, fmt.Errorf("both base URL and subpath contain '//', which is not allowed in git URLs")
+		}
+
+		// If there's a subpath, the base path must end with '/'. This behaviour
+		// is unique to git URLs - other schemes would instead drop the last
+		// path element and replace with the subpath.
+		if rel != "" && !strings.HasSuffix(base.Path, "/") {
+			base.Path += "/"
+		}
+
+		// If subpath starts with '//', make it relative by prefixing a '.',
+		// otherwise it'll be treated as a schemeless URI and the first part
+		// will be interpreted as a hostname.
+		if strings.HasPrefix(rel, "//") {
+			rel = "." + rel
+		}
+	}
+
+	relURL, err := url.Parse(rel)
+	if err != nil {
+		return nil, err
+	}
+
+	// URL.ResolveReference requires (or assumes, at least) that the base is
+	// absolute. We want to support relative URLs too though, so we need to
+	// correct for that.
+	out := base.ResolveReference(relURL)
+	if out.Scheme == "" && out.Path[0] == '/' {
+		out.Path = out.Path[1:]
+	}
+
+	if base.RawQuery != "" {
+		bq := base.Query()
+		rq := relURL.Query()
+		for k := range rq {
+			bq.Set(k, rq.Get(k))
+		}
+		out.RawQuery = bq.Encode()
+	}
+
+	return out, nil
+}
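A note on the new resolveURL helper at the end of the diff: unlike plain url.ResolveReference, it keeps the query parameters from the datasource's base URL and lets parameters on the subpath win when both set the same key. A minimal standalone sketch of that merging behaviour (the mergeQuery helper and the example.com URLs are illustrative only, not part of gomplate):

package main

import (
	"fmt"
	"net/url"
)

// mergeQuery resolves rel against base and then merges query parameters,
// with rel's values overriding base's on duplicate keys - the same rule
// the diff describes for resolveURL. Illustrative helper, not gomplate API.
func mergeQuery(base *url.URL, rel string) (*url.URL, error) {
	relURL, err := url.Parse(rel)
	if err != nil {
		return nil, err
	}

	out := base.ResolveReference(relURL)

	if base.RawQuery != "" {
		q := base.Query()
		for k, vs := range relURL.Query() {
			q[k] = vs // rel overrides base on duplicate keys
		}
		out.RawQuery = q.Encode()
	}
	return out, nil
}

func main() {
	base, _ := url.Parse("https://example.com/data/?type=application/json")

	// a subpath inherits the base URL's type hint...
	u, _ := mergeQuery(base, "users.json")
	fmt.Println(u) // https://example.com/data/users.json?type=application%2Fjson

	// ...unless it supplies its own, which takes precedence
	u, _ = mergeQuery(base, "users.csv?type=text/csv")
	fmt.Println(u) // https://example.com/data/users.csv?type=text%2Fcsv
}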

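The type-hint handling in readFileContent is also worth calling out: a literal '+' in a query string normally decodes to a space, so the new code turns spaces back into '+' to allow hints like application/array+json to be written unescaped. A rough sketch of that normalisation, using a made-up file: URL:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	// '+' in a query string decodes to a space, so the hint arrives mangled
	u, _ := url.Parse("file:///data/things.out?type=application/array+json")

	mimeType := u.Query().Get("type")
	fmt.Printf("%q\n", mimeType) // "application/array json"

	// the diff converts spaces back to '+' so the hint is a usable media type again
	mimeType = strings.ReplaceAll(mimeType, " ", "+")
	fmt.Printf("%q\n", mimeType) // "application/array+json"
}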