1 files changed, 226 insertions, 0 deletions
diff --git a/internal/datafs/reader.go b/internal/datafs/reader.go
new file mode 100644
index 00000000..f1af07cf
--- /dev/null
+++ b/internal/datafs/reader.go
@@ -0,0 +1,226 @@
+package datafs
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/fs"
+	"net/http"
+	"net/url"
+	"runtime"
+	"strings"
+
+	"github.com/hairyhenderson/go-fsimpl"
+	"github.com/hairyhenderson/gomplate/v4/internal/config"
+	"github.com/hairyhenderson/gomplate/v4/internal/iohelpers"
+)
+
+// DataSourceReader reads content from a datasource
+type DataSourceReader interface {
+	// ReadSource reads the content of a datasource, given an alias and optional
+	// arguments. If the datasource is not found, the alias is interpreted as a
+	// URL. If the alias is not a valid URL, an error is returned.
+	//
+	// Returned content is cached, so subsequent calls with the same alias and
+	// arguments will return the same content.
+	ReadSource(ctx context.Context, alias string, args ...string) (string, []byte, error)
+
+	// contains registry
+	Registry
+}
+
+type dsReader struct {
+	cache map[string]*content
+
+	Registry
+}
+
+// content type mainly for caching
+type content struct {
+	contentType string
+	b           []byte
+}
+
+func NewSourceReader(reg Registry) DataSourceReader {
+	return &dsReader{Registry: reg}
+}
+
+func (d *dsReader) ReadSource(ctx context.Context, alias string, args ...string) (string, []byte, error) {
+	source, ok := d.Lookup(alias)
+	if !ok {
+		srcURL, err := url.Parse(alias)
+		if err != nil || !srcURL.IsAbs() {
+			return "", nil, fmt.Errorf("undefined datasource '%s': %w", alias, err)
+		}
+
+		d.Register(alias, config.DataSource{URL: srcURL})
+
+		// repeat the lookup now that it's registered - we shouldn't just use
+		// it directly because registration may include extra headers
+		source, _ = d.Lookup(alias)
+	}
+
+	if d.cache == nil {
+		d.cache = make(map[string]*content)
+	}
+	cacheKey := alias
+	for _, v := range args {
+		cacheKey += v
+	}
+	cached, ok := d.cache[cacheKey]
+	if ok {
+		return cached.contentType, cached.b, nil
+	}
+
+	arg := ""
+	if len(args) > 0 {
+		arg = args[0]
+	}
+	u, err := resolveURL(source.URL, arg)
+	if err != nil {
+		return "", nil, err
+	}
+
+	fc, err := d.readFileContent(ctx, u, source.Header)
+	if err != nil {
+		return "", nil, fmt.Errorf("couldn't read datasource '%s' (%s): %w", alias, u, err)
+	}
+	d.cache[cacheKey] = fc
+
+	return fc.contentType, fc.b, nil
+}
+
+func (d *dsReader) readFileContent(ctx context.Context, u *url.URL, hdr http.Header) (*content, error) {
+	fsys, err := FSysForPath(ctx, u.String())
+	if err != nil {
+		return nil, fmt.Errorf("fsys for path %v: %w", u, err)
+	}
+
+	u, fname := SplitFSMuxURL(u)
+
+	// need to support absolute paths on local filesystem too
+	// TODO: this is a hack, probably fix this?
+	if u.Scheme == "file" && runtime.GOOS != "windows" {
+		fname = u.Path + fname
+	}
+
+	fsys = fsimpl.WithContextFS(ctx, fsys)
+	fsys = fsimpl.WithHeaderFS(hdr, fsys)
+	fsys = WithDataSourceRegistryFS(d.Registry, fsys)
+
+	f, err := fsys.Open(fname)
+	if err != nil {
+		return nil, fmt.Errorf("open (url: %q, name: %q): %w", u, fname, err)
+	}
+	defer f.Close()
+
+	fi, err := f.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("stat (url: %q, name: %q): %w", u, fname, err)
+	}
+
+	// possible type hint in the type query param. Contrary to spec, we allow
+	// unescaped '+' characters to make it simpler to provide types like
+	// "application/array+json"
+	mimeType := u.Query().Get("type")
+	mimeType = strings.ReplaceAll(mimeType, " ", "+")
+
+	if mimeType == "" {
+		mimeType = fsimpl.ContentType(fi)
+	}
+
+	var data []byte
+
+	if fi.IsDir() {
+		var dirents []fs.DirEntry
+		dirents, err = fs.ReadDir(fsys, fname)
+		if err != nil {
+			return nil, fmt.Errorf("readDir (url: %q, name: %s): %w", u, fname, err)
+		}
+
+		entries := make([]string, len(dirents))
+		for i, e := range dirents {
+			entries[i] = e.Name()
+		}
+		data, err = json.Marshal(entries)
+		if err != nil {
+			return nil, fmt.Errorf("json.Marshal: %w", err)
+		}
+
+		mimeType = iohelpers.JSONArrayMimetype
+	} else {
+		data, err = io.ReadAll(f)
+		if err != nil {
+			return nil, fmt.Errorf("read (url: %q, name: %s): %w", u, fname, err)
+		}
+	}
+
+	if mimeType == "" {
+		// default to text/plain
+		mimeType = iohelpers.TextMimetype
+	}
+
+	return &content{contentType: mimeType, b: data}, nil
+}
+
+// COPIED FROM /data/datasource.go
+//
+// resolveURL parses the relative URL rel against base, and returns the
+// resolved URL. Differs from url.ResolveReference in that query parameters are
+// added. In case of duplicates, params from rel are used.
+func resolveURL(base *url.URL, rel string) (*url.URL, error) {
+	// if there's an opaque part, there's no resolving to do - just return the
+	// base URL
+	if base.Opaque != "" {
+		return base, nil
+	}
+
+	// git URLs are special - they have double-slashes that separate a repo
+	// from a path in the repo. A missing double-slash means the path is the
+	// root.
+	switch base.Scheme {
+	case "git", "git+file", "git+http", "git+https", "git+ssh":
+		if strings.Contains(base.Path, "//") && strings.Contains(rel, "//") {
+			return nil, fmt.Errorf("both base URL and subpath contain '//', which is not allowed in git URLs")
+		}
+
+		// If there's a subpath, the base path must end with '/'. This behaviour
+		// is unique to git URLs - other schemes would instead drop the last
+		// path element and replace with the subpath.
+		if rel != "" && !strings.HasSuffix(base.Path, "/") {
+			base.Path += "/"
+		}
+
+		// If subpath starts with '//', make it relative by prefixing a '.',
+		// otherwise it'll be treated as a schemeless URI and the first part
+		// will be interpreted as a hostname.
+		if strings.HasPrefix(rel, "//") {
+			rel = "." + rel
+		}
+	}
+
+	relURL, err := url.Parse(rel)
+	if err != nil {
+		return nil, err
+	}
+
+	// URL.ResolveReference requires (or assumes, at least) that the base is
+	// absolute. We want to support relative URLs too though, so we need to
+	// correct for that.
+	out := base.ResolveReference(relURL)
+	if out.Scheme == "" && out.Path[0] == '/' {
+		out.Path = out.Path[1:]
+	}
+
+	if base.RawQuery != "" {
+		bq := base.Query()
+		rq := relURL.Query()
+		for k := range rq {
+			bq.Set(k, rq.Get(k))
+		}
+		out.RawQuery = bq.Encode()
+	}
+
+	return out, nil
+}