Diffstat (limited to 'data/datasource.go')
-rw-r--r--  data/datasource.go  401
1 file changed, 194 insertions, 207 deletions
diff --git a/data/datasource.go b/data/datasource.go
index fe3f0877..27260ac1 100644
--- a/data/datasource.go
+++ b/data/datasource.go
@@ -2,98 +2,43 @@ package data
import (
"context"
+ "encoding/json"
"fmt"
+ "io"
"io/fs"
- "mime"
"net/http"
"net/url"
- "path/filepath"
+ "runtime"
"sort"
"strings"
+ "github.com/hairyhenderson/go-fsimpl"
"github.com/hairyhenderson/gomplate/v4/internal/config"
"github.com/hairyhenderson/gomplate/v4/internal/datafs"
- "github.com/hairyhenderson/gomplate/v4/libkv"
- "github.com/hairyhenderson/gomplate/v4/vault"
+ "github.com/hairyhenderson/gomplate/v4/internal/parsers"
+ "github.com/hairyhenderson/gomplate/v4/internal/urlhelpers"
)
-func regExtension(ext, typ string) {
- err := mime.AddExtensionType(ext, typ)
- if err != nil {
- panic(err)
- }
-}
-
-func init() {
- // Add some types we want to be able to handle which can be missing by default
- regExtension(".json", jsonMimetype)
- regExtension(".yml", yamlMimetype)
- regExtension(".yaml", yamlMimetype)
- regExtension(".csv", csvMimetype)
- regExtension(".toml", tomlMimetype)
- regExtension(".env", envMimetype)
- regExtension(".cue", cueMimetype)
-}
-
-// registerReaders registers the source-reader functions
-func (d *Data) registerReaders() {
- d.sourceReaders = make(map[string]func(context.Context, *Source, ...string) ([]byte, error))
-
- d.sourceReaders["aws+smp"] = readAWSSMP
- d.sourceReaders["aws+sm"] = readAWSSecretsManager
- d.sourceReaders["consul"] = readConsul
- d.sourceReaders["consul+http"] = readConsul
- d.sourceReaders["consul+https"] = readConsul
- d.sourceReaders["env"] = readEnv
- d.sourceReaders["file"] = readFile
- d.sourceReaders["http"] = readHTTP
- d.sourceReaders["https"] = readHTTP
- d.sourceReaders["merge"] = d.readMerge
- d.sourceReaders["stdin"] = readStdin
- d.sourceReaders["vault"] = readVault
- d.sourceReaders["vault+http"] = readVault
- d.sourceReaders["vault+https"] = readVault
- d.sourceReaders["s3"] = readBlob
- d.sourceReaders["gs"] = readBlob
- d.sourceReaders["git"] = readGit
- d.sourceReaders["git+file"] = readGit
- d.sourceReaders["git+http"] = readGit
- d.sourceReaders["git+https"] = readGit
- d.sourceReaders["git+ssh"] = readGit
-}
-
-// lookupReader - return the reader function for the given scheme. Empty scheme
-// will return the file reader.
-func (d *Data) lookupReader(scheme string) (func(context.Context, *Source, ...string) ([]byte, error), error) {
- if d.sourceReaders == nil {
- d.registerReaders()
- }
- if scheme == "" {
- scheme = "file"
- }
-
- r, ok := d.sourceReaders[scheme]
- if !ok {
- return nil, fmt.Errorf("scheme %s not registered", scheme)
- }
- return r, nil
-}
-
// Data -
//
// Deprecated: will be replaced in future
type Data struct {
Ctx context.Context
+ // TODO: remove this before 4.0
Sources map[string]*Source
- sourceReaders map[string]func(context.Context, *Source, ...string) ([]byte, error)
- cache map[string][]byte
+ cache map[string]*fileContent
// headers from the --datasource-header/-H option that don't reference datasources from the commandline
ExtraHeaders map[string]http.Header
}
+type fileContent struct {
+ contentType string
+ b []byte
+}
+
// Cleanup - clean up datasources before shutting the process down - things
// like Logging out happen here
func (d *Data) Cleanup() {
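
The new fileContent type replaces the earlier pattern of returning raw bytes and a MIME type as two separate values: the content and its type hint now travel together through the cache and the readers. A minimal sketch of the intended use (the values are invented; parsers.ParseData is the same call the reworked Datasource method uses further down in this diff):

    // hypothetical helper, compilable only inside this package
    func exampleFileContent() (interface{}, error) {
        // content as if it had just been read from a datasource
        fc := &fileContent{contentType: "application/json", b: []byte(`{"port": 8080}`)}

        // the stored content type selects the parser, so callers no longer
        // need a separate mimeType() lookup on the Source
        return parsers.ParseData(fc.contentType, string(fc.b))
    }
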
@@ -119,7 +64,7 @@ func NewData(datasourceArgs, headerArgs []string) (*Data, error) {
func FromConfig(ctx context.Context, cfg *config.Config) *Data {
// XXX: This is temporary, and will be replaced with something a bit cleaner
// when datasources are refactored
- ctx = ContextWithStdin(ctx, cfg.Stdin)
+ ctx = datafs.ContextWithStdin(ctx, cfg.Stdin)
sources := map[string]*Source{}
for alias, d := range cfg.DataSources {
@@ -147,89 +92,17 @@ func FromConfig(ctx context.Context, cfg *config.Config) *Data {
//
// Deprecated: will be replaced in future
type Source struct {
- Alias string
- URL *url.URL
- Header http.Header // used for http[s]: URLs, nil otherwise
- fs fs.FS // used for file: URLs, nil otherwise
- hc *http.Client // used for http[s]: URLs, nil otherwise
- vc *vault.Vault // used for vault: URLs, nil otherwise
- kv *libkv.LibKV // used for consul:, etcd:, zookeeper: URLs, nil otherwise
- asmpg awssmpGetter // used for aws+smp:, nil otherwise
- awsSecretsManager awsSecretsManagerGetter // used for aws+sm, nil otherwise
- mediaType string
-}
-
-func (s *Source) inherit(parent *Source) {
- s.fs = parent.fs
- s.hc = parent.hc
- s.vc = parent.vc
- s.kv = parent.kv
- s.asmpg = parent.asmpg
+ Alias string
+ URL *url.URL
+ Header http.Header // used for http[s]: URLs, nil otherwise
+ mediaType string
}
+// Deprecated: no-op
func (s *Source) cleanup() {
- if s.vc != nil {
- s.vc.Logout()
- }
- if s.kv != nil {
- s.kv.Logout()
- }
-}
-
-// mimeType returns the MIME type to use as a hint for parsing the datasource.
-// It's expected that the datasource will have already been read before
-// this function is called, and so the Source's Type property may be already set.
-//
-// The MIME type is determined by these rules:
-// 1. the 'type' URL query parameter is used if present
-// 2. otherwise, the Type property on the Source is used, if present
-// 3. otherwise, a MIME type is calculated from the file extension, if the extension is registered
-// 4. otherwise, the default type of 'text/plain' is used
-func (s *Source) mimeType(arg string) (mimeType string, err error) {
- if len(arg) > 0 {
- if strings.HasPrefix(arg, "//") {
- arg = arg[1:]
- }
- if !strings.HasPrefix(arg, "/") {
- arg = "/" + arg
- }
- }
- argURL, err := url.Parse(arg)
- if err != nil {
- return "", fmt.Errorf("mimeType: couldn't parse arg %q: %w", arg, err)
- }
- mediatype := argURL.Query().Get("type")
- if mediatype == "" {
- mediatype = s.URL.Query().Get("type")
- }
-
- if mediatype == "" {
- mediatype = s.mediaType
- }
-
- // make it so + doesn't need to be escaped
- mediatype = strings.ReplaceAll(mediatype, " ", "+")
-
- if mediatype == "" {
- ext := filepath.Ext(argURL.Path)
- mediatype = mime.TypeByExtension(ext)
- }
-
- if mediatype == "" {
- ext := filepath.Ext(s.URL.Path)
- mediatype = mime.TypeByExtension(ext)
- }
-
- if mediatype != "" {
- t, _, err := mime.ParseMediaType(mediatype)
- if err != nil {
- return "", fmt.Errorf("MIME type was %q: %w", mediatype, err)
- }
- mediatype = t
- return mediatype, nil
- }
-
- return textMimetype, nil
+ // if s.kv != nil {
+ // s.kv.Logout()
+ // }
}
// String is the method to format the flag's value, part of the flag.Value interface.
@@ -246,7 +119,7 @@ func (d *Data) DefineDatasource(alias, value string) (string, error) {
if d.DatasourceExists(alias) {
return "", nil
}
- srcURL, err := datafs.ParseSourceURL(value)
+ srcURL, err := urlhelpers.ParseSourceURL(value)
if err != nil {
return "", err
}
@@ -288,73 +161,37 @@ func (d *Data) lookupSource(alias string) (*Source, error) {
return source, nil
}
-func (d *Data) readDataSource(ctx context.Context, alias string, args ...string) (data, mimeType string, err error) {
+func (d *Data) readDataSource(ctx context.Context, alias string, args ...string) (*fileContent, error) {
source, err := d.lookupSource(alias)
if err != nil {
- return "", "", err
+ return nil, err
}
- b, err := d.readSource(ctx, source, args...)
+ fc, err := d.readSource(ctx, source, args...)
if err != nil {
- return "", "", fmt.Errorf("couldn't read datasource '%s': %w", alias, err)
+ return nil, fmt.Errorf("couldn't read datasource '%s': %w", alias, err)
}
- subpath := ""
- if len(args) > 0 {
- subpath = args[0]
- }
- mimeType, err = source.mimeType(subpath)
- if err != nil {
- return "", "", err
- }
- return string(b), mimeType, nil
+ return fc, nil
}
// Include -
func (d *Data) Include(alias string, args ...string) (string, error) {
- data, _, err := d.readDataSource(d.Ctx, alias, args...)
- return data, err
+ fc, err := d.readDataSource(d.Ctx, alias, args...)
+ if err != nil {
+ return "", err
+ }
+
+ return string(fc.b), err
}
// Datasource -
func (d *Data) Datasource(alias string, args ...string) (interface{}, error) {
- data, mimeType, err := d.readDataSource(d.Ctx, alias, args...)
+ fc, err := d.readDataSource(d.Ctx, alias, args...)
if err != nil {
return nil, err
}
- return parseData(mimeType, data)
-}
-
-func parseData(mimeType, s string) (out interface{}, err error) {
- switch mimeAlias(mimeType) {
- case jsonMimetype:
- out, err = JSON(s)
- if err != nil {
- // maybe it's a JSON array
- out, err = JSONArray(s)
- }
- case jsonArrayMimetype:
- out, err = JSONArray(s)
- case yamlMimetype:
- out, err = YAML(s)
- if err != nil {
- // maybe it's a YAML array
- out, err = YAMLArray(s)
- }
- case csvMimetype:
- out, err = CSV(s)
- case tomlMimetype:
- out, err = TOML(s)
- case envMimetype:
- out, err = dotEnv(s)
- case textMimetype:
- out = s
- case cueMimetype:
- out, err = CUE(s)
- default:
- return nil, fmt.Errorf("datasources of type %s not yet supported", mimeType)
- }
- return out, err
+ return parsers.ParseData(fc.contentType, string(fc.b))
}
// DatasourceReachable - Determines if the named datasource is reachable with
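
For contrast between the two template-facing entry points: Include hands back the bytes verbatim as a string, while Datasource decodes them according to the content type recorded in the fileContent. A rough usage sketch (the alias and file path are made up):

    // hypothetical helper, compilable only inside this package
    func exampleIncludeVsDatasource(ctx context.Context) error {
        d := &Data{Ctx: ctx, Sources: map[string]*Source{}}
        if _, err := d.DefineDatasource("cfg", "file:///tmp/config.yaml"); err != nil {
            return err
        }

        raw, err := d.Include("cfg") // the YAML document, verbatim, as a string
        if err != nil {
            return err
        }

        obj, err := d.Datasource("cfg") // decoded via parsers.ParseData into maps/slices
        if err != nil {
            return err
        }

        _, _ = raw, obj
        return nil
    }
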
@@ -370,9 +207,9 @@ func (d *Data) DatasourceReachable(alias string, args ...string) bool {
// readSource returns the (possibly cached) data from the given source,
// as referenced by the given args
-func (d *Data) readSource(ctx context.Context, source *Source, args ...string) ([]byte, error) {
+func (d *Data) readSource(ctx context.Context, source *Source, args ...string) (*fileContent, error) {
if d.cache == nil {
- d.cache = make(map[string][]byte)
+ d.cache = make(map[string]*fileContent)
}
cacheKey := source.Alias
for _, v := range args {
@@ -382,16 +219,107 @@ func (d *Data) readSource(ctx context.Context, source *Source, args ...string) (
if ok {
return cached, nil
}
- r, err := d.lookupReader(source.URL.Scheme)
- if err != nil {
- return nil, fmt.Errorf("Datasource not yet supported")
+
+ arg := ""
+ if len(args) > 0 {
+ arg = args[0]
}
- data, err := r(ctx, source, args...)
+ u, err := resolveURL(source.URL, arg)
if err != nil {
return nil, err
}
- d.cache[cacheKey] = data
- return data, nil
+
+ fc, err := d.readFileContent(ctx, u, source.Header)
+ if err != nil {
+ return nil, fmt.Errorf("reading %s: %w", u, err)
+ }
+ d.cache[cacheKey] = fc
+ return fc, nil
+}
+
+// readFileContent returns content from the given URL
+func (d Data) readFileContent(ctx context.Context, u *url.URL, hdr http.Header) (*fileContent, error) {
+ fsys, err := datafs.FSysForPath(ctx, u.String())
+ if err != nil {
+ return nil, fmt.Errorf("fsys for path %v: %w", u, err)
+ }
+
+ u, fname := datafs.SplitFSMuxURL(u)
+
+ // need to support absolute paths on local filesystem too
+ // TODO: this is a hack, probably fix this?
+ if u.Scheme == "file" && runtime.GOOS != "windows" {
+ fname = u.Path + fname
+ }
+
+ fsys = fsimpl.WithContextFS(ctx, fsys)
+ fsys = fsimpl.WithHeaderFS(hdr, fsys)
+
+ // convert d.Sources to a map[string]config.DataSources
+ // TODO: remove this when d.Sources is removed
+ ds := make(map[string]config.DataSource)
+ for k, v := range d.Sources {
+ ds[k] = config.DataSource{
+ URL: v.URL,
+ Header: v.Header,
+ }
+ }
+
+ fsys = datafs.WithDataSourcesFS(ds, fsys)
+
+ f, err := fsys.Open(fname)
+ if err != nil {
+ return nil, fmt.Errorf("open (url: %q, name: %q): %w", u, fname, err)
+ }
+ defer f.Close()
+
+ fi, err := f.Stat()
+ if err != nil {
+ return nil, fmt.Errorf("stat (url: %q, name: %q): %w", u, fname, err)
+ }
+
+ // possible type hint in the type query param. Contrary to spec, we allow
+ // unescaped '+' characters to make it simpler to provide types like
+ // "application/array+json"
+ mimeType := u.Query().Get("type")
+ mimeType = strings.ReplaceAll(mimeType, " ", "+")
+
+ if mimeType == "" {
+ mimeType = fsimpl.ContentType(fi)
+ }
+
+ var data []byte
+
+ if fi.IsDir() {
+ var dirents []fs.DirEntry
+ dirents, err = fs.ReadDir(fsys, fname)
+ if err != nil {
+ return nil, fmt.Errorf("readDir (url: %q, name: %s): %w", u, fname, err)
+ }
+
+ entries := make([]string, len(dirents))
+ for i, e := range dirents {
+ entries[i] = e.Name()
+ }
+ data, err = json.Marshal(entries)
+ if err != nil {
+ return nil, fmt.Errorf("json.Marshal: %w", err)
+ }
+
+ mimeType = jsonArrayMimetype
+ } else {
+ data, err = io.ReadAll(f)
+ if err != nil {
+ return nil, fmt.Errorf("read (url: %q, name: %s): %w", u, fname, err)
+ }
+ }
+
+ if mimeType == "" {
+ // default to text/plain
+ mimeType = textMimetype
+ }
+
+ return &fileContent{contentType: mimeType, b: data}, nil
}
// Show all datasources -
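
One reason for the space-to-'+' substitution in readFileContent above: a literal '+' in a query string decodes to a space, so a hint like application/array+json would otherwise arrive mangled. A small self-contained illustration of that behaviour, using only the standard library:

    package main

    import (
        "fmt"
        "net/url"
        "strings"
    )

    func main() {
        u, _ := url.Parse("file:///tmp/servers.txt?type=application/array+json")

        hint := u.Query().Get("type")             // "application/array json" - '+' was decoded as a space
        hint = strings.ReplaceAll(hint, " ", "+") // "application/array+json" - restored, as readFileContent does
        fmt.Println(hint)
    }
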
@@ -403,3 +331,62 @@ func (d *Data) ListDatasources() []string {
sort.Strings(datasources)
return datasources
}
+
+// resolveURL parses the relative URL rel against base, and returns the
+// resolved URL. Differs from url.ResolveReference in that query parameters are
+// added. In case of duplicates, params from rel are used.
+func resolveURL(base *url.URL, rel string) (*url.URL, error) {
+ // if there's an opaque part, there's no resolving to do - just return the
+ // base URL
+ if base.Opaque != "" {
+ return base, nil
+ }
+
+ // git URLs are special - they have double-slashes that separate a repo
+ // from a path in the repo. A missing double-slash means the path is the
+ // root.
+ switch base.Scheme {
+ case "git", "git+file", "git+http", "git+https", "git+ssh":
+ if strings.Contains(base.Path, "//") && strings.Contains(rel, "//") {
+ return nil, fmt.Errorf("both base URL and subpath contain '//', which is not allowed in git URLs")
+ }
+
+ // If there's a subpath, the base path must end with '/'. This behaviour
+ // is unique to git URLs - other schemes would instead drop the last
+ // path element and replace with the subpath.
+ if rel != "" && !strings.HasSuffix(base.Path, "/") {
+ base.Path += "/"
+ }
+
+ // If subpath starts with '//', make it relative by prefixing a '.',
+ // otherwise it'll be treated as a schemeless URI and the first part
+ // will be interpreted as a hostname.
+ if strings.HasPrefix(rel, "//") {
+ rel = "." + rel
+ }
+ }
+
+ relURL, err := url.Parse(rel)
+ if err != nil {
+ return nil, err
+ }
+
+ // URL.ResolveReference requires (or assumes, at least) that the base is
+ // absolute. We want to support relative URLs too though, so we need to
+ // correct for that.
+ out := base.ResolveReference(relURL)
+ if out.Scheme == "" && out.Path[0] == '/' {
+ out.Path = out.Path[1:]
+ }
+
+ if base.RawQuery != "" {
+ bq := base.Query()
+ rq := relURL.Query()
+ for k := range rq {
+ bq.Set(k, rq.Get(k))
+ }
+ out.RawQuery = bq.Encode()
+ }
+
+ return out, nil
+}
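
To make the resolution rules concrete, a sketch of the intended results (hosts and paths are invented, and the expected outputs follow from the rules above rather than from a recorded test run):

    // hypothetical helper, compilable only inside this package
    func exampleResolveURL() error {
        // git-style URLs: the '//' marks the repo root, so the subpath is
        // appended below it instead of replacing the last path element
        base, err := url.Parse("git+https://example.com/owner/repo//config")
        if err != nil {
            return err
        }
        u, err := resolveURL(base, "prod.yaml")
        if err != nil {
            return err
        }
        fmt.Println(u) // expected: git+https://example.com/owner/repo//config/prod.yaml

        // query parameters from both URLs are merged; the subpath wins on duplicates
        base, _ = url.Parse("https://example.com/data/?type=application/json&raw=true")
        u, err = resolveURL(base, "servers.yaml?type=application/yaml")
        if err != nil {
            return err
        }
        fmt.Println(u) // expected: https://example.com/data/servers.yaml with raw=true kept
        //               from the base and type=application/yaml winning the duplicate
        return nil
    }
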