Diffstat (limited to 'data/datasource.go')
| -rw-r--r-- | data/datasource.go | 401 |
1 file changed, 194 insertions, 207 deletions
diff --git a/data/datasource.go b/data/datasource.go
index fe3f0877..27260ac1 100644
--- a/data/datasource.go
+++ b/data/datasource.go
@@ -2,98 +2,43 @@ package data
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"io"
 	"io/fs"
-	"mime"
 	"net/http"
 	"net/url"
-	"path/filepath"
+	"runtime"
 	"sort"
 	"strings"
 
+	"github.com/hairyhenderson/go-fsimpl"
 	"github.com/hairyhenderson/gomplate/v4/internal/config"
 	"github.com/hairyhenderson/gomplate/v4/internal/datafs"
-	"github.com/hairyhenderson/gomplate/v4/libkv"
-	"github.com/hairyhenderson/gomplate/v4/vault"
+	"github.com/hairyhenderson/gomplate/v4/internal/parsers"
+	"github.com/hairyhenderson/gomplate/v4/internal/urlhelpers"
 )
 
-func regExtension(ext, typ string) {
-	err := mime.AddExtensionType(ext, typ)
-	if err != nil {
-		panic(err)
-	}
-}
-
-func init() {
-	// Add some types we want to be able to handle which can be missing by default
-	regExtension(".json", jsonMimetype)
-	regExtension(".yml", yamlMimetype)
-	regExtension(".yaml", yamlMimetype)
-	regExtension(".csv", csvMimetype)
-	regExtension(".toml", tomlMimetype)
-	regExtension(".env", envMimetype)
-	regExtension(".cue", cueMimetype)
-}
-
-// registerReaders registers the source-reader functions
-func (d *Data) registerReaders() {
-	d.sourceReaders = make(map[string]func(context.Context, *Source, ...string) ([]byte, error))
-
-	d.sourceReaders["aws+smp"] = readAWSSMP
-	d.sourceReaders["aws+sm"] = readAWSSecretsManager
-	d.sourceReaders["consul"] = readConsul
-	d.sourceReaders["consul+http"] = readConsul
-	d.sourceReaders["consul+https"] = readConsul
-	d.sourceReaders["env"] = readEnv
-	d.sourceReaders["file"] = readFile
-	d.sourceReaders["http"] = readHTTP
-	d.sourceReaders["https"] = readHTTP
-	d.sourceReaders["merge"] = d.readMerge
-	d.sourceReaders["stdin"] = readStdin
-	d.sourceReaders["vault"] = readVault
-	d.sourceReaders["vault+http"] = readVault
-	d.sourceReaders["vault+https"] = readVault
-	d.sourceReaders["s3"] = readBlob
-	d.sourceReaders["gs"] = readBlob
-	d.sourceReaders["git"] = readGit
-	d.sourceReaders["git+file"] = readGit
-	d.sourceReaders["git+http"] = readGit
-	d.sourceReaders["git+https"] = readGit
-	d.sourceReaders["git+ssh"] = readGit
-}
-
-// lookupReader - return the reader function for the given scheme. Empty scheme
-// will return the file reader.
-func (d *Data) lookupReader(scheme string) (func(context.Context, *Source, ...string) ([]byte, error), error) {
-	if d.sourceReaders == nil {
-		d.registerReaders()
-	}
-	if scheme == "" {
-		scheme = "file"
-	}
-
-	r, ok := d.sourceReaders[scheme]
-	if !ok {
-		return nil, fmt.Errorf("scheme %s not registered", scheme)
-	}
-	return r, nil
-}
-
 // Data -
 //
 // Deprecated: will be replaced in future
 type Data struct {
 	Ctx context.Context
 
+	// TODO: remove this before 4.0
 	Sources map[string]*Source
 
-	sourceReaders map[string]func(context.Context, *Source, ...string) ([]byte, error)
-	cache         map[string][]byte
+	cache map[string]*fileContent
 
 	// headers from the --datasource-header/-H option that don't reference datasources from the commandline
 	ExtraHeaders map[string]http.Header
 }
 
+type fileContent struct {
+	contentType string
+	b           []byte
+}
+
 // Cleanup - clean up datasources before shutting the process down - things
 // like Logging out happen here
 func (d *Data) Cleanup() {
@@ -119,7 +64,7 @@ func NewData(datasourceArgs, headerArgs []string) (*Data, error) {
 func FromConfig(ctx context.Context, cfg *config.Config) *Data {
 	// XXX: This is temporary, and will be replaced with something a bit cleaner
 	// when datasources are refactored
-	ctx = ContextWithStdin(ctx, cfg.Stdin)
+	ctx = datafs.ContextWithStdin(ctx, cfg.Stdin)
 
 	sources := map[string]*Source{}
 	for alias, d := range cfg.DataSources {
@@ -147,89 +92,17 @@ func FromConfig(ctx context.Context, cfg *config.Config) *Data {
 //
 // Deprecated: will be replaced in future
 type Source struct {
-	Alias             string
-	URL               *url.URL
-	Header            http.Header             // used for http[s]: URLs, nil otherwise
-	fs                fs.FS                   // used for file: URLs, nil otherwise
-	hc                *http.Client            // used for http[s]: URLs, nil otherwise
-	vc                *vault.Vault            // used for vault: URLs, nil otherwise
-	kv                *libkv.LibKV            // used for consul:, etcd:, zookeeper: URLs, nil otherwise
-	asmpg             awssmpGetter            // used for aws+smp:, nil otherwise
-	awsSecretsManager awsSecretsManagerGetter // used for aws+sm, nil otherwise
-	mediaType         string
-}
-
-func (s *Source) inherit(parent *Source) {
-	s.fs = parent.fs
-	s.hc = parent.hc
-	s.vc = parent.vc
-	s.kv = parent.kv
-	s.asmpg = parent.asmpg
+	Alias     string
+	URL       *url.URL
+	Header    http.Header // used for http[s]: URLs, nil otherwise
+	mediaType string
 }
 
+// Deprecated: no-op
 func (s *Source) cleanup() {
-	if s.vc != nil {
-		s.vc.Logout()
-	}
-	if s.kv != nil {
-		s.kv.Logout()
-	}
-}
-
-// mimeType returns the MIME type to use as a hint for parsing the datasource.
-// It's expected that the datasource will have already been read before
-// this function is called, and so the Source's Type property may be already set.
-//
-// The MIME type is determined by these rules:
-// 1. the 'type' URL query parameter is used if present
-// 2. otherwise, the Type property on the Source is used, if present
-// 3. otherwise, a MIME type is calculated from the file extension, if the extension is registered
-// 4. otherwise, the default type of 'text/plain' is used
-func (s *Source) mimeType(arg string) (mimeType string, err error) {
-	if len(arg) > 0 {
-		if strings.HasPrefix(arg, "//") {
-			arg = arg[1:]
-		}
-		if !strings.HasPrefix(arg, "/") {
-			arg = "/" + arg
-		}
-	}
-	argURL, err := url.Parse(arg)
-	if err != nil {
-		return "", fmt.Errorf("mimeType: couldn't parse arg %q: %w", arg, err)
-	}
-	mediatype := argURL.Query().Get("type")
-	if mediatype == "" {
-		mediatype = s.URL.Query().Get("type")
-	}
-
-	if mediatype == "" {
-		mediatype = s.mediaType
-	}
-
-	// make it so + doesn't need to be escaped
-	mediatype = strings.ReplaceAll(mediatype, " ", "+")
-
-	if mediatype == "" {
-		ext := filepath.Ext(argURL.Path)
-		mediatype = mime.TypeByExtension(ext)
-	}
-
-	if mediatype == "" {
-		ext := filepath.Ext(s.URL.Path)
-		mediatype = mime.TypeByExtension(ext)
-	}
-
-	if mediatype != "" {
-		t, _, err := mime.ParseMediaType(mediatype)
-		if err != nil {
-			return "", fmt.Errorf("MIME type was %q: %w", mediatype, err)
-		}
-		mediatype = t
-		return mediatype, nil
-	}
-
-	return textMimetype, nil
+	// if s.kv != nil {
+	// 	s.kv.Logout()
+	// }
 }
 
 // String is the method to format the flag's value, part of the flag.Value interface.
@@ -246,7 +119,7 @@ func (d *Data) DefineDatasource(alias, value string) (string, error) {
 	if d.DatasourceExists(alias) {
 		return "", nil
 	}
-	srcURL, err := datafs.ParseSourceURL(value)
+	srcURL, err := urlhelpers.ParseSourceURL(value)
 	if err != nil {
 		return "", err
 	}
@@ -288,73 +161,37 @@ func (d *Data) lookupSource(alias string) (*Source, error) {
 	return source, nil
 }
 
-func (d *Data) readDataSource(ctx context.Context, alias string, args ...string) (data, mimeType string, err error) {
+func (d *Data) readDataSource(ctx context.Context, alias string, args ...string) (*fileContent, error) {
 	source, err := d.lookupSource(alias)
 	if err != nil {
-		return "", "", err
+		return nil, err
 	}
-	b, err := d.readSource(ctx, source, args...)
+	fc, err := d.readSource(ctx, source, args...)
 	if err != nil {
-		return "", "", fmt.Errorf("couldn't read datasource '%s': %w", alias, err)
+		return nil, fmt.Errorf("couldn't read datasource '%s': %w", alias, err)
 	}
-	subpath := ""
-	if len(args) > 0 {
-		subpath = args[0]
-	}
-	mimeType, err = source.mimeType(subpath)
-	if err != nil {
-		return "", "", err
-	}
 
-	return string(b), mimeType, nil
+	return fc, nil
 }
 
 // Include -
 func (d *Data) Include(alias string, args ...string) (string, error) {
-	data, _, err := d.readDataSource(d.Ctx, alias, args...)
-	return data, err
+	fc, err := d.readDataSource(d.Ctx, alias, args...)
+	if err != nil {
+		return "", err
+	}
+
+	return string(fc.b), err
}
 
 // Datasource -
 func (d *Data) Datasource(alias string, args ...string) (interface{}, error) {
-	data, mimeType, err := d.readDataSource(d.Ctx, alias, args...)
+	fc, err := d.readDataSource(d.Ctx, alias, args...)
 	if err != nil {
 		return nil, err
 	}
-	return parseData(mimeType, data)
-}
-
-func parseData(mimeType, s string) (out interface{}, err error) {
-	switch mimeAlias(mimeType) {
-	case jsonMimetype:
-		out, err = JSON(s)
-		if err != nil {
-			// maybe it's a JSON array
-			out, err = JSONArray(s)
-		}
-	case jsonArrayMimetype:
-		out, err = JSONArray(s)
-	case yamlMimetype:
-		out, err = YAML(s)
-		if err != nil {
-			// maybe it's a YAML array
-			out, err = YAMLArray(s)
-		}
-	case csvMimetype:
-		out, err = CSV(s)
-	case tomlMimetype:
-		out, err = TOML(s)
-	case envMimetype:
-		out, err = dotEnv(s)
-	case textMimetype:
-		out = s
-	case cueMimetype:
-		out, err = CUE(s)
-	default:
-		return nil, fmt.Errorf("datasources of type %s not yet supported", mimeType)
-	}
-	return out, err
+
+	return parsers.ParseData(fc.contentType, string(fc.b))
 }
 
 // DatasourceReachable - Determines if the named datasource is reachable with
@@ -370,9 +207,9 @@ func (d *Data) DatasourceReachable(alias string, args ...string) bool {
 
 // readSource returns the (possibly cached) data from the given source,
 // as referenced by the given args
-func (d *Data) readSource(ctx context.Context, source *Source, args ...string) ([]byte, error) {
+func (d *Data) readSource(ctx context.Context, source *Source, args ...string) (*fileContent, error) {
 	if d.cache == nil {
-		d.cache = make(map[string][]byte)
+		d.cache = make(map[string]*fileContent)
 	}
 	cacheKey := source.Alias
 	for _, v := range args {
@@ -382,16 +219,107 @@ func (d *Data) readSource(ctx context.Context, source *Source, args ...string) (
 	if ok {
 		return cached, nil
 	}
-	r, err := d.lookupReader(source.URL.Scheme)
-	if err != nil {
-		return nil, fmt.Errorf("Datasource not yet supported")
+
+	arg := ""
+	if len(args) > 0 {
+		arg = args[0]
 	}
-	data, err := r(ctx, source, args...)
+
+	u, err := resolveURL(source.URL, arg)
 	if err != nil {
 		return nil, err
 	}
-	d.cache[cacheKey] = data
-	return data, nil
+
+	fc, err := d.readFileContent(ctx, u, source.Header)
+	if err != nil {
+		return nil, fmt.Errorf("reading %s: %w", u, err)
+	}
+	d.cache[cacheKey] = fc
+	return fc, nil
+}
+
+// readFileContent returns content from the given URL
+func (d Data) readFileContent(ctx context.Context, u *url.URL, hdr http.Header) (*fileContent, error) {
+	fsys, err := datafs.FSysForPath(ctx, u.String())
+	if err != nil {
+		return nil, fmt.Errorf("fsys for path %v: %w", u, err)
+	}
+
+	u, fname := datafs.SplitFSMuxURL(u)
+
+	// need to support absolute paths on local filesystem too
+	// TODO: this is a hack, probably fix this?
+	if u.Scheme == "file" && runtime.GOOS != "windows" {
+		fname = u.Path + fname
+	}
+
+	fsys = fsimpl.WithContextFS(ctx, fsys)
+	fsys = fsimpl.WithHeaderFS(hdr, fsys)
+
+	// convert d.Sources to a map[string]config.DataSources
+	// TODO: remove this when d.Sources is removed
+	ds := make(map[string]config.DataSource)
+	for k, v := range d.Sources {
+		ds[k] = config.DataSource{
+			URL:    v.URL,
+			Header: v.Header,
+		}
+	}
+
+	fsys = datafs.WithDataSourcesFS(ds, fsys)
+
+	f, err := fsys.Open(fname)
+	if err != nil {
+		return nil, fmt.Errorf("open (url: %q, name: %q): %w", u, fname, err)
+	}
+	defer f.Close()
+
+	fi, err := f.Stat()
+	if err != nil {
+		return nil, fmt.Errorf("stat (url: %q, name: %q): %w", u, fname, err)
+	}
+
+	// possible type hint in the type query param. Contrary to spec, we allow
+	// unescaped '+' characters to make it simpler to provide types like
+	// "application/array+json"
+	mimeType := u.Query().Get("type")
+	mimeType = strings.ReplaceAll(mimeType, " ", "+")
+
+	if mimeType == "" {
+		mimeType = fsimpl.ContentType(fi)
+	}
+
+	var data []byte
+
+	if fi.IsDir() {
+		var dirents []fs.DirEntry
+		dirents, err = fs.ReadDir(fsys, fname)
+		if err != nil {
+			return nil, fmt.Errorf("readDir (url: %q, name: %s): %w", u, fname, err)
+		}
+
+		entries := make([]string, len(dirents))
+		for i, e := range dirents {
+			entries[i] = e.Name()
+		}
+		data, err = json.Marshal(entries)
+		if err != nil {
+			return nil, fmt.Errorf("json.Marshal: %w", err)
+		}
+
+		mimeType = jsonArrayMimetype
+	} else {
+		data, err = io.ReadAll(f)
+		if err != nil {
			return nil, fmt.Errorf("read (url: %q, name: %s): %w", u, fname, err)
+		}
+	}
+
+	if mimeType == "" {
+		// default to text/plain
+		mimeType = textMimetype
+	}
+
+	return &fileContent{contentType: mimeType, b: data}, nil
 }
 
 // Show all datasources -
@@ -403,3 +331,62 @@ func (d *Data) ListDatasources() []string {
 	sort.Strings(datasources)
 	return datasources
 }
+
+// resolveURL parses the relative URL rel against base, and returns the
+// resolved URL. Differs from url.ResolveReference in that query parameters are
+// added. In case of duplicates, params from rel are used.
+func resolveURL(base *url.URL, rel string) (*url.URL, error) {
+	// if there's an opaque part, there's no resolving to do - just return the
+	// base URL
+	if base.Opaque != "" {
+		return base, nil
+	}
+
+	// git URLs are special - they have double-slashes that separate a repo
+	// from a path in the repo. A missing double-slash means the path is the
+	// root.
+	switch base.Scheme {
+	case "git", "git+file", "git+http", "git+https", "git+ssh":
+		if strings.Contains(base.Path, "//") && strings.Contains(rel, "//") {
+			return nil, fmt.Errorf("both base URL and subpath contain '//', which is not allowed in git URLs")
+		}
+
+		// If there's a subpath, the base path must end with '/'. This behaviour
+		// is unique to git URLs - other schemes would instead drop the last
+		// path element and replace with the subpath.
+		if rel != "" && !strings.HasSuffix(base.Path, "/") {
+			base.Path += "/"
+		}
+
+		// If subpath starts with '//', make it relative by prefixing a '.',
+		// otherwise it'll be treated as a schemeless URI and the first part
+		// will be interpreted as a hostname.
+		if strings.HasPrefix(rel, "//") {
+			rel = "." + rel
+		}
+	}
+
+	relURL, err := url.Parse(rel)
+	if err != nil {
+		return nil, err
+	}
+
+	// URL.ResolveReference requires (or assumes, at least) that the base is
+	// absolute. We want to support relative URLs too though, so we need to
+	// correct for that.
+	out := base.ResolveReference(relURL)
+	if out.Scheme == "" && out.Path[0] == '/' {
+		out.Path = out.Path[1:]
+	}
+
+	if base.RawQuery != "" {
+		bq := base.Query()
+		rq := relURL.Query()
+		for k := range rq {
+			bq.Set(k, rq.Get(k))
+		}
+		out.RawQuery = bq.Encode()
+	}
+
+	return out, nil
+}
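A note on the new resolveURL helper at the end of the diff: unlike plain url.ResolveReference, it keeps the query parameters from the datasource's base URL and lets parameters on the subpath win when both set the same key. A minimal standalone sketch of that merging behaviour (the mergeQuery helper and the example.com URLs are illustrative only, not part of gomplate):

package main

import (
	"fmt"
	"net/url"
)

// mergeQuery resolves rel against base and then merges query parameters,
// with rel's values overriding base's on duplicate keys - the same rule
// the diff describes for resolveURL. Illustrative helper, not gomplate API.
func mergeQuery(base *url.URL, rel string) (*url.URL, error) {
	relURL, err := url.Parse(rel)
	if err != nil {
		return nil, err
	}

	out := base.ResolveReference(relURL)

	if base.RawQuery != "" {
		q := base.Query()
		for k, vs := range relURL.Query() {
			q[k] = vs // rel overrides base on duplicate keys
		}
		out.RawQuery = q.Encode()
	}
	return out, nil
}

func main() {
	base, _ := url.Parse("https://example.com/data/?type=application/json")

	// a subpath inherits the base URL's type hint...
	u, _ := mergeQuery(base, "users.json")
	fmt.Println(u) // https://example.com/data/users.json?type=application%2Fjson

	// ...unless it supplies its own, which takes precedence
	u, _ = mergeQuery(base, "users.csv?type=text/csv")
	fmt.Println(u) // https://example.com/data/users.csv?type=text%2Fcsv
}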

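The type-hint handling in readFileContent is also worth calling out: a literal '+' in a query string normally decodes to a space, so the new code turns spaces back into '+' to allow hints like application/array+json to be written unescaped. A rough sketch of that normalisation, using a made-up file: URL:

package main

import (
	"fmt"
	"net/url"
	"strings"
)

func main() {
	// '+' in a query string decodes to a space, so the hint arrives mangled
	u, _ := url.Parse("file:///data/things.out?type=application/array+json")

	mimeType := u.Query().Get("type")
	fmt.Printf("%q\n", mimeType) // "application/array json"

	// the diff converts spaces back to '+' so the hint is a usable media type again
	mimeType = strings.ReplaceAll(mimeType, " ", "+")
	fmt.Printf("%q\n", mimeType) // "application/array+json"
}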