summaryrefslogtreecommitdiff
path: root/.local
diff options
context:
space:
mode:
authorMike Vink <ivi@vinkies.net>2025-10-23 20:46:21 +0200
committerMike Vink <ivi@vinkies.net>2025-10-23 20:46:21 +0200
commit4acdda4eff137071be4d7cb6293ecaf0cb2dd8d3 (patch)
tree1580b5db2a4ecc05b8825dadf0b18eb08d8c2dc4 /.local
parent29a759d2c7614cd4bea6f7f9b656120f87f60d33 (diff)
scripts
Diffstat (limited to '.local')
-rwxr-xr-x.local/bin/memo264
1 files changed, 264 insertions, 0 deletions
diff --git a/.local/bin/memo b/.local/bin/memo
new file mode 100755
index 0000000..2a05c80
--- /dev/null
+++ b/.local/bin/memo
@@ -0,0 +1,264 @@
+#!/usr/bin/env bash
+#
+# memo(1), memoizes the output of your command-line, so you can do:
+#
+# $ memo <some long running command> | ...
+#
+# Instead of
+#
+# $ <some long running command> > tmpfile
+# $ cat tmpfile | ...
+# $ rm tmpfile
+#
+# You can even use it in the middle of a pipe if you know that the input is not
+# extremely long. Just supply the -s switch:
+#
+# $ cat sitelist | memo -s parallel curl | grep "server:"
+#
+# As long as "sitelist" isn't changed, the curl(1) invocation will not be rerun.
+#
+# Memo provides some nice-to-haves, like transparent (de)compression of the
+# output so you're less likely to be disk I/O bound and save some space. The
+# order of preferred compression algorithms is: zstd, lz4, xz, gzip. If none of
+# these programs are present on the system, the output is stored uncompressed.
+#
+# If you think this is handy, you're right. Yet, there are good reasons why this
+# sort of tool isn't distributed by default on distributions: it can give
+# surprising effects. Suppose the <command> you're trying to memoize depends on
+# the current working directory. Changing the directory and running the same
+# memo invocation will now paste the wrong data to stdout. Use this only if you
+# know what it's doing.
+#
+# That said, I use it a lot, I really can't be bothered to create arbitrarily
+# named temporary files all the time when I'm grabbing output from some slow
+# networked program. Additionally, memo transparently compresses.
+#
+# If you want to memo'ize shell functions, then you can source this script
+# instead of executing it. This will define a function called memo() that you
+# can use in exactly the same way. It made the implementation much uglier, but
+# at least I can memo'ize functions.
+#
+# Don't let the bash shebang at the top fool you, it runs just fine in zsh.
+
+# Detect whether this file is being sourced (zsh, ksh or bash) rather than
+# executed. __memo_sourced starts at 0 and is flipped to 1 when any of the
+# three shell-specific checks below succeeds:
+#   zsh:  $ZSH_EVAL_CONTEXT is set and ends in ":file".
+#   ksh:  the absolute path rebuilt from $0 differs from ${.sh.file}.
+#   bash: $0 differs from $BASH_SOURCE.
+# The checks run inside ( ... ), i.e. a subshell, so the `cd` used by the ksh
+# check cannot change the working directory of the current shell. The flag is
+# read again at the bottom of the file to decide whether to invoke memo.
+__memo_sourced=0
+([[ -n $ZSH_EVAL_CONTEXT && $ZSH_EVAL_CONTEXT =~ :file$ ]] ||
+ [[ -n $KSH_VERSION && $(cd "$(dirname -- "$0")" &&
+ printf '%s' "${PWD%/}/")$(basename -- "$0") != "${.sh.file}" ]] ||
+ [[ -n $BASH_VERSION && $0 != "$BASH_SOURCE" ]]) && __memo_sourced=1
+
+memo() {
+ (
+ set -euo pipefail
+
+ usage() {
+ echo "SYNOPSIS"
+ echo " memo [-chs] <command>"
+ echo ""
+ echo "DESCRIPTION"
+ echo " Memoizes the output of <command> and outputs it."
+ echo ""
+ echo " NOTE: If an interrupt happens while memoizing, it is cleared."
+ echo ""
+ echo "OPTIONS"
+ echo " -c Clean the cache, if no command is specified, clean everything."
+ echo " -h Usage message."
+ echo " -s Take stdin as an input to the command. Note, that this"
+ echo " requires buffering up all the input before memo is able to"
+ echo " decide whether to replay the cached version or not."
+ return $1
+ }
+
+ # Process options.
+ local opt_clear=0
+ local opt_stdin=0
+ while getopts :chs opt ; do
+ case $opt in
+ c) opt_clear=1 ;;
+ h) usage 0 ;;
+ s) opt_stdin=1 ;;
+ ?) (( --OPTIND )) ; break ;;
+ esac
+ done
+ shift $(( OPTIND - 1 ))
+
+ (( $# > 0 )) || usage 1
+
+ # Ensures that the memodir for $USER exists. If it doesn't exist, tries to
+ # create it with the right permissions.
+ ensuredir() {
+ local dir="${TMPDIR:-/tmp}/memo"
+ [[ -d "$dir" ]] || mkdir -p -m 0777 "$dir"
+ dir="$dir/$USER"
+ [[ -d "$dir" ]] || mkdir -m 0700 "$dir"
+ echo -n "$dir"
+ }
+
+ # Generate a sha512, done this way to account for differences between most
+ # Linux distros and OSX.
+ genhash() {
+ hash sha512sum 2>/dev/null && sha512sum || shasum -a 512
+ }
+
+ # Generates a hashed path inside of the memodir. The hash is based on the
+ # stdin of this function.
+ path() {
+ # Output the first part
+ ensuredir
+ # Output a separator
+ echo -n "/"
+ # Finally, output the hashed arguments and use it as the filename The shasum
+ # family of utilities tend to output the hashes in hex format, so no need to
+ # fear strange characters being output.
+ genhash | cut -d' ' -f1
+ }
+
+ # Echo memo's preferred compressor, must take input on stdin and output
+ # a compressed stream on stdout. Replaces current shell.
+ compressor() {
+ if hash zstd 2>/dev/null ; then exec zstd
+ elif hash lz4 2>/dev/null ; then exec lz4
+ elif hash xz 2>/dev/null ; then exec xz
+ elif hash gzip 2>/dev/null ; then exec gzip
+ fi
+ exec cat -
+ }
+
+ # Same as compressor, but in reverse. Keep these two in sync. Replaces current
+ # shell.
+ decompressor() {
+ if hash zstd 2>/dev/null ; then exec zstd -dc
+ elif hash lz4 2>/dev/null ; then exec lz4 -dc
+ elif hash xz 2>/dev/null ; then exec xz -dc
+ elif hash gzip 2>/dev/null ; then exec gzip -dc
+ fi
+ exec cat -
+ }
+
+ # Cats a file, decompressing if necessary. The filename is the first argument,
+ # and it will get a compression extension (.gz, ...) applied for every
+ # available decompressor to see if the file exists.
+ catfile() {
+ if [ -f "$1.zst" ] && hash zstd 2>/dev/null ; then
+ zstd -dc < "$1.zst" || true
+ elif [ -f "$1.lz4" ] && hash lz4 2>/dev/null ; then
+ lz4 -dc < "$1.lz4" || true
+ elif [ -f "$1.xz" ] && hash xz 2>/dev/null ; then
+ xz -dc < "$1.xz" || true
+ elif [ -f "$1.gz" ] && hash gzip 2>/dev/null ; then
+ gzip -dc < "$1.gz" || true
+ elif [ -f "$1" ] ; then
+ cat "$1" || true
+ else
+ # Cache not found, sad face.
+ return 1
+ fi
+ }
+
+ # Compress standard input to $1.<ext> (.ext is based on the best compression
+ # program found). Calling this function will replace the current shell with
+ # the decompressor, so make sure this is a terminal statement.
+ compressfile() {
+ if hash zstd 2>/dev/null ; then
+ exec zstd > "$1.zst"
+ elif hash lz4 2>/dev/null ; then
+ exec lz4 > "$1.lz4"
+ elif hash xz 2>/dev/null ; then
+ exec xz > "$1.xz"
+ elif hash gzip 2>/dev/null ; then
+ exec gzip > "$1.gz"
+ else
+ exec cat - > "$1"
+ fi
+ }
+
+ # Blast the entire folder if -c was passed and nothing more.
+ if (( opt_clear )) && (( $# == 0 )) ; then
+ rm -rf "$(ensuredir)"
+ else
+ # If stdin needs to be part of the unique command fingerprint, we need to
+ # save it somewhere so we can first hash it and then check if we already
+ # have a cache. Let's create a temporary file.
+ local stdinsave
+ if (( opt_stdin )) ; then
+ # Create a temporary file and schedule its cleanup.
+ stdinsave=$(mktemp)
+ cleanup() {
+ rm "$stdinsave"
+ trap - EXIT # Clean EXIT trap.
+ }
+
+ # Cleanup file after either EXIT (if run as a standalone script) or
+ # RETURN (if run as a sourced bash function). zsh seems to also fire
+ # the EXIT trap when a sourced function returns.
+ #
+ # Check if $ZSH_VERSION is set in an antique way because macOS is
+ # stuck on pre 4.2 bash, so it doesn't know about -v (and we're in set
+ # -u mode, so most other checks will fail).
+ if [[ -z "${ZSH_VERSION-}" ]] ; then
+ trap cleanup EXIT RETURN
+ else
+ trap cleanup EXIT
+ fi
+ fi
+
+ # In the following block we construct the unique path to save the output of
+ # the command in.
+ local file
+ file=$({
+ # The command and its arguments are always part of the hash.
+ echo -n "$@"
+
+ # Save the input while compressing it and pass it through with tee(1).
+ if (( opt_stdin )) ; then
+ tee >(compressor > "$stdinsave")
+ fi
+ } | path)
+
+ if (( opt_clear )) ; then
+ # Remove the saved content(s) of this hash if -c was passed.
+ find "$(dirname "$file")" -type f -name "$(basename "$file")*" -delete
+ else
+ # Output the cached file or generate a new one.
+ catfile "$file" || {
+ # Clear the cache and forward the return/signal code.
+ nocache() {
+ # $? represents the signal on all handlers except for ERR, where it
+ # represents the process error code. Note to self: find a clever way
+ # to deal with this.
+ declare -ri ret=$?
+ trap - INT # Clear traps.
+ rm "$file"* 2>/dev/null || true
+ exit $ret
+ }
+
+ # WARNING: if you're ever thinking of adding more signals to clear the
+ # cache on, read the following:
+ #
+ # When adding the 'ERR' signal to the set, any 'INT' signal received
+ # will trigger the specified function twice (from the same process, see
+ # the commit comments). I cannot explain why this is the case, but it's
+ # so. To try it out: trap nocache INT ERR, then make the process
+ # interrupt by pressing ctrl-c.
+ trap nocache INT
+
+ if (( opt_stdin )) ; then
+ # It appears we don't have a cached file, but we already absorbed
+ # stdin, so we need to produce it again.
+ exec < <(decompressor < "$stdinsave")
+ fi
+ "$@" | tee >(compressfile "$file")
+ }
+ fi
+ fi
+ )
+}
+
+# Sourced: only the memo() definition should stay behind, so drop the helper
+# flag and stop reading this file.
+if (( __memo_sourced )) ; then
+  unset __memo_sourced
+  return 0
+fi
+
+# Executed as a script: enable strict mode and hand our arguments to memo.
+set -euo pipefail
+memo "$@"