summaryrefslogtreecommitdiff
path: root/scratch
diff options
context:
space:
mode:
authorTJ DeVries <devries.timothyj@gmail.com>2020-07-31 00:05:22 -0400
committerTJ DeVries <devries.timothyj@gmail.com>2020-07-31 00:05:22 -0400
commitfa0382d93e73b66e7ec769cec27b9fbb21020641 (patch)
tree624d5dc3de80426956a1c46447f1f26443a87a64 /scratch
parentababfbfd88334ca6d94d5d0a8b6324dd6600d602 (diff)
Streamed some refactoring. More work to do
Diffstat (limited to 'scratch')
-rw-r--r--scratch/bk_tree.lua270
-rw-r--r--scratch/file_finder.lua13
-rw-r--r--scratch/fuzzy_tester.lua8
-rw-r--r--scratch/ngrams.lua87
-rw-r--r--scratch/picker_locations.lua4
-rwxr-xr-xscratch/slow_proc.sh8
6 files changed, 380 insertions, 10 deletions
diff --git a/scratch/bk_tree.lua b/scratch/bk_tree.lua
new file mode 100644
index 0000000..3957224
--- /dev/null
+++ b/scratch/bk_tree.lua
@@ -0,0 +1,270 @@
+
+---------------------------
+-- bk-tree datastructure
+--
+-- http://en.wikipedia.org/wiki/BK-tree
+-- @module bk-tree
+-- @author Robin Hübner
+-- robinhubner@gmail.com
+-- @release version 1.0.2
+-- @license MIT
+
+local bk_tree = {}
+
+
+local function lazy_copy(t1)
+
+ local cp = {}
+
+ for k, v in pairs(t1) do
+ cp[k] = v
+ end
+
+ return cp
+
+end
+
+local function min(a, b, c)
+
+ local min_val = a
+
+ if b < min_val then min_val = b end
+ if c < min_val then min_val = c end
+
+ return min_val
+
+end
+
+----------------------------------
+--- Levenshtein distance function.
+-- @tparam string s1
+-- @tparam string s2
+-- @treturn number the levenshtein distance
+-- @within Metrics
+function bk_tree.levenshtein_dist(s1, s2)
+
+ if s1 == s2 then return 0 end
+ if s1:len() == 0 then return s2:len() end
+ if s2:len() == 0 then return s1:len() end
+ if s1:len() < s2:len() then s1, s2 = s2, s1 end
+
+ t = {}
+ for i=1, #s1+1 do
+ t[i] = {i-1}
+ end
+
+ for i=1, #s2+1 do
+ t[1][i] = i-1
+ end
+
+ local cost
+ for i=2, #s1+1 do
+
+ for j=2, #s2+1 do
+ cost = (s1:sub(i-1,i-1) == s2:sub(j-1,j-1) and 0) or 1
+ t[i][j] = min(
+ t[i-1][j] + 1,
+ t[i][j-1] + 1,
+ t[i-1][j-1] + cost)
+ end
+
+ end
+
+ return t[#s1+1][#s2+1]
+
+end
+
+function bk_tree.hook(param)
+
+ local name, callee = debug.getlocal(2, 1)
+ local f = debug.getinfo(2, "f").func
+ local p = debug.getinfo(3, "f").func
+ --[[ previous function in the callstack, if called from the same place,
+ don't add to the insert/remove counters. ]]--
+
+ if f == bk_tree.insert and p ~= bk_tree.insert then
+ callee.stats.nodes = callee.stats.nodes + 1
+ elseif f == bk_tree.remove and p ~= bk_tree.remove then
+ callee.stats.nodes = callee.stats.nodes - 1
+ elseif f == bk_tree.query and p == bk_tree.query then
+ callee.stats.queries = callee.stats.queries + 1
+ end
+
+end
+
+--- Hooks debugging into tree execution.
+-- Keeps track of number of nodes created, queries made,
+-- note that this must be run directly after tree is created
+-- in order to get correct information.
+-- @within Debug
+--- @usage
+-- local bktree = require "bk-tree"
+-- local tree = bktree:new("word")
+-- tree:debug()
+-- tree:insert("perceive")
+-- tree:insert("beautiful")
+-- tree:insert("definitely")
+-- local result = tree:query("definately", 3)
+-- tree:print_stats()
+--
+-- -- output
+-- Nodes: 4
+-- Queries: 3
+-- Nodes Queried: 75%
+function bk_tree:debug()
+
+ local nc = 0
+ if self.root then nc = 1 end
+ self.stats = { nodes = nc, queries = 0 }
+ debug.sethook(self.hook, "c")
+
+end
+
+--- Print execution stats.
+-- Prints nodes queried and total nodes, as well as a fraction of
+-- nodes visited to satisfy the query, resets the counter of nodes queried when called.
+-- @within Debug
+-- @see debug
+function bk_tree:print_stats()
+
+ print("\nNodes: " .. self.stats.nodes)
+ print("Queries: " .. self.stats.queries)
+ print("Nodes Queried: " .. self.stats.queries/self.stats.nodes*100 .. "%\n")
+ self.stats.queries = 0
+
+end
+
+--- Fetch execution stats.
+-- Returns a copy of the execution stats that @{print_stats} would print, requires debug to have been enabled
+-- to not just return defaults. Useful if you want to profile things.
+-- @within Debug
+-- @return {key = value,...}
+function bk_tree:get_stats()
+
+ return lazy_copy(self.stats)
+
+end
+
+---------------------------
+--- Creates a new bk-tree.
+-- @constructor
+-- @string[opt] root_word the root of the new tree
+-- @tparam[opt=levenshtein_dist] function dist_func the distance function used
+-- @see levenshtein_dist
+-- @return the new bk-tree instance
+--- @usage
+-- local bktree = require "bk-tree"
+-- local tree = bktree:new("word")
+function bk_tree:new(root_word, dist_func)
+
+ local n_obj = {}
+ if root_word then n_obj.root = { str = root_word, children = {} } end
+ n_obj.dist_func = dist_func or self.levenshtein_dist
+
+ setmetatable(n_obj, self)
+ self.__index = self
+
+ return n_obj
+
+end
+
+--------------------------------
+--- Inserts word into the tree.
+-- @string word
+-- @treturn bool true if inserted, false if word already exists in tree
+--- @usage
+-- local bktree = require "bk-tree"
+-- local tree = bktree:new("root")
+-- local success = tree:insert("other_word")
+function bk_tree:insert(word, node)
+
+ node = node or self.root
+
+ if not node then
+ self.root = { str = word, children = {} }
+ return true
+ end
+
+ local dist = self.dist_func(word, node.str)
+ if dist == 0 then return false end
+
+ local some_node = node.children[dist]
+
+ if not some_node then
+ node.children[dist] = { str = word, children = {} }
+ return true
+ end
+
+ return self:insert(word, some_node)
+
+end
+
+--------------------------------
+--- Query the tree for matches.
+-- @string word
+-- @tparam number n max edit distance to use when querying
+-- @treturn {{str=string,distance=number},....} table of tables with matching words, empty table if no matches
+--- @usage
+-- local bktree = require "bk-tree"
+-- local tree = bktree:new("word")
+-- tree:insert("hello")
+-- tree:insert("goodbye")
+-- tree:insert("woop")
+-- local result = tree:query("woop", 1)
+function bk_tree:query(word, n, node, matches)
+
+ node = node or self.root
+ matches = matches or {}
+
+ if not node then return matches end
+
+ local dist = self.dist_func(word, node.str)
+ if dist <= n then matches[#matches+1] = {str = node.str, distance = dist} end
+
+ for k, child in pairs(node.children) do
+ if child ~= nil then
+ if k >= dist-n and k <= dist+n then
+ self:query(word, n, child, matches)
+ end
+ end
+ end
+
+ return matches
+
+end
+
+---------------------------------------------------------
+--- Queries the the tree for a match, sorts the results.
+-- Calls @{query} and returns the results sorted.
+-- @string word
+-- @tparam number n max edit distance to use when querying
+-- @treturn {{str=string,distance=number},....} table of tables with matching words sorted by distance, empty table if no matches
+--- @usage
+-- local bktree = require "bk-tree"
+-- local tree = bktree:new("word")
+-- tree:insert("woop")
+-- tree:insert("worp")
+-- tree:insert("warp")
+-- local result = tree:query_sorted("woop", 3)
+function bk_tree:query_sorted(word, n)
+
+ local result = self:query(word, n)
+
+ table.sort(result, function(a,b) return a.distance < b.distance end)
+
+ return result
+
+end
+
+
+local tree = bk_tree:new("word")
+tree:insert("hello")
+tree:insert("welp")
+tree:insert("function")
+tree:insert("this long line what")
+tree:insert("what this long line what")
+print(vim.inspect(tree))
+print(vim.inspect(tree:query_sorted("what", 3)))
+
+
+return bk_tree
diff --git a/scratch/file_finder.lua b/scratch/file_finder.lua
index 8b5bb7b..ae84e2f 100644
--- a/scratch/file_finder.lua
+++ b/scratch/file_finder.lua
@@ -1,3 +1,4 @@
+package.loaded['telescope.pickers'] = nil
local telescope = require('telescope')
-- Goals:
@@ -61,15 +62,15 @@ local file_sorter = telescope.sorters.new {
if prompt == '' then return 0 end
if not line then return -1 end
- local dist = string_distance(prompt, line)
- -- if dist > (0.75 * #line) and #prompt > 3 then
- -- return -1
- -- end
-
- return dist
+ return tonumber(vim.fn.systemlist(string.format(
+ "echo '%s' | ~/tmp/fuzzy_test/target/debug/fuzzy_test '%s'",
+ line,
+ prompt
+ ))[1])
end
}
+
local file_previewer = telescope.previewers.vim_buffer
local file_picker = telescope.pickers.new {
diff --git a/scratch/fuzzy_tester.lua b/scratch/fuzzy_tester.lua
new file mode 100644
index 0000000..0971381
--- /dev/null
+++ b/scratch/fuzzy_tester.lua
@@ -0,0 +1,8 @@
+
+line = "hello"
+prompt = "h"
+print(vim.inspect(vim.fn.systemlist(string.format(
+ "echo '%s' | ~/tmp/fuzzy_test/target/debug/fuzzy_test '%s'",
+ line,
+ prompt
+))))
diff --git a/scratch/ngrams.lua b/scratch/ngrams.lua
new file mode 100644
index 0000000..8b763a8
--- /dev/null
+++ b/scratch/ngrams.lua
@@ -0,0 +1,87 @@
+
+local function ngrams(counts, doc)
+ local DEPTH = 5
+ local docLen = #doc
+ local min, concat = math.min, table.concat
+ for i = 1, docLen - 1 do
+ for j = i, min(i + DEPTH - 1, docLen) do
+ if not doc[j] then break end
+ local k = concat(doc, " ", i, j)
+ counts[k] = (counts[k] or 0) + 1
+ end
+ end
+end
+
+
+local bz = io.popen('bzcat /home/tj/Downloads/pages.xml.bz2')
+local title, content = "", ""
+local inText = false
+
+local numDocs = 0
+local globalCounts = {}
+
+local function set(t)
+ local s = {}
+ for _, v in pairs(t) do s[v] = true end
+ return s
+end
+
+local bad = set({
+ 'after', 'also', 'article', 'date', 'defaultsort', 'external', 'first', 'from',
+ 'have', 'html', 'http', 'image', 'infobox', 'links', 'name', 'other', 'preserve',
+ 'references', 'reflist', 'space', 'that', 'this', 'title', 'which', 'with',
+ 'quot', 'ref', 'name', 'http', 'amp', 'ndash', 'www', 'cite', 'nbsp',
+ 'style', 'text', 'align', 'center', 'background'
+ })
+
+local function isnumber(w)
+ s, e = w:find("[0-9]+")
+ return s
+end
+
+for line in bz:lines() do
+ local _, _, mTitle = line:find("<title>(.*)</title>")
+ local _, _, bText = line:find("<text[^>]*>([^<]*)")
+ local eText, _ = line:find("</text>")
+
+ if mTitle then
+ title = mTitle
+ elseif bText then
+ content = bText
+ inText = true
+ elseif inText then
+ content = content .. line
+ end
+
+ if eText then
+ words = {}
+ for v in content:gmatch("%w+") do
+ v = v:lower()
+ if #v >= 3 and #v < 12 and not bad[v] and not isnumber(v) then
+ table.insert(words, v)
+ else
+ table.insert(words, nil)
+ end
+ end
+
+ ngrams(globalCounts, words)
+ inText = false
+
+ numDocs = numDocs + 1
+ if numDocs % 10 == 0 then
+ io.write(string.format("Working... %d documents processed.\r", numDocs))
+ io.flush()
+ end
+
+ if numDocs == 500 then
+ local f = io.open('/tmp/freqs.lua.txt', 'w')
+ for k, v in pairs(globalCounts) do
+ f:write(k, '\t', v, '\n')
+ end
+ f:close()
+
+ globalCounts = {}
+ os.exit(0)
+ end
+ end
+end
diff --git a/scratch/picker_locations.lua b/scratch/picker_locations.lua
new file mode 100644
index 0000000..82fe7f7
--- /dev/null
+++ b/scratch/picker_locations.lua
@@ -0,0 +1,4 @@
+package.loaded['telescope'] = nil
+package.loaded['telescope.init'] = nil
+package.loaded['telescope.picker'] = nil
+package.loaded['telescope.finder'] = nil
diff --git a/scratch/slow_proc.sh b/scratch/slow_proc.sh
index bbcf8bf..580e617 100755
--- a/scratch/slow_proc.sh
+++ b/scratch/slow_proc.sh
@@ -1,10 +1,10 @@
echo "hello"
sleep 1
-echo "cool"
+echo "help"
sleep 1
-echo "world"
+echo "hi"
sleep 1
-echo "x"
+echo "husband"
sleep 1
-echo "y"
+echo "helper"