summaryrefslogtreecommitdiff
path: root/scratch/bk_tree.lua
diff options
context:
space:
mode:
authorTJ DeVries <devries.timothyj@gmail.com>2021-08-20 11:11:24 -0400
committerGitHub <noreply@github.com>2021-08-20 11:11:24 -0400
commita97af306c4e9c9a6fa7c886c0ffe3079822c5203 (patch)
treef5e2b50a767e93618d0d8fdddb8a964c90633c8a /scratch/bk_tree.lua
parentd6d28dbe324de9826a579155076873888169ba0f (diff)
feat(performance): Major performance improvements using async v2 from @oberblastmeister (#987)
* start: Working w/ async jobs * short circuit to using bad finder if you pass writer.
Diffstat (limited to 'scratch/bk_tree.lua')
-rw-r--r--scratch/bk_tree.lua270
1 files changed, 0 insertions, 270 deletions
diff --git a/scratch/bk_tree.lua b/scratch/bk_tree.lua
deleted file mode 100644
index 3957224..0000000
--- a/scratch/bk_tree.lua
+++ /dev/null
@@ -1,270 +0,0 @@
-
----------------------------
--- bk-tree datastructure
---
--- http://en.wikipedia.org/wiki/BK-tree
--- @module bk-tree
--- @author Robin Hübner
--- robinhubner@gmail.com
--- @release version 1.0.2
--- @license MIT
-
-local bk_tree = {}
-
-
-local function lazy_copy(t1)
-
- local cp = {}
-
- for k, v in pairs(t1) do
- cp[k] = v
- end
-
- return cp
-
-end
-
-local function min(a, b, c)
-
- local min_val = a
-
- if b < min_val then min_val = b end
- if c < min_val then min_val = c end
-
- return min_val
-
-end
-
-----------------------------------
---- Levenshtein distance function.
--- @tparam string s1
--- @tparam string s2
--- @treturn number the levenshtein distance
--- @within Metrics
-function bk_tree.levenshtein_dist(s1, s2)
-
- if s1 == s2 then return 0 end
- if s1:len() == 0 then return s2:len() end
- if s2:len() == 0 then return s1:len() end
- if s1:len() < s2:len() then s1, s2 = s2, s1 end
-
- t = {}
- for i=1, #s1+1 do
- t[i] = {i-1}
- end
-
- for i=1, #s2+1 do
- t[1][i] = i-1
- end
-
- local cost
- for i=2, #s1+1 do
-
- for j=2, #s2+1 do
- cost = (s1:sub(i-1,i-1) == s2:sub(j-1,j-1) and 0) or 1
- t[i][j] = min(
- t[i-1][j] + 1,
- t[i][j-1] + 1,
- t[i-1][j-1] + cost)
- end
-
- end
-
- return t[#s1+1][#s2+1]
-
-end
-
-function bk_tree.hook(param)
-
- local name, callee = debug.getlocal(2, 1)
- local f = debug.getinfo(2, "f").func
- local p = debug.getinfo(3, "f").func
- --[[ previous function in the callstack, if called from the same place,
- don't add to the insert/remove counters. ]]--
-
- if f == bk_tree.insert and p ~= bk_tree.insert then
- callee.stats.nodes = callee.stats.nodes + 1
- elseif f == bk_tree.remove and p ~= bk_tree.remove then
- callee.stats.nodes = callee.stats.nodes - 1
- elseif f == bk_tree.query and p == bk_tree.query then
- callee.stats.queries = callee.stats.queries + 1
- end
-
-end
-
---- Hooks debugging into tree execution.
--- Keeps track of number of nodes created, queries made,
--- note that this must be run directly after tree is created
--- in order to get correct information.
--- @within Debug
---- @usage
--- local bktree = require "bk-tree"
--- local tree = bktree:new("word")
--- tree:debug()
--- tree:insert("perceive")
--- tree:insert("beautiful")
--- tree:insert("definitely")
--- local result = tree:query("definately", 3)
--- tree:print_stats()
---
--- -- output
--- Nodes: 4
--- Queries: 3
--- Nodes Queried: 75%
-function bk_tree:debug()
-
- local nc = 0
- if self.root then nc = 1 end
- self.stats = { nodes = nc, queries = 0 }
- debug.sethook(self.hook, "c")
-
-end
-
---- Print execution stats.
--- Prints nodes queried and total nodes, as well as a fraction of
--- nodes visited to satisfy the query, resets the counter of nodes queried when called.
--- @within Debug
--- @see debug
-function bk_tree:print_stats()
-
- print("\nNodes: " .. self.stats.nodes)
- print("Queries: " .. self.stats.queries)
- print("Nodes Queried: " .. self.stats.queries/self.stats.nodes*100 .. "%\n")
- self.stats.queries = 0
-
-end
-
---- Fetch execution stats.
--- Returns a copy of the execution stats that @{print_stats} would print, requires debug to have been enabled
--- to not just return defaults. Useful if you want to profile things.
--- @within Debug
--- @return {key = value,...}
-function bk_tree:get_stats()
-
- return lazy_copy(self.stats)
-
-end
-
----------------------------
---- Creates a new bk-tree.
--- @constructor
--- @string[opt] root_word the root of the new tree
--- @tparam[opt=levenshtein_dist] function dist_func the distance function used
--- @see levenshtein_dist
--- @return the new bk-tree instance
---- @usage
--- local bktree = require "bk-tree"
--- local tree = bktree:new("word")
-function bk_tree:new(root_word, dist_func)
-
- local n_obj = {}
- if root_word then n_obj.root = { str = root_word, children = {} } end
- n_obj.dist_func = dist_func or self.levenshtein_dist
-
- setmetatable(n_obj, self)
- self.__index = self
-
- return n_obj
-
-end
-
---------------------------------
---- Inserts word into the tree.
--- @string word
--- @treturn bool true if inserted, false if word already exists in tree
---- @usage
--- local bktree = require "bk-tree"
--- local tree = bktree:new("root")
--- local success = tree:insert("other_word")
-function bk_tree:insert(word, node)
-
- node = node or self.root
-
- if not node then
- self.root = { str = word, children = {} }
- return true
- end
-
- local dist = self.dist_func(word, node.str)
- if dist == 0 then return false end
-
- local some_node = node.children[dist]
-
- if not some_node then
- node.children[dist] = { str = word, children = {} }
- return true
- end
-
- return self:insert(word, some_node)
-
-end
-
---------------------------------
---- Query the tree for matches.
--- @string word
--- @tparam number n max edit distance to use when querying
--- @treturn {{str=string,distance=number},....} table of tables with matching words, empty table if no matches
---- @usage
--- local bktree = require "bk-tree"
--- local tree = bktree:new("word")
--- tree:insert("hello")
--- tree:insert("goodbye")
--- tree:insert("woop")
--- local result = tree:query("woop", 1)
-function bk_tree:query(word, n, node, matches)
-
- node = node or self.root
- matches = matches or {}
-
- if not node then return matches end
-
- local dist = self.dist_func(word, node.str)
- if dist <= n then matches[#matches+1] = {str = node.str, distance = dist} end
-
- for k, child in pairs(node.children) do
- if child ~= nil then
- if k >= dist-n and k <= dist+n then
- self:query(word, n, child, matches)
- end
- end
- end
-
- return matches
-
-end
-
----------------------------------------------------------
---- Queries the the tree for a match, sorts the results.
--- Calls @{query} and returns the results sorted.
--- @string word
--- @tparam number n max edit distance to use when querying
--- @treturn {{str=string,distance=number},....} table of tables with matching words sorted by distance, empty table if no matches
---- @usage
--- local bktree = require "bk-tree"
--- local tree = bktree:new("word")
--- tree:insert("woop")
--- tree:insert("worp")
--- tree:insert("warp")
--- local result = tree:query_sorted("woop", 3)
-function bk_tree:query_sorted(word, n)
-
- local result = self:query(word, n)
-
- table.sort(result, function(a,b) return a.distance < b.distance end)
-
- return result
-
-end
-
-
-local tree = bk_tree:new("word")
-tree:insert("hello")
-tree:insert("welp")
-tree:insert("function")
-tree:insert("this long line what")
-tree:insert("what this long line what")
-print(vim.inspect(tree))
-print(vim.inspect(tree:query_sorted("what", 3)))
-
-
-return bk_tree