diff options
| author | TJ DeVries <devries.timothyj@gmail.com> | 2021-08-20 11:11:24 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-08-20 11:11:24 -0400 |
| commit | a97af306c4e9c9a6fa7c886c0ffe3079822c5203 (patch) | |
| tree | f5e2b50a767e93618d0d8fdddb8a964c90633c8a /scratch/bk_tree.lua | |
| parent | d6d28dbe324de9826a579155076873888169ba0f (diff) | |
feat(performance): Major performance improvements using async v2 from @oberblastmeister (#987)
* start: Working w/ async jobs
* short circuit to using bad finder if you pass writer.
Diffstat (limited to 'scratch/bk_tree.lua')
| -rw-r--r-- | scratch/bk_tree.lua | 270 |
1 files changed, 0 insertions, 270 deletions
diff --git a/scratch/bk_tree.lua b/scratch/bk_tree.lua deleted file mode 100644 index 3957224..0000000 --- a/scratch/bk_tree.lua +++ /dev/null @@ -1,270 +0,0 @@ - ---------------------------- --- bk-tree datastructure --- --- http://en.wikipedia.org/wiki/BK-tree --- @module bk-tree --- @author Robin Hübner --- robinhubner@gmail.com --- @release version 1.0.2 --- @license MIT - -local bk_tree = {} - - -local function lazy_copy(t1) - - local cp = {} - - for k, v in pairs(t1) do - cp[k] = v - end - - return cp - -end - -local function min(a, b, c) - - local min_val = a - - if b < min_val then min_val = b end - if c < min_val then min_val = c end - - return min_val - -end - ----------------------------------- ---- Levenshtein distance function. --- @tparam string s1 --- @tparam string s2 --- @treturn number the levenshtein distance --- @within Metrics -function bk_tree.levenshtein_dist(s1, s2) - - if s1 == s2 then return 0 end - if s1:len() == 0 then return s2:len() end - if s2:len() == 0 then return s1:len() end - if s1:len() < s2:len() then s1, s2 = s2, s1 end - - t = {} - for i=1, #s1+1 do - t[i] = {i-1} - end - - for i=1, #s2+1 do - t[1][i] = i-1 - end - - local cost - for i=2, #s1+1 do - - for j=2, #s2+1 do - cost = (s1:sub(i-1,i-1) == s2:sub(j-1,j-1) and 0) or 1 - t[i][j] = min( - t[i-1][j] + 1, - t[i][j-1] + 1, - t[i-1][j-1] + cost) - end - - end - - return t[#s1+1][#s2+1] - -end - -function bk_tree.hook(param) - - local name, callee = debug.getlocal(2, 1) - local f = debug.getinfo(2, "f").func - local p = debug.getinfo(3, "f").func - --[[ previous function in the callstack, if called from the same place, - don't add to the insert/remove counters. ]]-- - - if f == bk_tree.insert and p ~= bk_tree.insert then - callee.stats.nodes = callee.stats.nodes + 1 - elseif f == bk_tree.remove and p ~= bk_tree.remove then - callee.stats.nodes = callee.stats.nodes - 1 - elseif f == bk_tree.query and p == bk_tree.query then - callee.stats.queries = callee.stats.queries + 1 - end - -end - ---- Hooks debugging into tree execution. --- Keeps track of number of nodes created, queries made, --- note that this must be run directly after tree is created --- in order to get correct information. --- @within Debug ---- @usage --- local bktree = require "bk-tree" --- local tree = bktree:new("word") --- tree:debug() --- tree:insert("perceive") --- tree:insert("beautiful") --- tree:insert("definitely") --- local result = tree:query("definately", 3) --- tree:print_stats() --- --- -- output --- Nodes: 4 --- Queries: 3 --- Nodes Queried: 75% -function bk_tree:debug() - - local nc = 0 - if self.root then nc = 1 end - self.stats = { nodes = nc, queries = 0 } - debug.sethook(self.hook, "c") - -end - ---- Print execution stats. --- Prints nodes queried and total nodes, as well as a fraction of --- nodes visited to satisfy the query, resets the counter of nodes queried when called. --- @within Debug --- @see debug -function bk_tree:print_stats() - - print("\nNodes: " .. self.stats.nodes) - print("Queries: " .. self.stats.queries) - print("Nodes Queried: " .. self.stats.queries/self.stats.nodes*100 .. "%\n") - self.stats.queries = 0 - -end - ---- Fetch execution stats. --- Returns a copy of the execution stats that @{print_stats} would print, requires debug to have been enabled --- to not just return defaults. Useful if you want to profile things. --- @within Debug --- @return {key = value,...} -function bk_tree:get_stats() - - return lazy_copy(self.stats) - -end - ---------------------------- ---- Creates a new bk-tree. --- @constructor --- @string[opt] root_word the root of the new tree --- @tparam[opt=levenshtein_dist] function dist_func the distance function used --- @see levenshtein_dist --- @return the new bk-tree instance ---- @usage --- local bktree = require "bk-tree" --- local tree = bktree:new("word") -function bk_tree:new(root_word, dist_func) - - local n_obj = {} - if root_word then n_obj.root = { str = root_word, children = {} } end - n_obj.dist_func = dist_func or self.levenshtein_dist - - setmetatable(n_obj, self) - self.__index = self - - return n_obj - -end - --------------------------------- ---- Inserts word into the tree. --- @string word --- @treturn bool true if inserted, false if word already exists in tree ---- @usage --- local bktree = require "bk-tree" --- local tree = bktree:new("root") --- local success = tree:insert("other_word") -function bk_tree:insert(word, node) - - node = node or self.root - - if not node then - self.root = { str = word, children = {} } - return true - end - - local dist = self.dist_func(word, node.str) - if dist == 0 then return false end - - local some_node = node.children[dist] - - if not some_node then - node.children[dist] = { str = word, children = {} } - return true - end - - return self:insert(word, some_node) - -end - --------------------------------- ---- Query the tree for matches. --- @string word --- @tparam number n max edit distance to use when querying --- @treturn {{str=string,distance=number},....} table of tables with matching words, empty table if no matches ---- @usage --- local bktree = require "bk-tree" --- local tree = bktree:new("word") --- tree:insert("hello") --- tree:insert("goodbye") --- tree:insert("woop") --- local result = tree:query("woop", 1) -function bk_tree:query(word, n, node, matches) - - node = node or self.root - matches = matches or {} - - if not node then return matches end - - local dist = self.dist_func(word, node.str) - if dist <= n then matches[#matches+1] = {str = node.str, distance = dist} end - - for k, child in pairs(node.children) do - if child ~= nil then - if k >= dist-n and k <= dist+n then - self:query(word, n, child, matches) - end - end - end - - return matches - -end - ---------------------------------------------------------- ---- Queries the the tree for a match, sorts the results. --- Calls @{query} and returns the results sorted. --- @string word --- @tparam number n max edit distance to use when querying --- @treturn {{str=string,distance=number},....} table of tables with matching words sorted by distance, empty table if no matches ---- @usage --- local bktree = require "bk-tree" --- local tree = bktree:new("word") --- tree:insert("woop") --- tree:insert("worp") --- tree:insert("warp") --- local result = tree:query_sorted("woop", 3) -function bk_tree:query_sorted(word, n) - - local result = self:query(word, n) - - table.sort(result, function(a,b) return a.distance < b.distance end) - - return result - -end - - -local tree = bk_tree:new("word") -tree:insert("hello") -tree:insert("welp") -tree:insert("function") -tree:insert("this long line what") -tree:insert("what this long line what") -print(vim.inspect(tree)) -print(vim.inspect(tree:query_sorted("what", 3))) - - -return bk_tree |
