1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
local log = require('telescope.log')
local util = require('telescope.utils')
local sorters = {}
local Sorter = {}
Sorter.__index = Sorter
---@class Sorter
--- Sorter sorts a list of results by return a single integer for a line,
--- given a prompt
---
--- Lower number is better (because it's like a closer match)
--- But, any number below 0 means you want that line filtered out.
--- @field scoring_function function Function that has the interface:
-- (sorter, prompt, line): number
function Sorter:new(opts)
opts = opts or {}
return setmetatable({
state = {},
scoring_function = opts.scoring_function,
}, Sorter)
end
function Sorter:score(prompt, entry)
-- TODO: Decide if we actually want to check the type every time.
return self:scoring_function(prompt, type(entry) == "string" and entry or entry.ordinal)
end
function sorters.new(...)
return Sorter:new(...)
end
sorters.Sorter = Sorter
sorters.get_ngram_sorter = function()
return Sorter:new {
scoring_function = function(_, prompt, line)
if prompt == "" or prompt == nil then
return 1
end
local ok, result = pcall(function()
local ngram = util.new_ngram { N = 4 }
ngram:add(line)
local score = ngram:score(prompt)
if score == 0 then
return -1
end
-- return math.pow(math.max(score, 0.0001), -1)
return score
end)
print(prompt, line, result)
return ok and result or 1
end
}
end
sorters.get_levenshtein_sorter = function()
return Sorter:new {
scoring_function = function(_, prompt, line)
local result = require('telescope.algos.string_distance')(prompt, line)
log.info("Sorting result for", prompt, line, " = ", result)
return result
end
}
end
sorters.get_norcalli_sorter = function()
local ngramlen = 2
local cached_ngrams = {}
local function overlapping_ngrams(s, n)
if cached_ngrams[s] and cached_ngrams[s][n] then
return cached_ngrams[s][n]
end
local R = {}
for i = 1, s:len() - n + 1 do
R[#R+1] = s:sub(i, i+n-1)
end
if not cached_ngrams[s] then
cached_ngrams[s] = {}
end
cached_ngrams[s][n] = R
return R
end
return Sorter:new {
scoring_function = function(_, prompt, line)
if prompt == 0 or #prompt < ngramlen then
return 0
end
local prompt_ngrams = overlapping_ngrams(prompt, ngramlen)
local prompt_lower = prompt:lower()
local line_lower = line:lower()
local N = #prompt
local contains_string = line_lower:find(prompt_lower, 1, true)
local consecutive_matches = 0
local previous_match_index = 0
local match_count = 0
for i = 1, #prompt_ngrams do
local match_start = line_lower:find(prompt_ngrams[i], 1, true)
if match_start then
match_count = match_count + 1
if match_start > previous_match_index then
consecutive_matches = consecutive_matches + 1
end
previous_match_index = match_start
end
end
-- TODO: Copied from ashkan.
local denominator = (
(10 * match_count / #prompt_ngrams)
-- biases for shorter strings
-- TODO(ashkan): this can bias towards repeated finds of the same
-- subpattern with overlapping_ngrams
+ 3 * match_count * ngramlen / #line
+ consecutive_matches
+ N / (contains_string or (2 * #line))
-- + 30/(c1 or 2*N)
)
if denominator == 0 or denominator ~= denominator then
return -1
end
if #prompt > 2 and denominator < 0.5 then
return -1
end
return 1 / denominator
end
}
end
return sorters
|