227 lines
8.8 KiB
EmacsLisp
227 lines
8.8 KiB
EmacsLisp
|
;;; grizzl-core.el --- Fast fuzzy search index for Emacs.
|
||
|
|
||
|
;; Copyright © 2013 Chris Corbyn
|
||
|
;;
|
||
|
;; Author: Chris Corbyn <chris@w3style.co.uk>
|
||
|
;; URL: https://github.com/d11wtq/grizzl
|
||
|
;; Version: 0.1.1
|
||
|
;; Keywords: convenience, usability
|
||
|
|
||
|
;; This file is NOT part of GNU Emacs.
|
||
|
|
||
|
;;; --- License
|
||
|
|
||
|
;; Licensed under the same terms as Emacs.
|
||
|
|
||
|
;;; --- Commentary
|
||
|
|
||
|
;; Grizzl provides a fuzzy completion framework for general purpose
|
||
|
;; use in Emacs Lisp projects.
|
||
|
;;
|
||
|
;; grizzl-core.el provides the underlying data structures and sesrch
|
||
|
;; algorithm without any UI attachment. At the core, a fuzzy search
|
||
|
;; index is created from a list of strings, using `grizzl-make-index'.
|
||
|
;; A fuzzy search term is then used to get a result from this index
|
||
|
;; with `grizzl-search'. Because grizzl considers the usage of a
|
||
|
;; fuzzy search index to operate in real-time as a user enters a
|
||
|
;; search term in the minibuffer, the framework optimizes for this use
|
||
|
;; case. Any result can be passed back into `grizzl-search' as a hint
|
||
|
;; to continue searching. The search algorithm is able to understand
|
||
|
;; insertions and deletions and therefore minimizes the work it needs
|
||
|
;; to do in this case. The intended use here is to collect a result
|
||
|
;; on each key press and feed that result into the search for the next
|
||
|
;; key press. Once a search is complete, the matched strings are then
|
||
|
;; read, using `grizzl-result-strings'. The results are ordered on the
|
||
|
;; a combination of the Levenshtein Distance and a character-proximity
|
||
|
;; scoring calculation. This means shorter strings are favoured, but
|
||
|
;; adjacent letters are more heavily favoured.
|
||
|
;;
|
||
|
;; It is assumed that the index will be re-used across multiple
|
||
|
;; searches on larger sets of data.
|
||
|
;;
|
||
|
;;
|
||
|
|
||
|
(eval-when-compile
|
||
|
(require 'cl-lib))
|
||
|
|
||
|
;;; --- Public Functions
|
||
|
|
||
|
;;;###autoload
|
||
|
(defun grizzl-make-index (strings &rest options)
|
||
|
"Makes an index from the list STRINGS for use with `grizzl-search'.
|
||
|
|
||
|
If :PROGRESS-FN is given as a keyword argument, it is called repeatedly
|
||
|
with integers N and TOTAL.
|
||
|
|
||
|
If :CASE-SENSITIVE is specified as a non-nil keyword argument, the index
|
||
|
will be created case-sensitive, otherwise it will be case-insensitive."
|
||
|
(let ((lookup-table (make-hash-table))
|
||
|
(total-strs (length strings))
|
||
|
(case-sensitive (plist-get options :case-sensitive))
|
||
|
(progress-fn (plist-get options :progress-fn))
|
||
|
(string-data (vconcat (mapcar (lambda (s)
|
||
|
(cons s (length s)))
|
||
|
strings))))
|
||
|
(reduce (lambda (list-offset str)
|
||
|
(grizzl-index-insert str list-offset lookup-table
|
||
|
:case-sensitive case-sensitive)
|
||
|
(when progress-fn
|
||
|
(funcall progress-fn (1+ list-offset) total-strs))
|
||
|
(1+ list-offset))
|
||
|
strings
|
||
|
:initial-value 0)
|
||
|
(maphash (lambda (char str-map)
|
||
|
(maphash (lambda (list-offset locations)
|
||
|
(puthash list-offset (reverse locations) str-map))
|
||
|
str-map)) lookup-table)
|
||
|
`((case-sensitive . ,case-sensitive)
|
||
|
(lookup-table . ,lookup-table)
|
||
|
(string-data . ,string-data))))
|
||
|
|
||
|
;;;###autoload
|
||
|
(defun grizzl-search (term index &optional old-result)
|
||
|
"Fuzzy searches for TERM in INDEX prepared with `grizzl-make-index'.
|
||
|
|
||
|
OLD-RESULT may be specified as an existing search result to increment from.
|
||
|
The result can be read with `grizzl-result-strings'."
|
||
|
(let* ((cased-term (if (grizzl-index-case-sensitive-p index)
|
||
|
term
|
||
|
(downcase term)))
|
||
|
(result (grizzl-rewind-result cased-term index old-result))
|
||
|
(matches (copy-hash-table (grizzl-result-matches result)))
|
||
|
(from-pos (length (grizzl-result-term result)))
|
||
|
(remainder (substring cased-term from-pos))
|
||
|
(lookup-table (grizzl-lookup-table index)))
|
||
|
(reduce (lambda (acc-res ch)
|
||
|
(let ((sub-table (gethash ch lookup-table)))
|
||
|
(if (not sub-table)
|
||
|
(clrhash matches)
|
||
|
(grizzl-search-increment sub-table matches))
|
||
|
(grizzl-cons-result cased-term matches acc-res)))
|
||
|
remainder
|
||
|
:initial-value result)))
|
||
|
|
||
|
;;;###autoload
|
||
|
(defun grizzl-result-count (result)
|
||
|
"Returns the number of matches present in RESULT."
|
||
|
(hash-table-count (grizzl-result-matches result)))
|
||
|
|
||
|
;;;###autoload
|
||
|
(defun grizzl-result-strings (result index &rest options)
|
||
|
"Returns the ordered list of matched strings in RESULT, using INDEX.
|
||
|
|
||
|
If the :START option is specified, results are read from the given offset.
|
||
|
If the :END option is specified, up to :END results are returned."
|
||
|
(let* ((matches (grizzl-result-matches result))
|
||
|
(strings (grizzl-index-strings index))
|
||
|
(loaded '())
|
||
|
(start (plist-get options :start))
|
||
|
(end (plist-get options :end)))
|
||
|
(maphash (lambda (string-offset char-offset)
|
||
|
(push string-offset loaded))
|
||
|
matches)
|
||
|
(let* ((ordered (sort loaded
|
||
|
(lambda (a b)
|
||
|
(< (cadr (gethash a matches))
|
||
|
(cadr (gethash b matches))))))
|
||
|
(best (if (or start end)
|
||
|
(delete-if-not 'identity
|
||
|
(subseq ordered (or start 0) end))
|
||
|
ordered)))
|
||
|
(mapcar (lambda (n)
|
||
|
(car (elt strings n)))
|
||
|
best))))
|
||
|
|
||
|
;;; --- Private Functions
|
||
|
|
||
|
(defun grizzl-cons-result (term matches results)
|
||
|
"Build a new result for TERM and hash-table MATCHES consed with RESULTS."
|
||
|
(cons (cons term matches) results))
|
||
|
|
||
|
(defun grizzl-rewind-result (term index result)
|
||
|
"Adjusts RESULT according to TERM, ready for a new search."
|
||
|
(if result
|
||
|
(let* ((old-term (grizzl-result-term result))
|
||
|
(new-len (length term))
|
||
|
(old-len (length old-term)))
|
||
|
(if (and (>= new-len old-len)
|
||
|
(string-equal old-term (substring term 0 old-len)))
|
||
|
result
|
||
|
(grizzl-rewind-result term index (cdr result))))
|
||
|
(grizzl-cons-result "" (grizzl-base-matches index) nil)))
|
||
|
|
||
|
(defun grizzl-base-matches (index)
|
||
|
"Returns the full set of matches in INDEX, with an out-of-bound offset."
|
||
|
(let ((matches (make-hash-table)))
|
||
|
(reduce (lambda (n s-len)
|
||
|
(puthash n (list -1 0 (cdr s-len)) matches)
|
||
|
(1+ n))
|
||
|
(grizzl-index-strings index)
|
||
|
:initial-value 0)
|
||
|
matches))
|
||
|
|
||
|
(defun grizzl-result-term (result)
|
||
|
"Returns the search term used to find the matches in RESULT."
|
||
|
(car (car result)))
|
||
|
|
||
|
(defun grizzl-result-matches (result)
|
||
|
"Returns the internal hash used to track the matches in RESULT."
|
||
|
(cdar result))
|
||
|
|
||
|
(defun grizzl-index-insert (string list-offset index &rest options)
|
||
|
"Inserts STRING at LIST-OFFSET into INDEX."
|
||
|
(let ((case-sensitive (plist-get options :case-sensitive)))
|
||
|
(reduce (lambda (char-offset cs-char)
|
||
|
(let* ((char (if case-sensitive
|
||
|
cs-char
|
||
|
(downcase cs-char)))
|
||
|
(str-map (or (gethash char index)
|
||
|
(puthash char (make-hash-table) index)))
|
||
|
(offsets (gethash list-offset str-map)))
|
||
|
(puthash list-offset
|
||
|
(cons char-offset offsets)
|
||
|
str-map)
|
||
|
(1+ char-offset)))
|
||
|
string
|
||
|
:initial-value 0)))
|
||
|
|
||
|
(defun grizzl-lookup-table (index)
|
||
|
"Returns the lookup table portion of INDEX."
|
||
|
(cdr (assoc 'lookup-table index)))
|
||
|
|
||
|
(defun grizzl-index-strings (index)
|
||
|
"Returns the vector of strings stored in INDEX."
|
||
|
(cdr (assoc 'string-data index)))
|
||
|
|
||
|
(defun grizzl-index-case-sensitive-p (index)
|
||
|
"Predicate to test of INDEX is case-sensitive."
|
||
|
(cdr (assoc 'case-sensitive index)))
|
||
|
|
||
|
(defun grizzl-search-increment (sub-table result)
|
||
|
"Use the search lookup table to filter already-accumulated results."
|
||
|
(cl-flet ((next-offset (key current sub-table)
|
||
|
(find-if (lambda (v)
|
||
|
(> v current))
|
||
|
(gethash key sub-table))))
|
||
|
(maphash (lambda (k v)
|
||
|
(let* ((oldpos (car v))
|
||
|
(oldrank (cadr v))
|
||
|
(len (caddr v))
|
||
|
(newpos (next-offset k oldpos sub-table)))
|
||
|
(if newpos
|
||
|
(puthash k (list newpos
|
||
|
(grizzl-inc-rank oldrank oldpos newpos len)
|
||
|
len)
|
||
|
result)
|
||
|
(remhash k result))))
|
||
|
result)))
|
||
|
|
||
|
(defun grizzl-inc-rank (oldrank oldpos newpos len)
|
||
|
"Increment the current match distance as a new char is matched."
|
||
|
(let ((distance (if (< oldpos 0) 1 (- newpos oldpos))))
|
||
|
(+ oldrank (* len (* distance distance)))))
|
||
|
|
||
|
(provide 'grizzl-core)
|
||
|
|
||
|
;;; grizzl-core.el ends here
|