my-emacs-d/elpa/grizzl-0.1.1/grizzl-core.el
2014-10-08 00:03:38 +02:00

227 lines
8.8 KiB
EmacsLisp

;;; grizzl-core.el --- Fast fuzzy search index for Emacs.
;; Copyright © 2013 Chris Corbyn
;;
;; Author: Chris Corbyn <chris@w3style.co.uk>
;; URL: https://github.com/d11wtq/grizzl
;; Version: 0.1.1
;; Keywords: convenience, usability
;; This file is NOT part of GNU Emacs.
;;; --- License
;; Licensed under the same terms as Emacs.
;;; --- Commentary
;; Grizzl provides a fuzzy completion framework for general purpose
;; use in Emacs Lisp projects.
;;
;; grizzl-core.el provides the underlying data structures and sesrch
;; algorithm without any UI attachment. At the core, a fuzzy search
;; index is created from a list of strings, using `grizzl-make-index'.
;; A fuzzy search term is then used to get a result from this index
;; with `grizzl-search'. Because grizzl considers the usage of a
;; fuzzy search index to operate in real-time as a user enters a
;; search term in the minibuffer, the framework optimizes for this use
;; case. Any result can be passed back into `grizzl-search' as a hint
;; to continue searching. The search algorithm is able to understand
;; insertions and deletions and therefore minimizes the work it needs
;; to do in this case. The intended use here is to collect a result
;; on each key press and feed that result into the search for the next
;; key press. Once a search is complete, the matched strings are then
;; read, using `grizzl-result-strings'. The results are ordered on the
;; a combination of the Levenshtein Distance and a character-proximity
;; scoring calculation. This means shorter strings are favoured, but
;; adjacent letters are more heavily favoured.
;;
;; It is assumed that the index will be re-used across multiple
;; searches on larger sets of data.
;;
;;
(eval-when-compile
(require 'cl-lib))
;;; --- Public Functions
;;;###autoload
(defun grizzl-make-index (strings &rest options)
"Makes an index from the list STRINGS for use with `grizzl-search'.
If :PROGRESS-FN is given as a keyword argument, it is called repeatedly
with integers N and TOTAL.
If :CASE-SENSITIVE is specified as a non-nil keyword argument, the index
will be created case-sensitive, otherwise it will be case-insensitive."
(let ((lookup-table (make-hash-table))
(total-strs (length strings))
(case-sensitive (plist-get options :case-sensitive))
(progress-fn (plist-get options :progress-fn))
(string-data (vconcat (mapcar (lambda (s)
(cons s (length s)))
strings))))
(reduce (lambda (list-offset str)
(grizzl-index-insert str list-offset lookup-table
:case-sensitive case-sensitive)
(when progress-fn
(funcall progress-fn (1+ list-offset) total-strs))
(1+ list-offset))
strings
:initial-value 0)
(maphash (lambda (char str-map)
(maphash (lambda (list-offset locations)
(puthash list-offset (reverse locations) str-map))
str-map)) lookup-table)
`((case-sensitive . ,case-sensitive)
(lookup-table . ,lookup-table)
(string-data . ,string-data))))
;;;###autoload
(defun grizzl-search (term index &optional old-result)
"Fuzzy searches for TERM in INDEX prepared with `grizzl-make-index'.
OLD-RESULT may be specified as an existing search result to increment from.
The result can be read with `grizzl-result-strings'."
(let* ((cased-term (if (grizzl-index-case-sensitive-p index)
term
(downcase term)))
(result (grizzl-rewind-result cased-term index old-result))
(matches (copy-hash-table (grizzl-result-matches result)))
(from-pos (length (grizzl-result-term result)))
(remainder (substring cased-term from-pos))
(lookup-table (grizzl-lookup-table index)))
(reduce (lambda (acc-res ch)
(let ((sub-table (gethash ch lookup-table)))
(if (not sub-table)
(clrhash matches)
(grizzl-search-increment sub-table matches))
(grizzl-cons-result cased-term matches acc-res)))
remainder
:initial-value result)))
;;;###autoload
(defun grizzl-result-count (result)
"Returns the number of matches present in RESULT."
(hash-table-count (grizzl-result-matches result)))
;;;###autoload
(defun grizzl-result-strings (result index &rest options)
"Returns the ordered list of matched strings in RESULT, using INDEX.
If the :START option is specified, results are read from the given offset.
If the :END option is specified, up to :END results are returned."
(let* ((matches (grizzl-result-matches result))
(strings (grizzl-index-strings index))
(loaded '())
(start (plist-get options :start))
(end (plist-get options :end)))
(maphash (lambda (string-offset char-offset)
(push string-offset loaded))
matches)
(let* ((ordered (sort loaded
(lambda (a b)
(< (cadr (gethash a matches))
(cadr (gethash b matches))))))
(best (if (or start end)
(delete-if-not 'identity
(subseq ordered (or start 0) end))
ordered)))
(mapcar (lambda (n)
(car (elt strings n)))
best))))
;;; --- Private Functions
(defun grizzl-cons-result (term matches results)
"Build a new result for TERM and hash-table MATCHES consed with RESULTS."
(cons (cons term matches) results))
(defun grizzl-rewind-result (term index result)
"Adjusts RESULT according to TERM, ready for a new search."
(if result
(let* ((old-term (grizzl-result-term result))
(new-len (length term))
(old-len (length old-term)))
(if (and (>= new-len old-len)
(string-equal old-term (substring term 0 old-len)))
result
(grizzl-rewind-result term index (cdr result))))
(grizzl-cons-result "" (grizzl-base-matches index) nil)))
(defun grizzl-base-matches (index)
"Returns the full set of matches in INDEX, with an out-of-bound offset."
(let ((matches (make-hash-table)))
(reduce (lambda (n s-len)
(puthash n (list -1 0 (cdr s-len)) matches)
(1+ n))
(grizzl-index-strings index)
:initial-value 0)
matches))
(defun grizzl-result-term (result)
"Returns the search term used to find the matches in RESULT."
(car (car result)))
(defun grizzl-result-matches (result)
"Returns the internal hash used to track the matches in RESULT."
(cdar result))
(defun grizzl-index-insert (string list-offset index &rest options)
"Inserts STRING at LIST-OFFSET into INDEX."
(let ((case-sensitive (plist-get options :case-sensitive)))
(reduce (lambda (char-offset cs-char)
(let* ((char (if case-sensitive
cs-char
(downcase cs-char)))
(str-map (or (gethash char index)
(puthash char (make-hash-table) index)))
(offsets (gethash list-offset str-map)))
(puthash list-offset
(cons char-offset offsets)
str-map)
(1+ char-offset)))
string
:initial-value 0)))
(defun grizzl-lookup-table (index)
"Returns the lookup table portion of INDEX."
(cdr (assoc 'lookup-table index)))
(defun grizzl-index-strings (index)
"Returns the vector of strings stored in INDEX."
(cdr (assoc 'string-data index)))
(defun grizzl-index-case-sensitive-p (index)
"Predicate to test of INDEX is case-sensitive."
(cdr (assoc 'case-sensitive index)))
(defun grizzl-search-increment (sub-table result)
"Use the search lookup table to filter already-accumulated results."
(cl-flet ((next-offset (key current sub-table)
(find-if (lambda (v)
(> v current))
(gethash key sub-table))))
(maphash (lambda (k v)
(let* ((oldpos (car v))
(oldrank (cadr v))
(len (caddr v))
(newpos (next-offset k oldpos sub-table)))
(if newpos
(puthash k (list newpos
(grizzl-inc-rank oldrank oldpos newpos len)
len)
result)
(remhash k result))))
result)))
(defun grizzl-inc-rank (oldrank oldpos newpos len)
"Increment the current match distance as a new char is matched."
(let ((distance (if (< oldpos 0) 1 (- newpos oldpos))))
(+ oldrank (* len (* distance distance)))))
(provide 'grizzl-core)
;;; grizzl-core.el ends here