180 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			EmacsLisp
		
	
	
	
	
	
			
		
		
	
	
			180 lines
		
	
	
		
			7.7 KiB
		
	
	
	
		
			EmacsLisp
		
	
	
	
	
	
;;; sx-encoding.el --- encoding                      -*- lexical-binding: t; -*-
 | 
						||
 | 
						||
;; Copyright (C) 2014  Sean Allred
 | 
						||
 | 
						||
;; Author: Sean Allred <code@seanallred.com>
 | 
						||
 | 
						||
;; This program is free software; you can redistribute it and/or modify
 | 
						||
;; it under the terms of the GNU General Public License as published by
 | 
						||
;; the Free Software Foundation, either version 3 of the License, or
 | 
						||
;; (at your option) any later version.
 | 
						||
 | 
						||
;; This program is distributed in the hope that it will be useful,
 | 
						||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
						||
;; GNU General Public License for more details.
 | 
						||
 | 
						||
;; You should have received a copy of the GNU General Public License
 | 
						||
;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
						||
 | 
						||
;;; Commentary:
 | 
						||
 | 
						||
;; This file handles decoding the responses we get from the API.  They
 | 
						||
;; are received either as plain-text or as a `gzip' compressed archive.
 | 
						||
;; For this, `sx-encoding-gzipped-p' is used to determine if content
 | 
						||
;; has been compressed under `gzip'.
 | 
						||
 | 
						||
;;; Code:
 | 
						||
 | 
						||
(require 'cl-lib)
 | 
						||
 | 
						||
 | 
						||
;;;; HTML Encoding
 | 
						||
 | 
						||
(defcustom sx-encoding-html-entities-plist
 | 
						||
  '(Aacute "Á" aacute "á" Acirc "Â" acirc "â" acute "´" AElig "Æ" aelig "æ"
 | 
						||
           Agrave "À" agrave "à" alefsym "ℵ" Alpha "Α" alpha "α" amp "&" and "∧"
 | 
						||
           ang "∠" apos "'" aring "å" Aring "Å" asymp "≈" atilde "ã" Atilde "Ã"
 | 
						||
           auml "ä" Auml "Ä" bdquo "„" Beta "Β" beta "β" brvbar "¦" bull "•"
 | 
						||
           cap "∩" ccedil "ç" Ccedil "Ç" cedil "¸" cent "¢" Chi "Χ" chi "χ"
 | 
						||
           circ "ˆ" clubs "♣" cong "≅" copy "©" crarr "↵" cup "∪" curren "¤"
 | 
						||
           Dagger "‡" dagger "†" darr "↓" dArr "⇓" deg "°" Delta "Δ" delta "δ"
 | 
						||
           diams "♦" divide "÷" eacute "é" Eacute "É" ecirc "ê" Ecirc "Ê" egrave "è"
 | 
						||
           Egrave "È" empty "∅" emsp " " ensp " " Epsilon "Ε" epsilon "ε" equiv "≡"
 | 
						||
           Eta "Η" eta "η" eth "ð" ETH "Ð" euml "ë" Euml "Ë" euro "€"
 | 
						||
           exist "∃" fnof "ƒ" forall "∀" frac12 "½" frac14 "¼" frac34 "¾" frasl "⁄"
 | 
						||
           Gamma "Γ" gamma "γ" ge "≥" gt ">" harr "↔" hArr "⇔" hearts "♥"
 | 
						||
           hellip "…" iacute "í" Iacute "Í" icirc "î" Icirc "Î" iexcl "¡" igrave "ì"
 | 
						||
           Igrave "Ì" image "ℑ" infin "∞" int "∫" Iota "Ι" iota "ι" iquest "¿"
 | 
						||
           isin "∈" iuml "ï" Iuml "Ï" Kappa "Κ" kappa "κ" Lambda "Λ" lambda "λ"
 | 
						||
           lang "〈" laquo "«" larr "←" lArr "⇐" lceil "⌈" ldquo "“" le "≤"
 | 
						||
           lfloor "⌊" lowast "∗" loz "◊" lrm "" lsaquo "‹" lsquo "‘" lt "<"
 | 
						||
           macr "¯" mdash "—" micro "µ" middot "·" minus "−" Mu "Μ" mu "μ"
 | 
						||
           nabla "∇" nbsp " " ndash "–" ne "≠" ni "∋" not "¬" notin "∉"
 | 
						||
           nsub "⊄" ntilde "ñ" Ntilde "Ñ" Nu "Ν" nu "ν" oacute "ó" Oacute "Ó"
 | 
						||
           ocirc "ô" Ocirc "Ô" OElig "Œ" oelig "œ" ograve "ò" Ograve "Ò" oline "‾"
 | 
						||
           omega "ω" Omega "Ω" Omicron "Ο" omicron "ο" oplus "⊕" or "∨" ordf "ª"
 | 
						||
           ordm "º" oslash "ø" Oslash "Ø" otilde "õ" Otilde "Õ" otimes "⊗" ouml "ö"
 | 
						||
           Ouml "Ö" para "¶" part "∂" permil "‰" perp "⊥" Phi "Φ" phi "φ"
 | 
						||
           Pi "Π" pi "π" piv "ϖ" plusmn "±" pound "£" Prime "″" prime "′"
 | 
						||
           prod "∏" prop "∝" Psi "Ψ" psi "ψ" quot "\"" radic "√" rang "〉"
 | 
						||
           raquo "»" rarr "→" rArr "⇒" rceil "⌉" rdquo "”" real "ℜ" reg "®"
 | 
						||
           rfloor "⌋" Rho "Ρ" rho "ρ" rlm "" rsaquo "›" rsquo "’" sbquo "‚"
 | 
						||
           scaron "š" Scaron "Š" sdot "⋅" sect "§" shy "" Sigma "Σ" sigma "σ"
 | 
						||
           sigmaf "ς" sim "∼" spades "♠" sub "⊂" sube "⊆" sum "∑" sup "⊃"
 | 
						||
           sup1 "¹" sup2 "²" sup3 "³" supe "⊇" szlig "ß" Tau "Τ" tau "τ"
 | 
						||
           there4 "∴" Theta "Θ" theta "θ" thetasym "ϑ" thinsp " " thorn "þ" THORN "Þ"
 | 
						||
           tilde "˜" times "×" trade "™" uacute "ú" Uacute "Ú" uarr "↑" uArr "⇑"
 | 
						||
           ucirc "û" Ucirc "Û" ugrave "ù" Ugrave "Ù" uml "¨" upsih "ϒ" Upsilon "Υ"
 | 
						||
           upsilon "υ" uuml "ü" Uuml "Ü" weierp "℘" Xi "Ξ" xi "ξ" yacute "ý"
 | 
						||
           Yacute "Ý" yen "¥" yuml "ÿ" Yuml "Ÿ" Zeta "Ζ" zeta "ζ" zwj "" zwnj "")
 | 
						||
  "Plist of HTML entities and their respective glyphs.
 | 
						||
See `sx-encoding-decode-entities'."
 | 
						||
  :type '(repeat (choice symbol string))
 | 
						||
  :group 'sx)
 | 
						||
 | 
						||
(defun sx-encoding-decode-entities (string)
 | 
						||
  "Decode HTML entities (e.g. \""\") in STRING.
 | 
						||
 | 
						||
Done according to `sx-encoding-html-entities-plist'.  If this
 | 
						||
list does not contain the entity, it is assumed to be a number
 | 
						||
and converted to a string (with `char-to-string').
 | 
						||
 | 
						||
Return the decoded string."
 | 
						||
  (let* ((plist sx-encoding-html-entities-plist)
 | 
						||
         (get-function
 | 
						||
          (lambda (s)
 | 
						||
            (let ((ss (substring s 1 -1)))
 | 
						||
              ;; Handle things like "
 | 
						||
              (or (plist-get plist (intern ss))
 | 
						||
                  ;; Handle things like '
 | 
						||
                  (char-to-string
 | 
						||
                   (string-to-number
 | 
						||
                    ;; Skip the `#'
 | 
						||
                    (substring ss 1))))))))
 | 
						||
    (replace-regexp-in-string "&[^; ]*;" get-function string)))
 | 
						||
 | 
						||
 | 
						||
;;;; Convenience Functions
 | 
						||
 | 
						||
(defun sx-encoding-normalize-line-endings (string)
 | 
						||
  "Normalize the line endings for STRING.
 | 
						||
The API returns strings that use Windows-style line endings.
 | 
						||
These are largely useless in an Emacs environment.  Windows uses
 | 
						||
\"\\r\\n\", Unix uses just \"\\n\".  Deleting \"\\r\" is sufficient for
 | 
						||
conversion."
 | 
						||
  (delete ?\r string))
 | 
						||
 | 
						||
(defun sx-encoding-clean-content (string)
 | 
						||
  "Clean STRING for display.
 | 
						||
Applies `sx-encoding-normalize-line-endings' and
 | 
						||
`sx-encoding-decode-entities' (in that order) to prepare STRING
 | 
						||
for sane display."
 | 
						||
  (sx-encoding-decode-entities
 | 
						||
   (sx-encoding-normalize-line-endings
 | 
						||
    string)))
 | 
						||
 | 
						||
(defun sx-encoding-clean-content-deep (data)
 | 
						||
  "Clean DATA recursively where necessary.
 | 
						||
 | 
						||
If DATA is a list or a vector, map this function over DATA and
 | 
						||
return as the the same type of structure.
 | 
						||
 | 
						||
If DATA is a cons cell (but not a list), use
 | 
						||
`sx-encoding-clean-content-deep' on the `cdr' of DATA.
 | 
						||
 | 
						||
If DATA is a string, return DATA after applying
 | 
						||
`sx-encoding-clean-content'.
 | 
						||
 | 
						||
Otherwise, return DATA.
 | 
						||
 | 
						||
This function is highly specialized for the data structures
 | 
						||
returned by `json-read' via `sx-request-make'.  It may fail in
 | 
						||
some cases."
 | 
						||
  (if (consp data)
 | 
						||
      (if (listp (cdr data))
 | 
						||
          (cl-map #'list #'sx-encoding-clean-content-deep data)
 | 
						||
        (cons (car data) (sx-encoding-clean-content-deep (cdr data))))
 | 
						||
    (cond
 | 
						||
     ((stringp data)
 | 
						||
      (sx-encoding-clean-content data))
 | 
						||
     ((vectorp data)
 | 
						||
      (cl-map #'vector #'sx-encoding-clean-content-deep data))
 | 
						||
     (t data))))
 | 
						||
 | 
						||
 | 
						||
;;;; GZIP
 | 
						||
 | 
						||
(defun sx-encoding-gzipped-p (data)
 | 
						||
  "Check for magic bytes in DATA.
 | 
						||
Check if the first two bytes of a string in DATA match the magic
 | 
						||
numbers identifying the gzip file format.
 | 
						||
 | 
						||
See URL `http://www.gzip.org/zlib/rfc-gzip.html'."
 | 
						||
  ;; Credit: http://emacs.stackexchange.com/a/2978
 | 
						||
  (equal (substring (string-as-unibyte data) 0 2)
 | 
						||
         (unibyte-string 31 139)))
 | 
						||
 | 
						||
(defun sx-encoding-gzipped-buffer-p (buffer)
 | 
						||
  "Check if BUFFER is gzip-compressed.
 | 
						||
See `sx-encoding-gzipped-p'."
 | 
						||
  (with-current-buffer buffer
 | 
						||
    (sx-encoding-gzipped-p
 | 
						||
     (buffer-string))))
 | 
						||
 | 
						||
(defun sx-encoding-gzipped-file-p (file)
 | 
						||
  "Check if the FILE is gzip-compressed.
 | 
						||
See `sx-encoding-gzipped-p'."
 | 
						||
  (let ((first-two-bytes (with-temp-buffer
 | 
						||
                           (set-buffer-multibyte nil)
 | 
						||
                           (insert-file-contents-literally file nil 0 2)
 | 
						||
                           (buffer-string))))
 | 
						||
    (sx-encoding-gzipped-p first-two-bytes)))
 | 
						||
 | 
						||
(provide 'sx-encoding)
 | 
						||
;;; sx-encoding.el ends here
 | 
						||
 | 
						||
;; Local Variables:
 | 
						||
;; indent-tabs-mode: nil
 | 
						||
;; End:
 |