(defvar *ok-set* "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.," "The list of characters which don't need to be escaped when writing URIs.")
(defun escape-as-uri (string) "Escapes all non alphanumeric characters in STRING following the URI convention. Returns a fresh string." (with-output-to-string (escaped) (write-as-uri string escaped)))
(defun write-as-uri (string stream) (loop for char across string if (find char *ok-set* :test #'char=) do (write-char char stream) else do (format stream "%~2,'0X" (char-code char))))
(defun char->hex-value (char) "Returns the number associated the hex value of CHAR. CHAR must be one of #\0 - #\9, #\a - #\f, #\A - #\F." (ecase char (#\0 0) (#\1 1) (#\2 2) (#\3 3) (#\4 4) (#\5 5) (#\6 6) (#\7 7) (#\8 8) (#\9 9) ((#\a #\A) 10) ((#\b #\B) 11) ((#\c #\C) 12) ((#\d #\D) 13) ((#\e #\E) 14) ((#\f #\F) 15)))
(defun make-escaped-table () (let ((table (make-array '(16 16) :element-type 'character :initial-element #\\))) (dotimes (i 16) (dotimes (j 16) (setf (aref table i j) (code-char (+ (* i 16) j))))) table))
(defvar *unescape-table* (make-escaped-table))
(defun nunescape-as-uri (string) (unescape-as-uri string))
(defun unescape-as-uri (string) (with-output-to-string (unescaped) (loop for index upfrom 0 while (< index (length string)) do (case (aref string index) (#\% (write-char (aref *unescape-table* (char->hex-value (aref string (incf index))) (char->hex-value (aref string (incf index)))) unescaped)) (#\+ (write-char #\Space unescaped)) (t (write-char (aref string index) unescaped))))))
This so blatently wrong its not even funny, and while this is exactly what I need I would do well to start using a "real" html escaping library (there are a couple to choose from).
(defun make-html-entities () (let ((ht (make-hash-table :test 'equal))) (flet ((add-mapping (char escaped) (setf (gethash char ht) escaped (gethash escaped ht) char))) (add-mapping #\< "<") (add-mapping #\> ">") (add-mapping #\& "&") (add-mapping #\" """) (add-mapping "a`" "à") (add-mapping "a'" "á") (add-mapping "e`" "è") (add-mapping "e'" "é") (add-mapping "i'" "ì") (add-mapping "i`" "í") (add-mapping "o`" "ò") (add-mapping "o'" "ó") (add-mapping "u`" "ù") (add-mapping "u'" "ú")) ht))
(defparameter *html-entites* (make-html-entities))
(defun write-as-html (string &key (stream t) (escape-whitespace nil)) (loop for char across string do (cond ((char= char #\Space) (if escape-whitespace (princ " " stream) (write-char char stream))) ((gethash char *html-entites*) (princ (gethash char *html-entites*) stream)) ((> (char-code char) 127) (princ "&#x" stream) (write (char-code char) :stream stream :base 16) (write-char #\; stream)) (t (write-char char stream)))))
(defun escape-as-html (string &key (escape-whitespace nil)) (with-output-to-string (escaped) (write-as-html string :stream escaped :escape-whitespace escape-whitespace)))
(define-condition html-escape-error (error) ((what :accessor html-escape-error.what :initarg :what)))
(define-condition unterminated-html-entity (html-escape-error) ())
(define-condition unknown-html-entity (html-escape-error) ())
(define-condition unknown-char-escape (warning) ((what :accessor html-escape-error.what :initarg :what)))
(defun unescape-as-html (string) (with-output-to-string (unescaped) (loop for offset upfrom 0 below (length string) for char = (aref string offset) if (char= #\& char) do (progn (aif (position #\; string :start offset) (let ((escape-tag (subseq string offset (1+ it)))) (aif (gethash escape-tag *html-entites*) (progn (princ it unescaped) (incf offset (1- (length escape-tag)))) (if (char= #\# (aref escape-tag 1)) ;; special code, ignore (restart-case (warn 'unknown-char-escape :what escape-tag) (continue-delete () :report "Continue processing, delete this char." (incf offset (1- (length escape-tag))))) (restart-case (error 'unknown-html-entity :what escape-tag) (continue-as-is () :report "Continue processing, leaving the string as is." (write-char #\& unescaped)) (continue-delete () :report "Continue processing, delete this entity." (incf offset (1- (length escape-tag)))))))) (restart-case (error 'unterminated-html-entity :what (subseq string offset (min (+ offset 20) (length string)))) (continue-as-is () :report "Continue processing, leave the string as is." (write-char #\& unescaped))))) else do (write-char char unescaped))))