用Common Lisp实现的一个Huffman编码程序,

scinart posted @ 2013年4月28日 18:47 in Lisp with tags common lisp Huffman tree , 1641 阅读

;;; Time-stamp: <2013-04-28 18:28:34 scinart>

;;; by Scinart akukeej@gmail.com

;;; modified at 2013-04-28 Sunday 17:36:07

(defun string-to-list (str lst)
  "from \"abc\" to '(\#a \#b \#c)"
  (if (= 0 (length str))
      lst
      (string-to-list (subseq str 1) (cons (char str 0) lst))))
(defun occurrences (lst)
  "frome '(A B A) to '((A . 2) (B . 1))"
  (labels ((insert (mem array)
		   (let ((result (member mem array :key #'car)))
		     (cond (result (incf (cdar result)) array)
			   (t (push (cons mem 1) array)))))
	   (occurrences-aid (lst array)
			    (if (null lst)
				(sort array #'(lambda (a b) (< (cdr a) (cdr b))))
			      (let ((new-array (insert (car lst) array)))
				(occurrences-aid (cdr lst) new-array)))))
    (occurrences-aid lst nil)))
(defstruct (huffman (:conc-name h-)
		    (:print-function huffman-print))
  element
  frequency
  (l nil)
  (r nil))
(defun huffman-print (huff stream depth)
  "pretty print huffman tree"
  (when (not (null (h-r huff)))
    (huffman-print (h-r huff) stream (+ 1 depth)))
  (when (not (null (h-element huff)))
    (dotimes (i depth)
      (format stream "    "))
    (format stream "~A:~A~%" (h-element huff) (h-frequency huff)))
  (when (null (h-element huff))
    (dotimes (i depth)
      (format stream "    "))
    (format stream "#S:~A~%" (h-frequency huff)))
  (when (not (null (h-l huff)))
    (huffman-print (h-l huff) stream (+ 1 depth))))
(defun combine (a b)
  "Combine to huffman to a new father huffman
   set element to nil, and frequency the sum"
  (make-huffman :element nil
	    :frequency (+ (h-frequency a) (h-frequency b))
	    :l a
	    :r b))
(defun huffman-encode (huff)
  "encode a huffman tree which is not a leaf. pretty printed."
  (labels ((add0 (elt)
	     (cons (car elt) (concatenate 'string "0" (cdr elt))))
	   (add1 (elt)
	     (cons (car elt) (concatenate 'string "1" (cdr elt)))))
    (if (null (h-element huff))
	(append (mapcar #'add0 (huffman-encode (h-l huff)))
		(mapcar #'add1 (huffman-encode (h-r huff))))
	(list (cons (h-element huff) "")))))
(defun huffman-sort (&optional (< #'<))
  "return the function compares A.frequency and  B.frequency"
  #'(lambda (A B) (< (h-frequency A) (h-frequency B))))
(defun huffman-encode-string (str)
  "huffman encode a string"
  ;; test is a sorted list of huffman leaves.
  (let ((test (sort (mapcar #'(lambda (x)
				(make-huffman :element (car x)
					      :frequency (cdr x)))
			    (occurrences (string-to-list str nil)))
		    (huffman-sort))))
    ;; this loop reduce test to a huffman root.
    (loop
       (when (<= (length test) 1)
	 (return))
   ;; the sort here is not effective, should be replaced by binary insertion sort or maintained by a priorty queue.
   (setf test (sort (cons (combine (first test) (second test)) (nthcdr 2 test))
		    (huffman-sort))))
    (huffman-encode (car test))))
(huffman-encode-string "abracadabra")
(sort (huffman-encode-string "emacs acronyms
emacs: escape-meta-alt-control-shift
emacs: eight megabytes and constantly swapping
emacs: even a master of arts comes simpler
emacs: emacs manuals are cryptic and surreal
emacs: energetic merchants always cultivate sales
emacs: each manual's audience is completely stupified
emacs: emacs means a crappy screen
emacs: eventually munches all computer storage
emacs: even my aunt crashes the system
emacs: eradication of memory accomplished with complete simplicity
emacs: elsewhere maybe alternative civilizations survive
emacs: egregious managers actively court stallman
emacs: esoteric malleability always considered silly
emacs: emacs manuals always cause senility
emacs: easily maintained with the assistance of chemical solutions
emacs: edwardian manifestation of all colonial sins
emacs: extended macros are considered superfluous
emacs: every mode accelerates creation of software
emacs: elsewhere maybe all commands are simple
emacs: emacs may allow customised screwups
emacs: excellent manuals are clearly suppressed
emacs: emetic macros assault core and segmentation
emacs: embarrassed manual-writer accused of communist subversion
emacs: extensibility and modifiability aggravate confirmed simpletons
emacs: emacs may annihilate command structures
emacs: easily mangles, aborts, crashes and stupifies
emacs: extraneous macros and commands stink
emacs: exceptionally mediocre algorithm for computer scientists
emacs: emacs makes no allowances considering its stiff price
emacs: equine mammals are considerably smaller
emacs: embarrassingly mundane advertising cuts sales
emacs: every moron assumes cca is superior
emacs: exceptionally mediocre autocratic control system
emacs: emacs may alienate clients and supporters
emacs: excavating mayan architecture comes simpler
emacs: erasing minds allows complete submission
emacs: emacs makers are crazy sickos
emacs: eenie-meenie-miney-mo- macros are completely slow
emacs: experience the mildest ad campaign ever seen
emacs: emacs makefiles annihilate c- shells
emacs: eradication of memory accomplished with complete simplicity
emacs: emetic macros assault core and segmentation
emacs: epileptic mlisp aggravates compiler seizures
emacs: evenings, mornings, and a couple of saturdays
emacs: emacs makes all computing simple
emacs: emacs masquerades as comfortable shell
emacs: emacs: my alternative computer story
emacs: emacs made almost completely screwed
emacs: each mail a continued surprise
emacs: every mode acknowledges customized strokes
emacs: eating memory and cycle-sucking
emacs: everyday material almost compiled successfully
emacs: elvis masterminds all computer software
emacs: emacs makes a computer slow") #'(lambda (x y) (< (length (cdr x)) (length (cdr y)))))

sbcl的运行结果:

((#\s . "000") (#\a . "001") (#\  . "010") (#\e . "100") (#\i . "0110") (#\c . "1010") (#\m . "1011") (#\o . "11001") (#\n . "11011") (#\r . "11101") (#\l . "11110") (#\t . "11111") (#\p . "011111") (#\y . "110000") (#\Newline . "110100") (#\d . "110101") (#\: . "111000") (#\u . "111001") (#\b . "0111000") (#\w . "0111010") (#\v . "0111100") (#\f . "0111101") (#\h . "1100010") (#\g . "1100011") (#\x . "01110010") (#\k . "01110011") (#\- . "01110111") (#\z . "011101100") (#\, . "0111011011") (#\' . "01110110100") (#\q . "01110110101"))


登录 *


loading captcha image...
(输入验证码)
or Ctrl+Enter