This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Implementation of a simple variant of RLE | |
;; Algorithm | |
;; Whenever a character is repeated (appears 2 times), we put a number after it | |
;; which inidicate how many more times it is repeated. | |
;; For instance, "wwwwwwwwbb3333333123" will be decoded into"ww6bb0335123" | |
(defun rle-encode (string) | |
(loop as prev-char = #\Null then char | |
for char in (coerce string 'list) | |
with repeat-times = 0 and stream = (make-string-output-stream) | |
if (and (char= prev-char char) (<= repeat-times 9)) do | |
(incf repeat-times) | |
else if (> repeat-times 0) do | |
(format stream "~a~a~a" prev-char (1- repeat-times) char) | |
(setf repeat-times 0) | |
else do | |
(format stream "~a" char) | |
finally (return (get-output-stream-string stream)))) | |
(defun rle-decode (string) | |
(loop for char in (coerce string 'list) | |
with prev-char = #\Null and repeated = nil and | |
stream = (make-string-output-stream) | |
if repeated do | |
(let* ((repeated-times (parse-integer (string char))) | |
(format-string (format nil "~~0,0,~a,'~a@a" repeated-times prev-char))) | |
(format stream "~@?" format-string prev-char) | |
(setf repeated nil prev-char #\Null)) | |
else if (char= prev-char char) do | |
(setf repeated T) | |
else do | |
(format stream "~a" char) | |
(setf prev-char char) | |
finally (return (get-output-stream-string stream)))) | |
;;Testing | |
(let* ((string "wwwwwwwwbb3333333123") | |
(encoded-string (rle-encode string)) | |
(decoded-string (rle-decode encoded-string))) | |
(format t "original string = ~a ~%" string) | |
(format t "encoded string = ~a ~%" encoded-string) | |
(format t "decoded string = ~a ~%" decoded-string)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Implementation of static minimum variance huffman algorithm | |
(defun hf-get-symbol-table (string &key (for 'encode)) | |
;; Get the symbol table :for can be either encode or decode | |
;; For encode, the hashtable will use the symobls as keys | |
;; For decode, the hashtable will use the prefix codes as keys | |
(labels ((sort-func (x y) (< (cdr x) (cdr y))) | |
(build-tree (list) | |
(if (<= (length list) 2) | |
(cons (caar list) (caadr list)) | |
(let* ((first-item (first list)) | |
(second-item (second list)) | |
(new-node (cons (cons (car first-item) | |
(car second-item)) | |
(+ (cdr first-item) | |
(cdr second-item))))) | |
(setf list (delete first-item list :test #'equal) | |
list (delete second-item list :test #'equal)) | |
(nconc list (list new-node)) | |
(build-tree (sort list #'sort-func))))) | |
(build-table (tree &optional (prefix nil) | |
(hashtable (make-hash-table :test 'equal))) | |
(if (characterp tree) | |
(if (eql for 'encode) | |
(setf (gethash tree hashtable) | |
prefix) | |
(setf (gethash prefix hashtable) | |
tree)) | |
(progn | |
(build-table (car tree) | |
(concatenate 'string prefix "0") | |
hashtable) | |
(build-table (cdr tree) | |
(concatenate 'string prefix "1") | |
hashtable) | |
hashtable)))) | |
(let* ((list (coerce string 'list)) | |
(unique-list (remove-duplicates list)) | |
(list-with-freq (loop for item in unique-list | |
collecting (cons item | |
(count item list)))) | |
(sorted-list (sort list-with-freq #'sort-func))) | |
(build-table (build-tree sorted-list))))) | |
(defun hf-print-table (string) | |
(let ((table (hf-get-symbol-table string))) | |
(format t "~%~10a ~10a ~10a~%" "Symbol" "Frequency" "Code") | |
(loop for key being each hash-key of table | |
do (format t "~10a ~10a ~10a~%" key (count key string) (gethash key table))))) | |
(defun hf-encode (string &key (destination *standard-output*)) | |
;; encode the given string, return a symbol table for decoding | |
(loop | |
with hashtable = (hf-get-symbol-table string) | |
for ch in (coerce string 'list) | |
do (format destination "~a" (gethash ch hashtable)) | |
finally (return (hf-get-symbol-table string :for 'decode)))) | |
(defun hf-decode (string hashtable &key (destination *standard-output*)) | |
;; decode the given string, according to symbol table | |
(loop with start = 0 | |
as end = 1 then (1+ end) | |
do (let ((hash-value (gethash (subseq string start end) hashtable))) | |
(if hash-value | |
(progn | |
(format destination "~a" hash-value) | |
(setf start end)))) | |
while (< end (length string)))) | |
;; Testing | |
(let* ((string "aaaaaaaaaaaaaaabbbbbbbccccccddddddeeeee") | |
(stream (make-string-output-stream)) | |
(table (hf-encode string :destination stream)) | |
(encoded-string (get-output-stream-string stream))) | |
(format t "original string = ~a~%" string) | |
(hf-print-table string) | |
(format t "~%encoded string = ~a~%" encoded-string) | |
(hf-decode encoded-string table :destination stream) | |
(format t "decoded string = ~a~%" (get-output-stream-string stream))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Implementation of static shanno-fano algorithm | |
;; Algorithm - (Taken from Wikipedia) | |
;; 1. For a given list of symbols, develop a corresponding list of probabilities | |
;; or frequency counts so that each symbol’s relative frequency of occurrence is known. | |
;; 2. Sort the lists of symbols according to frequency, with the most frequently | |
;; occurring symbols at the left and the least common at the right. | |
;; 3. Divide the list into two parts, with the total frequency counts of the left half | |
;; being as close to the total of the right as possible. | |
;; 4. The left half of the list is assigned the binary digit 0, and | |
;; the right half is assigned the digit 1. This means that the codes for the symbols in the | |
;; first half will all start with 0, and the codes in the second half will all start with 1. | |
;; 5. Recursively apply the steps 3 and 4 to each of the two halves, subdividing groups and | |
;; adding bits to the codes until each symbol has become a corresponding code leaf on the tree. | |
(defun get-divide-pos (sorted-list) | |
;; Get the position to divide the list into two parts (See step 3) | |
(labels ((divide-pos (list pos) | |
(let* ((bfc (loop for x from 0 to (1- pos) | |
summing (cdr (elt list x)))) | |
(afc (loop for x from (1+ pos) to (1- (length list)) | |
summing (cdr (elt list x)))) | |
(cfc (cdr (elt list pos)))) | |
(if (>= (+ afc cfc) bfc) | |
(if (< bfc afc) | |
(1+ pos) | |
pos) | |
(divide-pos list (1- pos)))))) | |
(divide-pos sorted-list (floor (/ (length sorted-list) 2))))) | |
(defun sf-get-symbol-table (string &key (for 'encode)) | |
;; Get the symbol table :for can be either encode or decode | |
;; For encode, the hashtable will use the symobls as keys | |
;; For decode, the hashtable will use the prefix codes as keys | |
(labels ((build-table (list &optional (prefix nil) | |
(hashtable (make-hash-table :test 'equal))) | |
(if (null (rest list)) | |
;; if the list of pairs has left only one pair, step 4 | |
(if (eql for 'encode) | |
(setf (gethash (car (first list)) hashtable) | |
prefix) | |
(setf (gethash prefix hashtable) | |
(car (first list)))) | |
;; else (step 5) divide the list into lhs and rhs, assign 0 and respectively | |
;; recursively apply step 3 and 4 | |
(let* ((pivot (get-divide-pos list))) | |
(build-table (subseq list 0 pivot) | |
(concatenate 'string prefix "0") | |
hashtable) | |
(build-table (subseq list pivot) | |
(concatenate 'string prefix "1") | |
hashtable) | |
;; finally return the hashtable | |
hashtable)))) | |
(let* ((list (coerce string 'list)) ; convert the string into a list of chars (symbol) | |
;; remove reoccuring symbols | |
(unique-list (remove-duplicates list)) | |
;; make a list of pairs in the form (symobol . frequency) | |
(list-with-freq (loop for item in unique-list | |
collecting (cons item | |
(count item list)))) | |
;; sort the list based on the frequency | |
(sorted-list (sort list-with-freq | |
(lambda (x y) (> (cdr x) (cdr y)))))) | |
;; build hash table | |
(build-table sorted-list)))) | |
(defun sf-encode (string &key (destination *standard-output*)) | |
;; encode the given string, return a symbol table for decoding | |
(loop | |
with hashtable = (sf-get-symbol-table string) | |
for ch in (coerce string 'list) | |
do (format destination "~a" (gethash ch hashtable)) | |
finally (return (sf-get-symbol-table string :for 'decode)))) | |
(defun sf-decode (string hashtable &key (destination *standard-output*)) | |
;; decode the given string, according to symbol table | |
(loop with start = 0 | |
as end = 1 then (1+ end) | |
do (let ((hash-value (gethash (subseq string start end) hashtable))) | |
(if hash-value | |
(progn | |
(format destination "~a" hash-value) | |
(setf start end)))) | |
while (< end (length string)))) | |
(defun sf-print-table (string) | |
(let ((table (sf-get-symbol-table string))) | |
(format t "~%~10a ~10a ~10a~%" "Symbol" "Frequency" "Code") | |
(loop for key being each hash-key of table | |
do (format t "~10a ~10a ~10a~%" key (count key string) (gethash key table))))) | |
;; Testing | |
(let* ((string "aaaaaaaaaaaaaaabbbbbbbccccccddddddeeeee") | |
(stream (make-string-output-stream)) | |
(table (sf-encode string :destination stream)) | |
(encoded-string (get-output-stream-string stream))) | |
(format t "original string = ~a~%" string) | |
(sf-print-table string) | |
(format t "~%encoded string = ~a~%" encoded-string) | |
(sf-decode encoded-string table :destination stream) | |
(format t "decoded string = ~a~%" (get-output-stream-string stream))) |