Jun 20, 2013

Simple Data Compression Algorithms in Common Lisp

Common Lisp implementation of some data compression algorithms — Run-Length, Huffman, and Shannon–Fano Encodings — that I have written during my graduate course "Data Compression".

;; Implementation of a simple variant of RLE
;; Algorithm
;; Whenever a character is repeated (appears 2 times), we put a number after it
;; which inidicate how many more times it is repeated.
;; For instance, "wwwwwwwwbb3333333123" will be decoded into"ww6bb0335123"
(defun rle-encode (string)
(loop as prev-char = #\Null then char
for char in (coerce string 'list)
with repeat-times = 0 and stream = (make-string-output-stream)
if (and (char= prev-char char) (<= repeat-times 9)) do
(incf repeat-times)
else if (> repeat-times 0) do
(format stream "~a~a~a" prev-char (1- repeat-times) char)
(setf repeat-times 0)
else do
(format stream "~a" char)
finally (return (get-output-stream-string stream))))
(defun rle-decode (string)
(loop for char in (coerce string 'list)
with prev-char = #\Null and repeated = nil and
stream = (make-string-output-stream)
if repeated do
(let* ((repeated-times (parse-integer (string char)))
(format-string (format nil "~~0,0,~a,'~a@a" repeated-times prev-char)))
(format stream "~@?" format-string prev-char)
(setf repeated nil prev-char #\Null))
else if (char= prev-char char) do
(setf repeated T)
else do
(format stream "~a" char)
(setf prev-char char)
finally (return (get-output-stream-string stream))))
;;Testing
(let* ((string "wwwwwwwwbb3333333123")
(encoded-string (rle-encode string))
(decoded-string (rle-decode encoded-string)))
(format t "original string = ~a ~%" string)
(format t "encoded string = ~a ~%" encoded-string)
(format t "decoded string = ~a ~%" decoded-string))
view raw rle.lisp hosted with ❤ by GitHub
;; Implementation of static minimum variance huffman algorithm
(defun hf-get-symbol-table (string &key (for 'encode))
;; Get the symbol table :for can be either encode or decode
;; For encode, the hashtable will use the symobls as keys
;; For decode, the hashtable will use the prefix codes as keys
(labels ((sort-func (x y) (< (cdr x) (cdr y)))
(build-tree (list)
(if (<= (length list) 2)
(cons (caar list) (caadr list))
(let* ((first-item (first list))
(second-item (second list))
(new-node (cons (cons (car first-item)
(car second-item))
(+ (cdr first-item)
(cdr second-item)))))
(setf list (delete first-item list :test #'equal)
list (delete second-item list :test #'equal))
(nconc list (list new-node))
(build-tree (sort list #'sort-func)))))
(build-table (tree &optional (prefix nil)
(hashtable (make-hash-table :test 'equal)))
(if (characterp tree)
(if (eql for 'encode)
(setf (gethash tree hashtable)
prefix)
(setf (gethash prefix hashtable)
tree))
(progn
(build-table (car tree)
(concatenate 'string prefix "0")
hashtable)
(build-table (cdr tree)
(concatenate 'string prefix "1")
hashtable)
hashtable))))
(let* ((list (coerce string 'list))
(unique-list (remove-duplicates list))
(list-with-freq (loop for item in unique-list
collecting (cons item
(count item list))))
(sorted-list (sort list-with-freq #'sort-func)))
(build-table (build-tree sorted-list)))))
(defun hf-print-table (string)
(let ((table (hf-get-symbol-table string)))
(format t "~%~10a ~10a ~10a~%" "Symbol" "Frequency" "Code")
(loop for key being each hash-key of table
do (format t "~10a ~10a ~10a~%" key (count key string) (gethash key table)))))
(defun hf-encode (string &key (destination *standard-output*))
;; encode the given string, return a symbol table for decoding
(loop
with hashtable = (hf-get-symbol-table string)
for ch in (coerce string 'list)
do (format destination "~a" (gethash ch hashtable))
finally (return (hf-get-symbol-table string :for 'decode))))
(defun hf-decode (string hashtable &key (destination *standard-output*))
;; decode the given string, according to symbol table
(loop with start = 0
as end = 1 then (1+ end)
do (let ((hash-value (gethash (subseq string start end) hashtable)))
(if hash-value
(progn
(format destination "~a" hash-value)
(setf start end))))
while (< end (length string))))
;; Testing
(let* ((string "aaaaaaaaaaaaaaabbbbbbbccccccddddddeeeee")
(stream (make-string-output-stream))
(table (hf-encode string :destination stream))
(encoded-string (get-output-stream-string stream)))
(format t "original string = ~a~%" string)
(hf-print-table string)
(format t "~%encoded string = ~a~%" encoded-string)
(hf-decode encoded-string table :destination stream)
(format t "decoded string = ~a~%" (get-output-stream-string stream)))
view raw huffman.lisp hosted with ❤ by GitHub
;; Implementation of static shanno-fano algorithm
;; Algorithm - (Taken from Wikipedia)
;; 1. For a given list of symbols, develop a corresponding list of probabilities
;; or frequency counts so that each symbol’s relative frequency of occurrence is known.
;; 2. Sort the lists of symbols according to frequency, with the most frequently
;; occurring symbols at the left and the least common at the right.
;; 3. Divide the list into two parts, with the total frequency counts of the left half
;; being as close to the total of the right as possible.
;; 4. The left half of the list is assigned the binary digit 0, and
;; the right half is assigned the digit 1. This means that the codes for the symbols in the
;; first half will all start with 0, and the codes in the second half will all start with 1.
;; 5. Recursively apply the steps 3 and 4 to each of the two halves, subdividing groups and
;; adding bits to the codes until each symbol has become a corresponding code leaf on the tree.
(defun get-divide-pos (sorted-list)
;; Get the position to divide the list into two parts (See step 3)
(labels ((divide-pos (list pos)
(let* ((bfc (loop for x from 0 to (1- pos)
summing (cdr (elt list x))))
(afc (loop for x from (1+ pos) to (1- (length list))
summing (cdr (elt list x))))
(cfc (cdr (elt list pos))))
(if (>= (+ afc cfc) bfc)
(if (< bfc afc)
(1+ pos)
pos)
(divide-pos list (1- pos))))))
(divide-pos sorted-list (floor (/ (length sorted-list) 2)))))
(defun sf-get-symbol-table (string &key (for 'encode))
;; Get the symbol table :for can be either encode or decode
;; For encode, the hashtable will use the symobls as keys
;; For decode, the hashtable will use the prefix codes as keys
(labels ((build-table (list &optional (prefix nil)
(hashtable (make-hash-table :test 'equal)))
(if (null (rest list))
;; if the list of pairs has left only one pair, step 4
(if (eql for 'encode)
(setf (gethash (car (first list)) hashtable)
prefix)
(setf (gethash prefix hashtable)
(car (first list))))
;; else (step 5) divide the list into lhs and rhs, assign 0 and respectively
;; recursively apply step 3 and 4
(let* ((pivot (get-divide-pos list)))
(build-table (subseq list 0 pivot)
(concatenate 'string prefix "0")
hashtable)
(build-table (subseq list pivot)
(concatenate 'string prefix "1")
hashtable)
;; finally return the hashtable
hashtable))))
(let* ((list (coerce string 'list)) ; convert the string into a list of chars (symbol)
;; remove reoccuring symbols
(unique-list (remove-duplicates list))
;; make a list of pairs in the form (symobol . frequency)
(list-with-freq (loop for item in unique-list
collecting (cons item
(count item list))))
;; sort the list based on the frequency
(sorted-list (sort list-with-freq
(lambda (x y) (> (cdr x) (cdr y))))))
;; build hash table
(build-table sorted-list))))
(defun sf-encode (string &key (destination *standard-output*))
;; encode the given string, return a symbol table for decoding
(loop
with hashtable = (sf-get-symbol-table string)
for ch in (coerce string 'list)
do (format destination "~a" (gethash ch hashtable))
finally (return (sf-get-symbol-table string :for 'decode))))
(defun sf-decode (string hashtable &key (destination *standard-output*))
;; decode the given string, according to symbol table
(loop with start = 0
as end = 1 then (1+ end)
do (let ((hash-value (gethash (subseq string start end) hashtable)))
(if hash-value
(progn
(format destination "~a" hash-value)
(setf start end))))
while (< end (length string))))
(defun sf-print-table (string)
(let ((table (sf-get-symbol-table string)))
(format t "~%~10a ~10a ~10a~%" "Symbol" "Frequency" "Code")
(loop for key being each hash-key of table
do (format t "~10a ~10a ~10a~%" key (count key string) (gethash key table)))))
;; Testing
(let* ((string "aaaaaaaaaaaaaaabbbbbbbccccccddddddeeeee")
(stream (make-string-output-stream))
(table (sf-encode string :destination stream))
(encoded-string (get-output-stream-string stream)))
(format t "original string = ~a~%" string)
(sf-print-table string)
(format t "~%encoded string = ~a~%" encoded-string)
(sf-decode encoded-string table :destination stream)
(format t "decoded string = ~a~%" (get-output-stream-string stream)))

Internet connection sharing using iptables

Scenario ― I have several virtualbox GUEST machines, using an internal network on my PC. Now, I wanted to share host machine's internet connection to guest machines. I searched on the web how to do it using iptables. Most of the solutions seem too complex. Here is a working simple solution (found on centos documentation)

On Host Machine — type these commands in Terminal.

thura @ ~ $ sudo iptables -A FORWARD -i vboxnet0 -j ACCEPT
thura @ ~ $ sudo iptables -A FORWARD -o eth1 -j ACCEPT
thura @ ~ $ sudo iptables -t nat -A POSTROUTING -o eth1 -j MASQUERADE

That's it. Now you can access the external network of host machine from guest machines. To access Internet, you may need to edit /etc/resolve.conf in your guest machine.
# Dynamic resolv.conf(5) file for glibc resolver(3) generated by resolvconf(8)
#     DO NOT EDIT THIS FILE BY HAND -- YOUR CHANGES WILL BE OVERWRITTEN
nameserver 8.8.8.8

Jun 19, 2013

if_nametoindex, if_indextoname functions for python using ctypes


I needed to use if_nametoindex, if_indextoname functions for one of my packet capturing programs. So, here is my python wrapper for those functions using ctypes.


import ctypes
import ctypes.util
libc = ctypes.CDLL(ctypes.util.find_library('c'))
def if_nametoindex (name):
if not isinstance (name, str):
raise TypeError ('name must be a string.')
ret = libc.if_nametoindex (name)
if not ret:
raise RunTimeError ("Invalid Name")
return ret
def if_indextoname (index):
if not isinstance (index, int):
raise TypeError ('index must be an int.')
libc.if_indextoname.argtypes = [ctypes.c_uint32, ctypes.c_char_p]
libc.if_indextoname.restype = ctypes.c_char_p
ifname = ctypes.create_string_buffer (32)
ifname = libc.if_indextoname (index, ifname)
if not ifname:
raise RuntimeError ("Inavlid Index")
return ifname
PS: You will need this only for python2.x. Those functions are available in socket module starting since python 3.3.

Jun 13, 2013

Using Prolink Wireless Nano USB Adapter on Ubuntu

Recently, I purchased a prolink nano usb wireless adapter WN2001. However, when I tried to plug it into my ubuntu box, I am getting "hardware disabled errors".

trhura @ ~ $ sudo rfkill list
0: phy0: Wireless LAN
    Soft blocked: no
    Hard blocked: yes

This can be fixed by reloading the wireless module with swenc option.

trhura @ ~ $ sudo rmmod -f rtl8192cu; sudo modprobe rtl8192cu swenc=1 debug=5