Entropy calculator in Common Lisp

A snippet I have found very useful when doing information theory. It can assemble characters, numbers, words, or really whatever you want (you have to write the tokenizer) into a PMF and tell you various information-theoretic things about it.

Snippet is released under the GPLv3.

(defun new-pmf ()
  (let ((pmf nil))
    (lambda (msg obj)
          (case msg
                (show pmf)
                (add (let ((found (assoc obj pmf :test #'equalp)))
                        (if found
                                (rplacd found (1+ (cdr found)))
                                (setq pmf (acons obj 1 pmf)))))
                (sum (apply #'+ (mapcar #'cdr pmf)))
                (length (length pmf))))))
 
(defun show-pmf (pmf)
  (funcall pmf 'show nil))
 
(defun add-to-pmf (pmf obj)
  (funcall pmf 'add obj))
 
(defun pmf-sum (pmf)
  (funcall pmf 'sum nil))
 
(defun pmf-length (pmf)
  (funcall pmf 'length nil))
 
(defun frequencies (pmf)
    (mapcar (lambda (x) (cons (car x) (/ (cdr x) (pmf-sum pmf)))) 
            (show-pmf pmf)))
 
(defun entropy (pmf &optional (base 2))
  (- (apply #'+ (mapcar (lambda (x) (* (cdr x) (log (cdr x) base))) 
                        (frequencies pmf)))))
 
(defun chi-squared (pmf)
  (let* ((n (pmf-sum pmf))
         (f (frequencies pmf))
         (p (/ 1 n)))    
    (* n (apply #'+ (mapcar (lambda (x) (* p (expt (/ (- (cdr x) p) p) 2)))
                            f)))))

Leave a Reply

Your email address will not be published. Required fields are marked *