;;;;
;;;; statistics.lsp XLISP-STAT statistics functions
;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
;;;; You may give out copies of this software; for conditions see the file
;;;; COPYING included with this distribution.
;;;;

(provide "stats")

; setf method for select function
(defsetf select set-select)

;;;;
;;;; Data File Reading 
;;;;

(defun count-file-columns (fname)
"Args: (fname)
Returns the number of lisp items on the first nonblank line of file FNAME."
  (with-open-file (f fname)
    (if f
        (let ((line (do ((line (read-line f) (read-line f))) 
                        ((or (null line) (< 0 (length line))) line))))
          (if line
              (with-input-from-string (s line)
                (do ((n 0 (+ n 1)) (eof (gensym))) 
                    ((eq eof (read s nil eof)) n))))))))

(defvar *xlisptable* *readtable*)

(unless (fboundp 'open-file-dialog)
	(defun open-file-dialog (&optional set)
	  (declare (ignore set))
	  (if (system-has-windows)
	      (get-string-dialog "Enter a data file name:")
	      (error "You must provide a file name explicitly"))))

(defun read-data-file (&optional (file (open-file-dialog t)))
"Args:  (file)
Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
in which case the symbol'f print name is used."
  (if file
      (let ((oldtable *readtable*)
            (oldbreak *breakenable*)
            (eof (gensym)))
        (setq *readtable* *xlisptable*)
        (setq *breakenable* nil)
        (with-open-file (f file)
          (if f
              (unwind-protect
               (do* ((r (read f nil eof) (read f nil eof))
                     (x (list nil))
                     (tail x (cdr tail)))
                    ((eq r eof) (cdr x))
                    (setf (cdr tail) (list r)))
               (setq *breakenable* oldbreak)
               (setq *readtable* oldtable)))))))

;;; New definition to avoid stack size limit in apply
(defun read-data-columns (&optional (file (open-file-dialog t))
                                    (cols (if file 
                                              (count-file-columns file))))
"Args: (&optional file cols)
Reads the data in FILE as COLS columns and returns a list of lists representing the columns."
  (if (and file cols)
      (transpose (split-list (read-data-file file) cols))))

(defun load-data (file)
"Args: (file)
Read in data file from the data examples library."
  (load (merge-pathnames file
			 (make-pathname :directory '(:relative "Data")))))

(defun load-example (file)
"Args: (file)
Read in lisp example file from the examples library."
  (load (merge-pathnames file
			 (make-pathname :directory '(:relative "Examples")))))


;;;;
;;;; Listing and Saving Variables and Functions
;;;;

(defvar *variables* nil)
(defvar *ask-on-redefine* nil)

(defmacro def (symbol value)
"Syntax: (def var form)
VAR is not evaluated and must be a symbol.  Assigns the value of FORM to
VAR and adds VAR to the list *VARIABLES* of def'ed variables. Returns VAR.
If VAR is already bound and the global variable *ASK-ON-REDEFINE*
is not nil then you are asked if you want to redefine the variable."
  `(unless (and *ask-on-redefine*
                (boundp ',symbol)
                (not (y-or-n-p "Variable has a value. Redefine?")))
           (pushnew ',symbol *variables*)
           (setf ,symbol ,value)
           ',symbol))
  
(defun variables ()
"Args:()
Returns a list of the names of all def'ed variables."
  (sort (copy-list *variables*) #'string<=))

;;**** modify to use with-open-file
(defun savevar (vars file)
"Args: (vars file-name-root)
VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
whose print name is used) not endinf in .lsp. The VARS and their current values
are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
load command."
  (let ((f (open (concatenate 'string (string file) ".lsp")
		 :direction :output))
        (vars (if (consp vars) vars (list vars)))
        (oldbreak *breakenable*))
    (setq *breakenable* nil)
    (unwind-protect
      (mapcar
        (lambda (x)
            (if (objectp (symbol-value x))
                (print `(def ,x ,(send (symbol-value x) :save)) f)
                (print `(def ,x ',(symbol-value x)) f)))
        vars)
      (setq *breakenable* oldbreak)
      (close f))
    vars))

(defun undef (v)
"Args: (v)
If V is the symbol of a defined variable the variable it is unbound and
removed from the list of defined variables. If V is a list of variable
names each is unbound and removed. Returns V."
  (dolist (s (if (listp v) v (list v)))
          (when (member s *variables*)
                (setq *variables* (delete s *variables*))
                (makunbound s)))
  v)
        

;;;;
;;;; Basic Summary Statistics
;;;;

(defun standard-deviation (x)
"Args: (x)
Returns the standard deviation of the elements x. Vector reducing."
  (let ((n (count-elements x))
        (r (- x (mean x))))
    (sqrt (* (mean (* r r)) (/ n (- n 1))))))

(defun quantile (x p)
"Args: (x p)
Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
  (let* ((x (sort-data x))
         (n (length x))
         (np (* p (- n 1)))
         (low (floor np))
         (high (ceiling np)))
    (/ (+ (select x low) (select x high)) 2)))
    
(defun median (x) 
"Args: (x)
Returns the median of the elements of X."
  (quantile x 0.5))

(defun interquartile-range (x) 
"Args: (number-data)
Returns the interquartile range of the elements of X."
  (apply #'- (quantile x '(0.75 0.25))))

(defun fivnum (x) 
"Args: (number-data)
Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
max) of the elements X."
  (quantile x '(0 .25 .5 .75 1)))

(defun covariance-matrix (&rest args)
"Args: (&rest args)
Returns the sample covariance matrix of the data columns in ARGS. ARGS may
consist of lists, vectors or matrices."
  (let ((columns (apply #'append 
                        (mapcar (lambda (x) 
                                  (if (matrixp x) (column-list x) (list x)))
                                args))))
    (/ (cross-product (apply #'bind-columns 
                             (- columns (mapcar #'mean columns))))
       (- (length (car columns)) 1))))

;;;;
;;;; Basic Sequence Operations
;;;;

(defun difference (x)
"Args: (x)
Returns differences for a sequence X."
  (let ((n (length x)))
    (- (select x (iseq 1 (1- n))) (select x (iseq 0 (- n 2))))))

(defun rseq (a b num)
"Args: (a b num)
Returns a list of NUM equally spaced points starting at A and ending at B."
  (+ a (* (iseq 0 (1- num)) (/ (- b a) (1- num)))))


;;;;
;;;; Linear Algebra Functions
;;;;

(defun matrix (dim data)
"Args: (dim data)
returns a matrix of dimensions DIM initialized using sequence DATA
in row major order." 
  (let ((dim (coerce dim 'list))
        (data (coerce data 'list)))
    (make-array dim :initial-contents (split-list data (nth 1 dim)))))

#|
(defun print-matrix (a &optional (stream *standard-output*))
"Args: (matrix &optional stream)
Prints MATRIX to STREAM in a nice form that is still machine readable"
  (unless (matrixp a) (error "not a matrix - ~a" a))
  (let ((size (min 15 (max (map-elements #'flatsize a)))))
    (format stream "#2a(~%")
    (dolist (x (row-list a))
            (format stream "    (")
            (let ((n (length x)))
              (dotimes (i n)
                       (let ((y (aref x i)))
                         (cond
                           ((integerp y) (format stream "~vd" size y))
                           ((floatp y) (format stream "~vg" size y))
                           (t (format stream "~va" size y))))
                       (if (< i (- n 1)) (format stream " "))))
            (format stream ")~%"))
    (format stream "   )~%")
    nil))
|#
;; **** temporary modification for new printing -- needs rethinking
(defun print-matrix (a &optional (stream *standard-output*)
		       &key (float-digits 6))
"Args: (matrix &optional stream &key (float-digits 6))
Prints MATRIX to STREAM in a nice form that is still machine readable"
  (unless (matrixp a) (error "not a matrix - ~a" a))
  (let ((float-size (+ 7 float-digits))
	(size 0))
    (map-elements
     #'(lambda (x)
	 (setf size
	       (max size
		    (cond
		     ((floatp x) float-size)
		     ((integerp x) (+ (flatsize x) 4))
		     (t (flatsize x))))))
     a)
    (format stream "#2a(~%")
    (dolist (x (row-list a))
      (format stream "    (")
      (let ((n (length x)))
	(dotimes (i n)
	  (let ((y (aref x i)))
	    (cond
	     ((integerp y) (format stream "~vd~4@t" (- size 4) y))
	     ((floatp y) (format stream "~v,vg" size float-digits y))
	     (t (format stream "~va" size y))))
	  (if (< i (- n 1)) (format stream " "))))
      (format stream ")~%"))
    (format stream "   )~%")
    nil))

(defun array-to-nested-list (array)
  (let ((n (array-rank array))
	(alist (combine array)))
    (do ((i (- n 1) (- i 1)))
	((<= i 0) alist)
	(setf alist (split-list alist (array-dimension array i))))))

(defun solve (a b)
"Args: (a b)
Solves A x = B using LU decomposition and backsolving. B can be a sequence
or a matrix."
  (let ((lu (lu-decomp a)))
    (if (matrixp b)
        (apply #'bind-columns 
               (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
        (lu-solve lu b))))
        
(defun backsolve (a b)
"Args: (a b)
Solves A x = B by backsolving, assuming A is upper triangular. B must be a
sequence. For use with qr-decomp."
  (let* ((n (length b))
         (sol (make-array n)))
    (dotimes (i n)
             (let* ((k (- n i 1))
                    (val (elt b k)))
               (dotimes (j i)
                        (let ((l (- n j 1)))
                          (setq val (- val (* (aref sol l) (aref a k l))))))
               (setf (aref sol k) (/ val (aref a k k)))))
    (if (listp b) (coerce sol 'list) sol)))

(defun eigenvalues (a) 
"Args: (a)
Returns list of eigenvalues of square, symmetric matrix A"
  (first (eigen a)))

(defun eigenvectors (a) 
"Args: (a)
Returns list of eigenvectors of square, symmetric matrix A"
  (second (eigen a)))

(defun accumulate (f s)
"Args: (f s)
Accumulates elements of sequence S using binary function F.
(accumulate #'+ x) returns the cumulative sum of x."
  (let* ((result (list (elt s 0)))
         (tail result))
    (flet ((acc (dummy x)
                (declare (ignore dummy))
                (rplacd tail (list (funcall f (first tail) x)))
                (setf tail (cdr tail))))
      (reduce #'acc s))
    (if (vectorp s) (coerce result 'vector) result)))

(defun cumsum (x)
"Args: (x)
Returns the cumulative sum of X."
  (accumulate #'+ x))

(defun combine (&rest args) 
"Args (&rest args) 
Returns sequence of elements of all arguments."
  (copy-seq (element-seq args)))

(defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
"Args: (x y &key (f .25) (steps 2) delta sorted)
Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
each point, STEPS is the number of robust iterations. Fits for points within
DELTA of each other are interpolated linearly. If the X values setting SORTED
to T speeds up the computation."
  (multiple-value-bind
   (x y)
   (let ((x (coerce x '(vector c-double)))
	 (y (coerce y '(vector c-double))))
     (if sorted
	 (values x y)
	 (let ((ord (order x)))
	   (values (select x ord) (select y ord)))))
   (let* ((n (length x))
	  (ys (make-array n :element-type 'c-double))
	  (rw (make-array n :element-type 'c-double))
	  (res (make-array n :element-type 'c-double))
	  (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
     (when (base-lowess x y n f steps delta ys rw res)
	   (error "bad lowess data"))
     (list (coerce x 'list) (coerce ys 'list)))))

(defparameter *default-smoother-points* 30)

(defun spline (x y &key (xvals *default-smoother-points*))
"Args: (x y &key xvals)
Returns list of x and y values of natural cubic spline interpolation of (X,Y).
X must be strictly increasing. XVALS can be an integer, the number of equally
spaced points to use in the range of X, or it can be a sequence of points at 
which to interpolate."
  (multiple-value-bind
   (n x y ns xs ys)
   (get-smoother-data x y xvals t)
   (let ((work (make-array (* 2 n) :element-type 'c-double)))
     (when (base-spline n x y ns xs ys work) (error "bad spline data"))
     (list (coerce xs 'list) (coerce ys 'list)))))

(defun kernel-dens (x &key
		      (xvals *default-smoother-points*)
		      (width -1.0)
		      (type 'B))
"Args: (x &key xvals width type)
Returns list of x and y values of kernel density estimate of X. XVALS can be an
integer, the number of equally spaced points to use in the range of X, or it
can be a sequence of points at which to interpolate. WIDTH specifies the
window width. TYPE specifies the lernel and should be one of the symbols G, T,
U or B for gaussian, triangular, uniform or bisquare. The default is B."
  (multiple-value-bind
   (n x y ns xs ys)
   (get-smoother-data x nil xvals nil)
   (when (base-kernel-smooth n x y ns xs ys width type)
	 (error "bad data for smoother"))
   (list (coerce xs 'list) (coerce ys 'list))))

(defun kernel-smooth (x y &key
			(xvals *default-smoother-points*)
			(width -1.0)
			(type 'B))
"Args: (x y &key xvals width type)
Returns list of x and y values of kernel smooth of (X,Y). XVALS can be an
integer, the number of equally spaced points to use in the range of X, or it
can be a sequence of points at which to interpolate. WIDTH specifies the
window width. TYPE specifies the lernel and should be one of the symbols G, T,
U or B for Gaussian, triangular, uniform or bisquare. The default is B."
  (multiple-value-bind
   (n x y ns xs ys)
   (get-smoother-data x y xvals t)
   (when (base-kernel-smooth n x y ns xs ys width type)
	 (error "bad data for smoother"))
   (list (coerce xs 'list) (coerce ys 'list))))

;;;;
;;;; Sorting Functions
;;;; (Moved to Lisp since MWERKS qsort is awful)
;;;;

(defun sort-data (x)
  (let ((xlist (if (listp x)
		   (copy-list x)
		 (coerce (compound-data-seq x) 'list))))
    (sort xlist #'sort-data<)))

(defun order (x)
  (let* ((xlist (if (listp x) x (coerce (compound-data-seq x) 'list)))
	 (data (mapcar #'cons xlist (iseq (length xlist))))
	 (sdata (sort data #'order<)))
    (mapcar #'cdr sdata)))

(defun xlisp::make-compound (form seq)
  (cond
   ((listp form) (coerce seq 'list))
   ((stringp form) (error "not a combound data item - ~s" seq))
   ;;**** preserve element type??
   ((vectorp form) (coerce seq 'vector))
   ((arrayp form)
    (make-array (array-dimensions form) :displaced-to (coerce seq 'vector)))
   (t (error "not a combound data item - ~s" seq))))

(defun rank (x) (xlisp::make-compound x (order (order x))))


syntax highlighted by Code2HTML, v. 0.9.1