;;                 COPYRIGHT NOTICE
;;  
;;  Copyright (C) 2006 Mario Rodriguez Riotorto
;;  
;;  This program is free software; you can redistribute
;;  it and/or modify it under the terms of the
;;  GNU General Public License as published by
;;  the Free Software Foundation; either version 2 
;;  of the License, or (at your option) any later version. 
;;  
;;  This program is distributed in the hope that it
;;  will be useful, but WITHOUT ANY WARRANTY;
;;  without even the implied warranty of MERCHANTABILITY
;;  or FITNESS FOR A PARTICULAR PURPOSE. See the 
;;  GNU General Public License for more details at
;;  http://www.gnu.org/copyleft/gpl.html

;;  This is a set of numerical routines used by package 'stats'

;; For questions, suggestions, bugs and the like, feel free
;; to contact me at
;; mario @@@ edu DOT xunta DOT es
;; www.biomates.net


(in-package :maxima)

;; para traducir funciones de usuario a lisp:
;; (meval '($translate $random_normal))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                       ;;
;;    Wilcoxon-Mann-Whitney recursion    ;;
;;                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;  This recursion is used when computing exact probabilities
;;  for the distribution of the rank statistic in the Wilcoxon-
;;  Mann-Whitney test, specially for small samples. It's defined as
;;             a(i,j,k) = a(i,j-1,j-k) + a(i-k,j-1,j-k-1)
;;  Reference:
;;     Klotz, Jerome (2006) 'A Computational Approach to Statistics',
;;          available at www.stat.wisc.edu/~klotz/Book.pdf
(defun rank_sum_recursion (i j k)
    (cond ((or (< i 0) (< j 0) (< k 0)) 0)
          ((= i 0) 1)
          (t (+ (rank_sum_recursion i (- j 1) (- j k))
                (rank_sum_recursion (- i k) (- j 1) (+ j (- k) (- 1)) )))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                       ;;
;;        Signed rank recursion          ;;
;;                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;  This recursion is used when computing exact probabilities
;;  for the true distribution of statistic in the signed rank test,
;;  specially for small samples. It's defined as
;;             a(i,j) = a(i,j-1) + a(i-j,j-1)
;;  Reference:
;;     Klotz, Jerome (2006) 'A Computational Approach to Statistics',
;;          available at www.stat.wisc.edu/~klotz/Book.pdf
;;     R statistical package. File signrank.c
(defun signed_rank_recursion (i j)
  (let* ((u (/ (* j (+ j 1)) 2))
         (c (floor (/ u 4))))
    (if (> i c) (setf i (- u i)))
    (cond ((or (< i 0) (< j 0)) 0)
          ((= i 0) 1)
          (t (+ (signed_rank_recursion i (- j 1))
                (signed_rank_recursion (- i j) (- j 1)))))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                       ;;
;;    Shapiro-Wilk test for normality    ;;
;;                                       ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Calculates the algebraic polynomial of order (- (lengh cc) 1)
;; with array of coefficients cc. Zero order coefficient is (aref cc 0).
(defun swpoly (cc x)
  (let ( (ret-val (aref cc 0))
         (nord (length cc))
         p)
    (when (> nord 1) 
      (setf p (* x (aref cc (1- nord))))
      (do ((j (- nord 2) (1- j)))
          ((= j 0) 'done)
        (setf p (* x (+ p (aref cc j)))))
      (setf ret-val (+ ret-val p)) )
    ret-val ) )


;;  Calculates the Shapiro-Wilk test and its significance level
;;  This is a translation from Fortran to Lisp of swilk.f, which translation
;;  to C is used by the R statistical package. This is
;;  Algorithm AS R94, Applied Statistics (1995), vol.44, no.4, 547-551
;;  Argument 'x' is a Maxima list containing numbers. Optional argument 'n1'
;;  is the number of uncensored observations; this option is not documented
;;  in Maxima at this moment, since I haven't checked it; in fact, the R 
;;  function 'shapiro.test' doesn't make use of this second argument (I don't know why).
(defun $test_normality (x &optional n1)
   (if (not ($listp x))
      (merror "First argument of 'test_normality' must be a Maxima list"))
   (setf x (sort (map 'list #'$float (rest x)) #'<))
   (if (null n1) (setf n1 (length x)))
   (let* (($numer $stats_numer)
          (n (length x))
          (n2 (floor (/ n 2)))
          w pw  ; W statistic and its p-value
          ; initialized data
          (z90 1.2816)  (z95 1.6449)
          (z99 2.3263)  (zm 1.7509)   (zss 0.56268)
          (bf1 0.8378)  (xx90 0.556)  (xx95 0.622)
          (sqrth 0.70711)  (small 1e-19)  (pi6 1.909859) (stqr 1.047198)
          ; polynomial coefficients
          (g (make-array 2 :element-type 'double-float
               :initial-contents '(-2.273 0.459)))
          (c1 (make-array 6 :element-type 'double-float
               :initial-contents '(0.0 0.221157 -0.147981 -2.07119 4.434685 -2.706056)))
          (c2 (make-array 6 :element-type 'double-float
               :initial-contents '(0.0 0.042981 -0.293762 -1.752461 5.682633 -3.582633)))
          (c3 (make-array 4 :element-type 'double-float
               :initial-contents '(0.544 -0.39978 0.025054 -6.714e-4)))
          (c4 (make-array 4 :element-type 'double-float
               :initial-contents '(1.3822 -0.77857 0.062767 -0.0020322)))
          (c5 (make-array 4 :element-type 'double-float
               :initial-contents '(-1.5861 -0.31082 -0.083751 0.0038915)))
          (c6 (make-array 3 :element-type 'double-float
               :initial-contents '(-0.4803 -0.082676 0.0030302)))
          (c7 (make-array 2 :element-type 'double-float
               :initial-contents '(0.164 0.533)))
          (c8 (make-array 2 :element-type 'double-float
               :initial-contents '(0.1736 0.315)))
          (c9 (make-array 2 :element-type 'double-float
               :initial-contents '(0.256 -0.00635)))
          ncens nn2 i1 range delta a2 a1 an25 an rsn fac ssumm2 summ2 xi xx y w1 
          ssassx xsx asa sax ssa ssx sx sa zbar zsd zfm z99f z95f 
          z90f bf ld s m gamma a)

      (setf an (coerce n 'double-float))
      (setf ncens (- n n1))   ; number of censored observations
      (setf nn2 (truncate n 2))
      (if (or (< n 3) (> n 5000))
         (merror "Sample size must be between 3 and 5000"))
      (setf range (- (car (last x)) (first x)))
      (if (< range small)
         (merror "All observations are identical"))
      (if (< n1 3) (merror "Number of uncensored observations is too small"))
      (if (< ncens 0) 
         (merror "Number of uncensored observations is greater than total sample size!!"))
      (if (and (> ncens 0) (< n 20))
         (merror "Sample size is too small, and censored data are not allowed"))
      (setf delta (/ ncens an))
      (if (> delta 0.8)
         (merror "Ratio of censored observations is too great (>80%)"))

      ; calculate coefficients for statistic w
      (setf a (make-array (1+ n2) :element-type 'double-float :initial-element 0.0))
      (cond ((= n 3)
                (setf (aref a 1) sqrth))
            (t  (setf an25 (+ 0.25 an))
                (setf summ2 0.0)
                (do ((i 1 (1+ i)))
                   ((> i n2) 'done)
                   (setf (aref a i) (coerce (ppnd16 (/ (- i 0.375) an25)) 'double-float))
                   (setf summ2 (+ summ2 (* (aref a i) (aref a i)))))
                (setf summ2 (* summ2 2.0))
                (setf ssumm2 (sqrt summ2))
                (setf rsn (/ 1.0 (sqrt an)))
                (setf a1 (- (swpoly c1 rsn) (/ (aref a 1) ssumm2)))
                ; normalize a
                (cond ((> n 5)
                         (setf i1 3)
                         (setf a2 (- (swpoly c2 rsn) (/ (aref a 2) ssumm2) ))
                         (setf fac (sqrt (/ (+ summ2
                                               (* (aref a 1) (aref a 1) -2.0)
                                               (* (aref a 2) (aref a 2) -2.0))
                                            (+ 1.0 (* a1 a1 -2.0) (* a2 a2 -2.0)))))
                         (setf (aref a 2) a2)  )
                      (t (setf i1 2)
                         (setf fac (sqrt (/ (- summ2 (* 2.0 (aref a 1) (aref a 1)))
                                            (- 1.0 (* 2.0 a1 a1)))))     ))
                (setf (aref a 1) a1)
                (do ((i i1 (1+ i)))
                     ((> i nn2) 'done)
                   (setf (aref a i) (/ (aref a i) (- fac))))     ))

      ; check for correct sort order on range - scaled X
      (setf xx (/ (first x) range))
      (setf sx xx)
      (setf sa (- (aref a 1)))
      (do ((j (1- n) (1- j))
           (i 1 i))
          ((>= i n1) 'done)
        (setf xi (/ (nth i x) range))
        (setf sx (+ sx xi))
        (setf i (1+ i))
        (if (/= i j)
            (setf sa (+ sa (* (signum (- i j)) (aref a (min i j))))))
        (setf xx xi) )

      ; Calculate W statistic as squared correlation
      ; between data and coefficients
      (setf sa (/ sa n1))
      (setf sx (/ sx n1))
      (setf ssa 0.0
            ssx 0.0
            sax 0.0)
      (do ((j (1- n) (1- j))
           (i 0 (1+ i)))
        ((>= i n1) 'done)
        (if (/= i j)
            (setf asa (- (* (signum (- i j)) (aref a (+ 1 (min i j)))) sa))
            (setf asa (- sa)))
        (setf xsx (- (/ (nth i x) range) sx))
        (setf ssa (+ ssa (* asa asa)))
        (setf ssx (+ ssx (* xsx xsx)))
        (setf sax (+ sax (* asa xsx)))  )

      ; w1 equals (1-w) calculated to avoid excessive rounding error
      ; for W very near 1 (a potential problem in very large samples)
      (setf ssassx (sqrt (* ssa ssx)))
      (setf w1 (/ (* (- ssassx sax) (+ ssassx sax))
                  (* ssa ssx)))
      (setf w (- 1 w1))

      ; calculate significance level for w
      (tagbody
         (when (= n 3)
            (setf pw (* pi6 (- ($asin (sqrt w)) stqr)))
            (go fin))
         (setf y  (log w1)
               xx (log an))
         (cond ((<= n 11)
                  (setf gamma (swpoly g an))
                  (when (>= y gamma)
                     (setf pw small)
                     (go fin))
                  (setf y (- (log (- gamma y)))
                        m (swpoly c3 an)
                        s (exp (swpoly c4 an))))
               (t ; n >= 12
                  (setf m (swpoly c5 xx)
                        s (exp (swpoly c6 xx)))))

          ; censoring by proportion ncens/n
          ; calculate mean and sd of normal equivalent deviate of w
          (when (> ncens 0)
             (setf ld (- (log delta))
                   bf (+ 1.0 (* xx bf1)))
             (setf z90f (+ z90 (* bf (expt (swpoly c7 (expt xx90 xx)) ld))))
             (setf z95f (+ z95 (* bf (expt (swpoly c8 (expt xx95 xx)) ld))))
             (setf z99f (+ z99 (* bf (expt (swpoly c9 xx) ld))))
             ; regress z90F,...,z99F on normal deviates z90,...,z99 to get
             ; pseudo-mean and pseudo-sd of z as the slope and intercept
             (setf zfm (/ (+ z90f z95f z99f) 3.0))
             (setf zsd (/ (+ (* z90 (- z90f zfm))
                             (* z95 (- z95f zfm))
                             (* z99 (- z99f zfm)))
                          zss))
             (setf zbar (- zfm (* zsd zm)))
             (setf m (+ m (* zbar s)))
             (setf s (* s zsd)) )

          (setf pw (- 0.5 (* 0.5 (mfuncall '%erf (/ (- y m)
                                                    (* 1.41421356 s))))))
        fin)

      ; the result is an 'inference_result' Maxima object
      ($inference_result
         '|&shapiro - wilk test|
         (list '(mlist) (list '(mlist) '$statistic w)
                        (list '(mlist) '$p_value pw))
         (list '(mlist) 1 2) )  ))