"""
Rpy glm example: Shows how to use glm (with family=binomial)
to predict the probability of a binary (yes or no) outcome.
I use it to produce probability forecasts from a weather
prediction model - this a simple toy example that illustrates
the process.
Jeff Whitaker
<Jeffrey.S.Whitaker@noaa.gov>
20030801
"""
import sys
from Numeric import *
from RandomArray import *
from MLab import *
import sys
sys.path.insert(1, "..")
from rpy import *
# specify correlation between forecast and observations.
fcstcorr = 0.5
# specify link function for glm (logit, probit or cloglog)
link = 'logit'
# specify exceedance threshold for probability forecast
thresh = 0.
# specify number of realizations.
nfcsts = 10000
# compute climatological probabaility of exceeding threshold.
climprob = r.pnorm(thresh,0.,1.)*ones(nfcsts,'f')
print 'fcstcorr = ',fcstcorr
print 'link = ',link
print 'thresh = ',thresh
print 'climo prob = ',max(climprob)
print 'nfcsts = ',nfcsts
# generate realizations from multivariate normal distribution
#
# obsvar fcstcov
# with cov matrix =
# fcstcov fcstvar
#
# obsvar is fixed to 1, fcstcov = fcstcorr*sqrt(fcstvar).
# so only free parameter is fcstcorr.
fcstvar = fcstcorr**2
fcstcov = fcstcorr*sqrt(fcstvar)
# repeat the process 1000 times.
for nrun in range(1000):
print 'run ',nrun
x = multivariate_normal([0.,0.], [[fcstcov,fcstvar],[1.,fcstcov]], nfcsts)
# Now have two time series that are correlated at r=fcstcorr.
# Call one the 'observations' and one the 'forecast'
obs = x[:,0]
fcst = x[:,1]
# train the glm (fit the model)
binaryobs = obs > thresh
d = r.data_frame(x=fcst.tolist(),y=binaryobs.tolist())
formula = r("y ~ x")
family = r("family=binomial(link="+link+")")
model = with_mode(NO_CONVERSION,r.glm)(formula,family=family,data=d)
print 'model coefficients = ',r.coef(model)
# independent data for verification
x = multivariate_normal([0.,0.], [[fcstcov,fcstvar],[1.,fcstcov]], nfcsts)
obs = x[:,0]
fcst = x[:,1]
# predict the probability that fcst > thresh, using
# the glm on independent data.
d = r.data_frame(x=fcst.tolist())
fcstprob = array(r.predict(model, newdata=d, type="response"))
# calculate brier skill score of the forecasts.
# bss = 0: forecasts are no better than climatology
# bss = 1: forecasts are perfect.
verif = obs > thresh
bs = mean((fcstprob - verif)**2)
bsclim = mean((climprob - verif)**2)
bss = 1.-(bs/bsclim)
print "brier skill score = ",bss
# try to delete the objects to free memory (doesn't work)
del model
del d
syntax highlighted by Code2HTML, v. 0.9.1