# Contributed by Tim Churches, with a minor hack for detecting some
# graphical device.

from rpy import *

# hack
d = r.capabilities()
if d['png']:
    device = r.png
    ext='png'
elif d['jpeg']:
    device = r.jpeg
    ext='jpg'
else:
    device = r.postscript
    ext='ps'


faithful_data = {"eruption_duration":[],
                 "waiting_time":[]}
		 
f = open('faithful.dat','r')

for row in f.readlines()[1:]: # skip the column header line
    splitrow = row[:-1].split(" ")
    faithful_data["eruption_duration"].append(float(splitrow[0]))
    faithful_data["waiting_time"].append(int(splitrow[1]))

f.close()

ed = faithful_data["eruption_duration"] 
edsummary = r.summary(ed)
print "Summary of Old Faithful eruption duration data"
for k in edsummary.keys():
    print k + ": %.3f" % edsummary[k]
print    
print "Stem-and-leaf plot of Old Faithful eruption duration data"
print r.stem(ed)

file = 'faithful_histogram.'+ext
device(file,width=733,height=550)
r.hist(ed,r.seq(1.6, 5.2, 0.2), prob=1,col="lightgreen",main="Old Faithful eruptions",xlab="Eruption duration (seconds)")
r.lines(r.density(ed,bw=0.1),col="orange")
r.rug(ed)
r.dev_off()

long_ed = filter(lambda x: x > 3, ed)
file = 'faithful_ecdf.'+ext
device(file,width=733,height=550)
r.library('stepfun')
r.plot(r.ecdf(long_ed), do_points=0, verticals=1, main="Empirical cumulative distribution function of Old Faithful eruptions longer than 3 seconds")
x = r.seq(3,5.4,0.01)
r.lines(r.seq(3,5.4,0.01),r.pnorm(r.seq(3,5.4,0.01),mean=r.mean(long_ed), sd=r.sqrt(r.var(long_ed))), lty=3, lwd=2, col="red")
r.dev_off()

file = 'faithful_qq.'+ext
device(file,width=733,height=550)
r.par(pty="s")
r.qqnorm(long_ed, col="blue")
r.qqline(long_ed, col="red")
r.dev_off()

r.library('ctest')
print
print "Shapiro-Wilks normality test of Old Faithful eruptions longer than 3 seconds"
sw = r.shapiro_test(long_ed)
print "W = %.4f" % sw['statistic']['W']
print "p-value = %.5f" % sw['p.value']

print
print "One-sample Kolmogorov-Smirnov test of Old Faithful eruptions longer than 3 seconds"
ks = r.ks_test(long_ed,"pnorm", mean=r.mean(long_ed), sd=r.sqrt(r.var(long_ed)))
print "D = %.4f" % ks['statistic']['D']
print "p-value = %.4f" % ks['p.value']
print "Alternative hypothesis: %s" % ks['alternative']
print



syntax highlighted by Code2HTML, v. 0.9.1