# R Statistical Programming
#########Statistical Package (Application of R_Statistical Programming)
#Q1 Chi-square test of independence, cross-checked with prop.test (the more robust solution)
#The table below comes from a study that investigated the number of
#people who prefer a particular brand of hair conditioner by age group
# Q1 data: number of people preferring each brand, split by age group.
# Columns are the age groups, rows are the brands.
Lessthan40years <- c(23, 34)
Over40years <- c(35, 132)
People1 <- data.frame(Lessthan40years, Over40years)
rownames(People1) <- c("BrandA", "BrandB")
People1

# H0: the age-group proportions are the same for both brands (brand preference
# is independent of age), against the two-sided alternative H1 that they differ.
# prop.test(x, n) takes x = number of "successes" and n = number of trials for
# each group. Each brand is one group here, so n holds the brand (row) totals
# and x holds one age-group column, i.e. one count per brand. Mixing counts
# from different columns into x would pair them with the wrong totals.
# The results get their own names so the data vectors above are not overwritten.
BrandTotals <- rowSums(People1)
PropLessthan40 <- prop.test(x = People1$Lessthan40years, n = BrandTotals)
PropOver40 <- prop.test(x = People1$Over40years, n = BrandTotals)
# Assignments do not auto-print, so show the test results explicitly.
print(PropLessthan40)
print(PropOver40)

# Chi-square test on the same 2x2 table; for two groups it gives the same
# X-squared and p-value as the prop.test calls above.
chisq.test(People1)
#Conclusion: reject H0 - brand preference is not independent of age, since the p-value = 0.006711 < 0.05
##################################################################################
#################################################################################
#The same chi-square test run directly on the table, without going through prop.test (less robust)
#Would you say that brand preference is independent of age?
#Justify your answer (including code where necessary)
# Build the 2x2 table directly as a matrix. The counts are listed one age
# group at a time -- (23, 34) for under 40, then (35, 132) for over 40 -- so
# the matrix is filled column by column. That keeps every count under its own
# age-group heading and gives the same layout as the People1 data frame
# (BrandA = 23 / 35, BrandB = 34 / 132). Filling by row would transpose the
# table under unchanged labels; the test statistic would not change, but the
# labelled counts would be wrong.
# NOTE(review): assumes the People1 layout is the one in the original study
# table -- confirm against the question sheet.
People2 <- matrix(c(23, 34, 35, 132), ncol = 2, byrow = FALSE)
colnames(People2) <- c("Lessthan40years", "Over40years")
rownames(People2) <- c("BrandA", "BrandB")
chisq.test(People2)
#Conclusion: reject H0 - brand preference is not independent of age, since the p-value = 0.006711 < 0.05
#############################################################################
########################################################################
#Q2 Find the following using R
#a)The Chisq values for (i)df=9,alpha=0.01,p=0.9998
#ii)df=22,alpha=0.1,p=0.9999
#b)The values for t: (i)df=4,alpha=0.01,p=0.5214
#ii)df=19,alpha=0.05,p=0.6891
# The question asks for chi-square and t *values*, i.e. quantiles. Those come
# from the q* functions (qchisq, qt). pchisq/pt go the other way: they take a
# value and return a probability, so feeding them 1 - alpha does not answer
# the question.
ChisqDf <- c(9, 22)
ChisqAlpha <- c(0.01, 0.1)
ChisqP <- c(0.9998, 0.9999)
# Upper-tail critical value at significance level alpha.
ChisqCritical <- qchisq(1 - ChisqAlpha, df = ChisqDf)
# Value whose cumulative probability is p.
# NOTE(review): assumes the given p is a lower-tail (cumulative) probability --
# confirm against the question sheet.
ChisqAtP <- qchisq(ChisqP, df = ChisqDf)
print(data.frame(df = ChisqDf, alpha = ChisqAlpha, p = ChisqP,
                 Critical = ChisqCritical, ValueAtP = ChisqAtP))

TDf <- c(4, 19)
TAlpha <- c(0.01, 0.05)
TP <- c(0.5214, 0.6891)
# One-sided (upper-tail) critical value at significance level alpha.
TCritical <- qt(1 - TAlpha, df = TDf)
TAtP <- qt(TP, df = TDf)
print(data.frame(df = TDf, alpha = TAlpha, p = TP,
                 Critical = TCritical, ValueAtP = TAtP))
# Compare ValueAtP with Critical (equivalently p with 1 - alpha) & draw conclusion.
#############################################################################
########################################################################
#Q3 The annual lowest birth weight of babies in the maternity unit of a
#particular hospital from 1991-2015 is as follows:
# Annual lowest birth weight, one value per year from 1991 to 2015.
BabiesWeight <- c(
  1.92, 2.27, 2.62, 2.11, 2.98, 2.33, 3.18, 3.18, 1.99, 2.55,
  2.90, 1.52, 1.16, 2.80, 1.52, 1.16, 2.80, 1.54, 3.03, 3.52,
  2.16, 4.73, 2.33, 2.16, 2.39
)
# Keep the year as a plain number. A factor here would carry the levels
# "1".."25" instead of the years, would make plot() draw box plots rather than
# a scatter plot, and would make summary() tabulate levels.
Period <- 1991:2015
stopifnot(length(Period) == length(BabiesWeight))

# Create a data frame consisting of the two variables.
Mydata <- data.frame(Period, BabiesWeight)

# Histogram on the density scale with a normal curve (sample mean and sd)
# overlaid, followed by a scatter plot of weight against year.
hist(BabiesWeight, col = 2, probability = TRUE)
xbar <- mean(BabiesWeight)
S <- sd(BabiesWeight)
curve(dnorm(x, xbar, S), col = 2, add = TRUE)
plot(BabiesWeight ~ Period, data = Mydata)

# Generate summary statistics for the data.
summary(Mydata)
#######################################################
#3b)outline the steps for doing the following in SPSS
#(i)Enter the data into SPSS and define the variable properties
#(ii)plot a histogram as well as scatter plot of the data
#(iii)Generate summary statistics for the data
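#Step1: Open SPSS
#Step2: In Variable View, define the variables (Period, BabiesWeight) with appropriate properties, then enter the values in Data View
#Step3: Graphs > Chart Builder > Histogram > set variable = BabiesWeight > OK
#Step4: Graphs > Chart Builder > Scatter plot > set Y-axis = BabiesWeight, X-axis = Period > OK
#Step5: Analyze > Descriptive Statistics > Descriptives > select BabiesWeight > OK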