# Patrick Cahan
require(devtools)
library(slidify)
library(ggplot2)
# Load the Times data table. Every column is read as numeric except columns
# 1, 2, 3, 10, 12 and 13, which are kept as character.
times_path <- "../../misc/timesData.csv"

# Only the column count is needed to build the colClasses vector, so read a
# single data row here instead of parsing the whole file twice.
tdat <- read.csv(times_path, header = TRUE, nrows = 1)
cclasses <- rep("numeric", ncol(tdat))
cclasses[c(1, 2, 3, 10, 12, 13)] <- "character"

# na.strings defines what should be treated as NA: here, a field that is
# exactly "-".
tdat <- read.csv(
  times_path,
  header = TRUE,
  colClasses = cclasses,
  na.strings = "-"
)
# Convert world_rank from text to numeric. Plain ranks are parsed directly;
# banded ranks such as "201-225" are replaced by the mean of the two ends.
wor <- tdat$world_rank

# Rows holding a banded rank. grepl() returns FALSE for NA, so missing ranks
# are treated as plain values and remain NA after as.numeric().
isBand <- grepl("-", wor, fixed = TRUE)

# Show the first two band labels and how often each occurs.
table(wor[isBand])[1:2]
##
## 201-225 201-250
## 103 53

# Distinct band labels, plus the row indices of banded and plain ranks.
# which() is safe for zero-length input, unlike 1:length(wor).
badWOR <- sort(unique(wor[isBand]))
indexBad <- which(isBand)
goodInd <- which(!isBand)

# Midpoint of each distinct band, computed once per label.
bandMid <- vapply(
  strsplit(badWOR, "-", fixed = TRUE),
  function(ends) mean(as.numeric(ends)),
  numeric(1)
)

# Every position is filled by exactly one of the two assignments below.
newWOR <- numeric(length(wor))
newWOR[indexBad] <- bandMid[match(wor[indexBad], badWOR)]
newWOR[goodInd] <- as.numeric(wor[goodInd])
tdat$world_rank <- newWOR
# Histogram of teaching scores, drawn as one panel per year.
plot1 <- ggplot(data = tdat, mapping = aes(x = teaching)) +
  geom_histogram() +
  theme_bw() +
  facet_grid(. ~ year)
# Display the plot with a readable x-axis title; plot1 itself is unchanged.
plot1 + labs(x = "Teaching score")
# Teaching score against income, one panel per year: translucent blue points
# with a red linear-model fit layered on top.
plot2 <- ggplot(data = tdat, mapping = aes(x = income, y = teaching)) +
  geom_point(color = "blue", alpha = 0.15) +
  geom_smooth(method = lm, color = "red") +
  theme_bw() +
  facet_grid(. ~ year)
# Display the plot with a readable y-axis title; plot2 itself is unchanged.
plot2 + labs(y = "Teaching score")
# Linear regression of teaching on income, fitted to the year == 2011 rows
# only; the summary (coefficients and fit statistics) is printed below.
fit2011 <- lm(teaching ~ income, data = subset(tdat, year == 2011))
summary(fit2011)
##
## Call:
## lm(formula = teaching ~ income, data = subset(tdat, year == 2011))
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.066 -9.665 -2.810 6.263 50.445
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.71592 2.81008 14.489 < 2e-16 ***
## income 0.24751 0.04954 4.996 1.73e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.7 on 139 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.1522, Adjusted R-squared: 0.1461
## F-statistic: 24.96 on 1 and 139 DF, p-value: 1.728e-06
# Total score against income, one panel per year: translucent blue points
# with a red linear-model fit layered on top.
plot4 <- ggplot(data = tdat, mapping = aes(x = income, y = total_score)) +
  geom_point(color = "blue", alpha = 0.15) +
  geom_smooth(method = lm, color = "red") +
  theme_bw() +
  facet_grid(. ~ year)
# Display the plot with a readable y-axis title; plot4 itself is unchanged.
plot4 + labs(y = "Total score")
# World rank against research score, one panel per year: translucent blue
# points with a red linear-model fit layered on top.
plot5 <- ggplot(data = tdat, mapping = aes(x = research, y = world_rank)) +
  geom_point(color = "blue", alpha = 0.15) +
  geom_smooth(method = lm, color = "red") +
  theme_bw() +
  facet_grid(. ~ year)
# Display the plot with a readable y-axis title; plot5 itself is unchanged.
plot5 + labs(y = "World rank")
# World rank against research score again, restricted to rows whose numeric
# world_rank is below `thresh`.
thresh <- 200

# subset() keeps only rows where the condition is TRUE. Indexing with the raw
# logical vector would instead insert an all-NA row for every NA world_rank,
# which can surface as a spurious NA panel in the faceted plot.
plot5v2 <- ggplot(
  subset(tdat, world_rank < thresh),
  aes(x = research, y = world_rank)
) +
  geom_point(colour = "blue", alpha = 0.15) +
  geom_smooth(method = lm, colour = "red") +
  facet_grid(. ~ year) +
  theme_bw()
# Display the plot with a readable y-axis title; plot5v2 itself is unchanged.
plot5v2 + ylab("World rank")
# Box plots of total score for each female_male_ratio value, one panel per
# year.
plot6 <- ggplot(
  data = tdat,
  mapping = aes(x = female_male_ratio, y = total_score)
) +
  geom_boxplot() +
  theme_bw() +
  facet_grid(. ~ year)
# Display the plot with a readable x-axis title; plot6 itself is unchanged.
plot6 + labs(x = "F-M ratio")