Next: About this document ...
Up: Data Analysis Supplement
Previous: Bibliography
##############
# The basics #
#########
# cohen #
#########
# The original data.
# Column names day, act, pr1, pr2, se, mb, bso, bcnu.
cohen_read.table("cohen.dat",header=T,na.strings="*")
############
# find.act #
# find.pro #
############
# Finds the activity and protein vectors for a given treatment combination.
# Default at day 1, treatments 0.
# For all protein missing, get two NA. For all activity missing, get three NA.
# Incorrect input will give NA, not NULL.
# Uses cohen.
find.act_function(day=1,se=0,mb=0,bso=0,bcnu=0)
{ if(length(cohen$act[cohen$day==day&cohen$se==se&cohen$mb==mb&cohen$bso==bso&cohen$bcnu==bcnu])!=0) cohen$act[cohen$day==day&cohen$se==se&cohen$mb==mb&cohen$bso==bso&cohen$bcnu==bcnu]
else if(c(NA,NA,NA))
}
find.pro_function(day=1,se=0,mb=0,bso=0,bcnu=0){
if(length(c(cohen$pr1[cohen$day==day&cohen$se==se&cohen$mb==mb&cohen$bso==bso&cohen$bcnu==bcnu][1],cohen$pr2[cohen$day==day&cohen$se==se&cohen$mb==mb&cohen$bso==bso&cohen$bcnu==bcnu][1]))!=0) c(cohen$pr1[cohen$day==day&cohen$se==se&cohen$mb==mb&cohen$bso==bso&cohen$bcnu==bcnu][1],cohen$pr2[cohen$day==day&cohen$se==se&cohen$mb==mb&cohen$bso==bso&cohen$bcnu==bcnu][1])
else c(NA,NA)
}
################
# resample.e89 #
################
# Resamples from original activity and original protein for each day and
# treatment combination.
# Uses find.act, find.pro.
day_c(seq(1,13))
bso_c(0,1)
bcnu_c(0,1)
mb_c(0,1)
se_c(0,1)
resample.e89_function(day=c(seq(1,13)),bso=c(0,1),bcnu=c(0,1),mb=c(0,1),se=c(0,1)){
# Initialize.
rat.mat_NULL
# Loops for each treatment combination.
for(i in day){
for(j in bso){
for(k in bcnu){
for(l in mb){
for(m in se){
# Finds the original activity and protein using find.act and find.pro
act.orig_find.act(i,m,l,j,k)
pro.orig_find.pro(i,m,l,j,k)
# Sets in the indices for activity and protein.
# If there are NAs, assigns length of 1.
n.act_ifelse(sum(!is.na(act.orig)>0),sum(!is.na(act.orig)),1)
n.pro_ifelse(sum(!is.na(pro.orig)>0),sum(!is.na(pro.orig)),1)
# Resamples from the indices for activity and protein.
ind.act_sample(n.act,n.act*n.pro,replace=T)
ind.pro_sample(n.pro,n.act*n.pro,replace=T)
# Finds the vector of ratios of each resampled activity to resampled protein.
temp.rat_act.orig[ind.act]/pro.orig[ind.pro]
# Puts the temporary vectors into a matrix with treatment combinations.
# NAs still present.
temp.rat.mat_matrix(cbind(temp.rat,matrix(c(i,j,k,l,m),ncol=5,nrow=n.act*n.pro,byrow=T)),nrow=n.act*n.pro,ncol=6)
rat.mat_rbind(rat.mat,temp.rat.mat)
rbind(rat.mat,temp.rat.mat)
}}}}}
# Creates data frame for use in regression.
e89.sam_na.omit(rat.mat)
data.frame(rat=e89.sam[,1],day=e89.sam[,2],bso=e89.sam[,3],bcnu=e89.sam[,4],mb=e89.sam[,5],se=e89.sam[,6]) }
############################
# Between-Days Variability #
#
data.oneday_function(data,day=1)
{data[data$day==day,]}
# Determines average response from resampling for each experimental condition.
avg.resp_function() {
row.temp_NULL
data_resample.e89()
for (d in 1:13) {
for (i in 0:1){
for (j in 0:1){
for (k in 0:1){
for (l in 0:1){
row.temp_c(row.temp,mean(data[data$day==d&data$se==i&data$mb==j&data$bcnu==k&data$bso==l,1]),d,l,k,j,i,)
}}}}}
mat.avg.temp_t(array(row.temp,dim=c(6,13*16)))
mat.avg_mat.avg.temp[!is.na(mat.avg.temp[,1]),]
mat.avg_data.frame(resp=mat.avg[,1],day=mat.avg[,2],bso=mat.avg[,3],bcnu=mat.avg[,4],mb=mat.avg[,5],se=mat.avg[,6])
return(mat.avg)
}
# For a given set of experimental conditions, determines the mean response.
ef_function(data,day,se=0,mb=0,bso=0,bcnu=0){
mean(data$resp[data$day==day&data$se==se&data$mb==mb&data$bso==bso&data$bcnu==bcnu])}
# Determines the effects of each compound on the rate of synthesis.
eff.vec_NULL
for(b in 1:100){
mat.avg_avg.resp()
for(d in 1:13){
mb.eff_ef(mat.avg,day=d,se=1,mb=1)-ef(mat.avg,day=d,se=1)
bso.eff_ef(mat.avg,day=d,se=1,bso=1)-ef(mat.avg,day=d,se=1)
bcnu.eff_ef(mat.avg,day=d,se=1,bcnu=1)-ef(mat.avg,day=d,se=1)
eff.vec_c(eff.vec,d,mb.eff,bso.eff,bcnu.eff)}
}
all.eff_(array(eff.vec,dim=c(4,13,100)))
# Boxplots to illustrate between-days variability.
boxplot(all.eff[2,1,],all.eff[2,2,],all.eff[2,3,],all.eff[2,4,],all.eff[2,7,],all.eff[2,8,],all.eff[2,9,],all.eff[2,10,],all.eff[2,11,])
title(main="Inhibiting Effect of MB by day",xlab="Days",ylab="Specific Activity")
for(i in 1:9){
text(locator(1),paste("Day",c(1,2,3,4,7,8,9,10,11)[i]))
}
boxplot(all.eff[3,2,],all.eff[3,3,],all.eff[3,4,],all.eff[3,5,],all.eff[3,6,],all.eff[3,7,],all.eff[3,8,],all.eff[3,9,],all.eff[3,11,],all.eff[3,12,],all.eff[3,13,])
title(main="Inhibiting Effect of BSO by day",xlab="Days",ylab="Specific Activity")
for(i in 1:11){
text(locator(1),paste("Day",c(2,3,4,5,6,7,8,9,11,12,13)[i]))
}
##########################
# Box-Cox transformation #
###########
# max.box #
###########
# Examine for optimal box-cox transformation.
# Uses library MASS for boxcox function.
# Returns a vector of optimal transformations for each resampling.
# Transformation chosen from this vector.
reg.simple_lm(rat~factor(day)+(factor(se)+factor(mb)+factor(bso)+factor(bcnu))^3,singular.ok=T,resample.e89())
library(MASS)
max.box_NULL
for(i in 1:10){
box.temp_boxcox(reg.simple,lambda=seq(0.1,0.3,length=20),plotit=F)
max.temp_box.temp$x[box.temp$y==max(box.temp$y)]
max.box_c(max.box,max.temp)}
# Sample box.cox for E-89 cell line: [1] 0.1631579 0.1526316 0.1526316
# 0.1210526 0.1526316 0.1947368 0.1842105 0.1315789 0.1842105 0.1947368
# Chose 0.2 for E-89 (fifth root) and 0.4 for both the CHO and PC-3 cell lines
# Plot of likelihood for single regression.
boxcox(reg.simple,lambda=seq(0.1,0.3,length=20),plotit=T)
title(main="Log likelihood of data with power transformation")
# Comparison of residuals before and after transformation.
reg.simple.trans_lm(rat^0.2~factor(day)+(factor(se)+factor(mb)+factor(bso)+factor(bcnu))^3,singular.ok=T,resample.e89())
par(mfrow=c(2,1))
plot(reg.simple$res,reg.simple$fitted,main="Residuals Before Transformation",xlab="fitted values",ylab="residuals")
plot(reg.simple.trans$res,reg.simple.trans$fitted,main="Residuals After Transformation",xlab="fitted values",ylab="residuals")
###############
# Regression #
######################
# Initial regression #
######################
reg.initial_lm(rat^0.20~factor(day)+factor(se)+factor(mb)+factor(bso)+factor(bcnu)+factor(mb)*factor(se)+factor(bso)*factor(se)+factor(bcnu)*factor(se)+factor(mb)*factor(bso)*factor(se)+factor(bso)*factor(bcnu)*factor(se),singular.ok=T,resample.e89())
###########################
# Bootstrapped regression #
###########################
coef_NULL
B_100
for(j in 1:B){
temp.coef_lm(rat^0.20~factor(day)+factor(se)+factor(mb)+factor(bso)+factor(bcnu)+factor(mb)*factor(se)+factor(bso)*factor(se)+factor(bcnu)*factor(se)+factor(mb)*factor(bso)*factor(se)+factor(bso)*factor(bcnu)*factor(se),singular.ok=T,resample.e89())$coef
coef_rbind(coef,temp.coef)}
coef_data.frame(int=coef[,1],day1=coef[,2],day2=coef[,3],day3=coef[,4],day4=coef[,5],day5=coef[,6],day6=coef[,7],day7=coef[,8],day8=coef[,9],day9=coef[,10],day10=coef[,11],day11=coef[,12],day12=coef[,13],se=coef[,14],mb=coef[,15],bso=coef[,16],bcnu=coef[,17],se.mb=coef[,18],se.bso=coef[,19],se.bcnu=coef[,20],mb.bso=coef[,21],bso.bcnu=coef[,22],se.mb.bso=coef[,23],se.bso.bcnu=coef[,24])
#############
# plot.hist #
#############
plot.hist_function(eff,alpha=0.05) {
hist(eff)
qu_quantile(sort(eff),c(alpha/2,1-alpha/2))
# qu.new_quantile.new(eff,alpha)
abline(v=qu[1:2],lty=4)
abline(v=mean(eff))
# abline(v=qu.new)
text(locator(1),paste(round(qu[1],digits=4)))
text(locator(1),paste(round(mean(eff),digits=4)))
text(locator(1),paste(round(qu[2],digits=4)))
}
par(mfrow=c(2,3))
main.effects_rbind(plot.hist(coef$se),plot.hist(coef$mb),plot.hist(coef$bso),plot.hist(coef$bcnu),plot.hist(coef$mb.bso),plot.hist(coef$bso.bcnu))
par(mfrow=c(2,2))
plot.hist(coef$se.mb)
text(locator(1),"Se*MB")
plot.hist(coef$se.bso)
text(locator(1),"Se*BSO")
plot.hist(coef$se.mb.bso)
text(locator(1),"Se*MB*BSO")
for(i in 1:3){
text(locator(1),paste(c("Se*MB","Se*BSO","Se*MB*BSO")[i]))}
Next: About this document ...
Up: Data Analysis Supplement
Previous: Bibliography
regina@stat.stanford.edu