# This function fits two or more different evolutionary rates to different parts of a phylogenetic tree # Takes a tree with a binary or multistate character "painted" on the branches in SIMMAP format # (see read.simmap()) and data for a single continuously valued trait # Based on "Brownie" by O'Meara et al. 2006 # Written by Liam Revell 2010 brownie.lite<-function(tree,x,maxit=2000,test="chisq",nsim=100){ # require dependencies require(ape) # bookkeeping x<-as.matrix(x) n<-nrow(x) # number of species p<-ncol(tree$mapped.edge) # number of states one<-matrix(1,n,1) # first compute C for the whole tree C1<-vcv.phylo(tree) C1<-C1[rownames(x),rownames(x)] # find the ancestral state vector a<-as.numeric(colSums(solve(C1))%*%x/sum(solve(C1))) # compute the MLE of sig1 sig1<-as.numeric(t(x-one%*%a)%*%solve(C1)%*%(x-one%*%a)/n) # compute the log-likelihood likelihood.single<-function(theta,y,C){ n<-length(y) one<-matrix(1,n,1) sig<-theta[1] a<-theta[2] logL<-as.numeric(-t(x-one%*%t(a))%*%solve(sig*C)%*%(x-one%*%t(a))/2-n*log(2*pi)/2-determinant(sig*C)$modulus[1]/2) return(-logL) } # optimize single rate model (we can feed it our ML estimates here) model1<-optim(c(sig1,a),fn=likelihood.single,y=x,C=C1,control=list(maxit=maxit),hessian=TRUE,method="L-BFGS-B",lower=c(0,-Inf)) # convert for convenience of output sig1<-model1$par[1] logL1<--model1$value vcv1<-solve(model1$hessian); rownames(vcv1)<-c("sig","a"); colnames(vcv1)<-rownames(vcv1) a1<-model1$par[2] # compute separate C for each state multi.tre<-list(); class(multi.tre)<-"multiPhylo" C2<-array(dim=c(nrow(C1),ncol(C1),ncol(tree$mapped.edge))) for(i in 1:ncol(tree$mapped.edge)){ multi.tre[[i]]<-tree multi.tre[[i]]$edge.length<-tree$mapped.edge[,i] multi.tre[[i]]$state<-colnames(tree$mapped.edge)[i] temp<-vcv.phylo(multi.tre[[i]]) C2[,,i]<-temp[rownames(x),rownames(x)] } # compute the log-likelihood likelihood.multiple<-function(theta,y,C){ n<-length(y); p<-dim(C)[3] one<-matrix(1,n,1) sig<-vector(mode="numeric") for(i in 1:p) sig[i]<-theta[i] a<-theta[p+1] V<-matrix(0,length(y),length(y)) for(i in 1:p) V<-V+sig[i]*C[,,i] logL<--t(y-one%*%a)%*%solve(V)%*%(y-one%*%a)/2-n*log(2*pi)/2-determinant(V)$modulus[1]/2 return(-logL) } # compute the starting parameter values starting<-vector(mode="numeric") lower<-vector(mode="numeric") for(i in 1:p){ starting[i]<-sig1 lower[i]<-0.001*sig1 # this sets the lower limit on the sigmas during optimization to be 0.1% of sigma in the single rate model } starting[p+1]<-a; lower[p+1]<--Inf # optimize using built-in optimizer model2<-optim(starting,fn=likelihood.multiple,y=x,C=C2,control=list(maxit=maxit),hessian=TRUE,method="L-BFGS-B",lower=lower) # log-likelihood for the multi-matrix model logL2<--model2$value while(logL22*(sim$logL.multiple-sim$logL1))/nsim } return(list(sig2.single=sig1,a.single=a1,var.single=vcv1[1,1],logL1=logL1,k1=2,sig2.multiple=sig.i,a.multiple=a2,vcv.multiple=vcv2[1:p,1:p],logL.multiple=logL2,k2=p+1,P.sim=Psim,convergence=converged)) } else { print(paste("test =",test,"is not a valid option - usingn chi-sq test")) return(list(sig2.single=sig1,a.single=a1,var.single=vcv1[1,1],logL1=logL1,k1=2,sig2.multiple=sig.i,a.multiple=a2,vcv.multiple=vcv2[1:p,1:p],logL.multiple=logL2,k2=p+1,P.chisq=pchisq(2*(logL2-as.numeric(logL1)),p-1,lower.tail=FALSE),convergence=converged)) } }