# This function fits two or more different evolutionary rates to different parts of a phylogenetic tree
# Takes a tree with a binary or multistate character "painted" on the branches in SIMMAP format 
# (see read.simmap()) and data for a single continuously valued trait
# Based on "Brownie" by O'Meara et al. 2006
# Written by Liam Revell 2010
brownie.lite<-function(scm.tre,dat,maxit=2000){

	# require dependencies
	require(ape)

	# bookkeeping
	x<-as.matrix(dat)
	n<-nrow(x) # number of species
	p<-ncol(scm.tre$mapped.edge) # number of states
	one<-matrix(1,n,1)
	
	# first compute C for the whole scm.tre
	C<-vcv.phylo(scm.tre)
	C<-C[rownames(x),rownames(x)]

	# find the ancestral state vector
	a<-as.numeric(colSums(solve(C))%*%x/sum(solve(C)))

	# compute the MLE of sig1
	sig1<-as.numeric(t(x-one%*%a)%*%solve(C)%*%(x-one%*%a)/n)

	# compute the log-likelihood
	likelihood.single<-function(theta,y,C){
		n<-length(y)
		one<-matrix(1,n,1)		
		sig<-theta[1]
		a<-theta[2]
		logL<-as.numeric(-t(x-one%*%t(a))%*%solve(sig*C)%*%(x-one%*%t(a))/2-n*log(2*pi)/2-determinant(sig*C)$modulus[1]/2)
		return(-logL)
	}

	# optimize single rate model (we can feed it out ML estimates here)
	model1<-optim(c(sig1,a),fn=likelihood.single,y=x,C=C,control=list(maxit=maxit),hessian=TRUE)

	# convert for convenience of output
	sig1<-model1$par[1]
	logL1<--model1$value
	vcv1<-solve(model1$hessian); rownames(vcv1)<-c("sig","a"); colnames(vcv1)<-rownames(vcv1)

	# compute separate C for each state
	multi.tre<-list(); class(multi.tre)<-"multiPhylo"
	C<-array(dim=c(nrow(C),ncol(C),ncol(scm.tre$mapped.edge)))
	for(i in 1:ncol(scm.tre$mapped.edge)){
		multi.tre[[i]]<-scm.tre
		multi.tre[[i]]$edge.length<-scm.tre$mapped.edge[,i]
		multi.tre[[i]]$state<-colnames(scm.tre$mapped.edge)[i]
		temp<-vcv.phylo(multi.tre[[i]])
		C[,,i]<-temp[rownames(x),rownames(x)]
	}

	# compute the log-likelihood
	likelihood.multiple<-function(theta,y,C){
		n<-length(y); p<-dim(C)[3]
		one<-matrix(1,n,1)
		sig<-vector(mode="numeric")
		for(i in 1:p) sig[i]<-theta[i]
		a<-theta[p+1]
		V<-matrix(0,length(y),length(y))
		for(i in 1:p) V<-V+sig[i]*C[,,i]
		logL<--t(y-one%*%a)%*%solve(V)%*%(y-one%*%a)/2-n*log(2*pi)/2-determinant(V)$modulus[1]/2
		return(-logL)
	}

	# compute the starting parameter values
	starting<-vector(mode="numeric")
	lower<-vector(mode="numeric")
	for(i in 1:p){ 
		starting[i]<-sig1
		lower[i]<-0.0
	}
	starting[p+1]<-a; lower[p+1]<--Inf
	# optimize using generic optimizer
	model2=optim(starting,fn=likelihood.multiple,y=x,C=C,control=list(maxit=maxit),hessian=TRUE)

	# convert for convenience of output
	sig.i<-vector(mode="numeric"); states<-vector(mode="character")
	for(i in 1:p){
		sig.i[i]<-model2$par[i]
		states[i]<-multi.tre[[i]]$state
	}
	names(sig.i)<-states

	# log-likelihood for the multi-matrix model
	logL2<--model2$value

	vcv2<-solve(model2$hessian); rownames(vcv2)<-c(states,"a"); colnames(vcv2)<-rownames(vcv2)

	# report convergence
	if(model2$convergence==0){
		converged<-"Optimization has converged."
	} else {
		converged<-"Optimization may not have converged.  Consider increasing maxit."
	}

	# return results
	return(list(sig2.single=sig1,var.single=vcv1[1,1],logL1=logL1,k1=2,sig2.multiple=sig.i,vcv.multiple=vcv2[1:p,1:p],logL.multiple=logL2,k2=p+1,P.chisq=pchisq(2*(logL2-as.numeric(logL1)),p-1,lower.tail=FALSE),convergence=converged))

}