# this function uses a Bayesian MCMC approach to estimate heterogeneity in the evolutionary rate for a
# continuous character
# written by Liam J. Revell 2010

evol.rate.mcmc<-function(tree,x,ngen=10000,control=list()){

	# first, try and obtain reasonable estimates for control parameters
	C<-vcv.phylo(tree); C<-C[names(x),names(x)]; n<-nrow(C); one<-matrix(1,n,1)
	a<-colSums(solve(C))%*%x/sum(solve(C)) # MLE ancestral value, used to start MCMC
	sig1<-as.numeric(t(x-one%*%a)%*%solve(C)%*%(x-one%*%a)/n) # MLE sigma-squared, used to start MCMC
	sig2<-sig1 # used to start MCMC
	
	# control list
	con=list(sd1=0.2*sig1,sd2=0.2*sig2,sda=0.2*abs(a),kloc=0.2*mean(diag(C)),sdlnr=1,rand.shift=0.05,print=100)
	# also might use: sig1mu=1000,sig2mu=1000
	con[(namc <- names(control))] <- control

	# require dependenciess
	require(ape)

	# all internal functions
	
	# function to return the index of a random edge
	random.node<-function(phy){
		# sum edges cumulatively
		cum.edge<-vector(mode="numeric"); index<-vector(mode="numeric")
		for(i in 1:length(phy$edge.length)){
			if(i==1) cum.edge[i]<-phy$edge.length[1]
			else cum.edge[i]<-cum.edge[i-1]+phy$edge.length[i]
			index[i]<-phy$edge[i,2]
		}
		# pick random position
		pos<-runif(1)*cum.edge[length(phy$edge.length)]
		edge<-1
		while(pos>cum.edge[edge]) edge<-edge+1
		return (index[edge])
	}

	# return the indices of a vector that match a scalar
	match.all<-function(s,v){
		result<-vector(mode="numeric")
		j=1
		for(i in 1:length(v)){
			if(s==v[i]){
				result[j]=i
				j=j+1
			}
		}
		if(j==1) result<-NA
		return(result)
	}

	# function to return a matrix of the descendant tips from each internal & terminal node
	compute.descendant.species<-function(phy){
		D<-dist.nodes(phy)
		ntips<-length(phy$tip.label)
		Cii<-D[ntips+1,]
		C<-D; C[,]<-0
		counts<-vector()
		for(i in 1:nrow(D)) for(j in 1:ncol(D)) C[i,j]<-(Cii[i]+Cii[j]-D[i,j])/2
		tol<-1e-10; descendants<-matrix(0,nrow(D),ntips,dimnames=list(rownames(D)))
		for(i in 1:nrow(C)){
			k<-0
			for(j in 1:ntips){
				if(C[i,j]>=(C[i,i]-tol)){
					k<-k+1
					descendants[i,k]<-phy$tip.label[j]
				}
			}
			counts[i]<-k
		}
		names(counts)<-rownames(descendants)
		return(descendants=list(species=descendants,counts=counts))
	}

	# tree step
	tree.step<-function(phy,node,bp,step,up=NA){
		if(step<0)
			step=-step # if user has given -step, positivize
		if(is.na(up))
			up=(runif(1)>0.5) # if up/down is not assigned, we must be in the middle of a branch
		# decide to go up (1) or down (0) with equal probability
		if(up){
			# pick a new position along the branch (or go to the end)
			new.bp<-min(bp+step,phy$edge.length[match(node,phy$edge[,2])])
			# adjust step length to remain step
			step=step-(new.bp-bp)
			# check to see if we're done
			if(step<1e-6){
				return(list(node=node,bp=new.bp,flip=FALSE))
			} else {
				# we're going up, so get the daughters
				daughters<-phy$edge[match.all(node,phy$edge[,1]),2]
				# pick a random daughter
				new.node<-daughters[ceiling(runif(1)*length(daughters))]
				if(is.na(new.node)){
					location<-tree.step(phy,node,phy$edge.length[match(node,phy$edge[,2])],step,up=FALSE) # we're at a tip
				} else {
					location<-tree.step(phy,new.node,0,step,up=TRUE)
				}
			}
		} else {
			# pick a new position along the branch (or go to the start)
			new.bp<-max(bp-step,0)
			# adjust step length
			step=step-(bp-new.bp)
			# check to see if we're done
			if(step<1e-6){
				return(list(node=node,bp=new.bp,flip=FALSE))
			} else {
				# we're going down so find out who the parent is
				parent<-phy$edge[match(node,phy$edge[,2]),][1]
				# find the other daughter(s)				
				daughters<-phy$edge[match.all(parent,phy$edge[,1]),2]
				# don't use parent if root
				if(parent==(length(phy$tip.label)+1)){
					parent=NULL # if at the base of the tree
				}
				# create a vector of the possible nodes: the parent, and sister(s)				
				possible.nodes<-c(parent,daughters[-match(node,daughters)])
				# now pick randomly
				new.node<-possible.nodes[ceiling(runif(1)*length(possible.nodes))]
				# if parent
				if(is.null(parent)==FALSE&&new.node==parent){
					location<-tree.step(phy,new.node,phy$edge.length[match(new.node,phy$edge[,2])],step,up=FALSE)
				} else {
					location<-tree.step(phy,new.node,0,step,up=TRUE)
				}
			}
		}
	}
	
	# likelihood equation
	likelihood<-function(y,phy,C,descendants,sig1,sig2,a,loc){
		C1<-matrix(0,nrow(C),ncol(C),dimnames=dimnames(C))
		C2<-matrix(0,nrow(C),ncol(C),dimnames=dimnames(C))
		n<-length(y)
		D<-matrix(1,n,1)
		if(loc$node>length(phy$tip.label)){
			tr1<-extract.clade(phy,loc$node)
			tr1$root.edge<-phy$edge.length[match(loc$node,phy$edge[,2])]-loc$bp
			temp<-vcv.phylo(tr1)+tr1$root.edge
		} else {
			temp<-matrix(phy$edge.length[match(loc$node,phy$edge[,2])]-loc$bp,1,1,dimnames=list(c(phy$tip.label[loc$node]),c(phy$tip.label[loc$node])))
		}
		if(any(is.na(match(rownames(temp),descendants$species[phy$edge[match.all(length(phy$tip.label)+1,phy$edge[,1]),][1,2],1:descendants$counts[phy$edge[match.all(length(phy$tip.label)+1,phy$edge[,1]),][1,2]]])))){
			C2[rownames(temp),colnames(temp)]<-temp
			C1<-C-C2
			tips<-phy$tip.label[-match(rownames(temp),rownames(C1))]
		} else {
			C1[rownames(temp),colnames(temp)]<-temp
			C2<-C-C1
			tips<-rownames(temp)
		}
		V<-sig1*C1+sig2*C2
		logL<-as.numeric(-t(y-D%*%a)%*%solve(V)%*%(y-D%*%a)/2-n*log(2*pi)/2-determinant(V)$modulus[1]/2)
		return(list(logL=logL,tips=tips))	
	}

	# computes prior
	log.prior<-function(s1,s2,a,location){
		# logpr<-dexp(s1,rate=1/con$sig1mu,log=TRUE)+dexp(s2,rate=1/con$sig2mu,log=TRUE) # exponential prior
		logpr<-dnorm(log(s1)-log(s2),mean=0,sd=con$sdlnr,log=TRUE)-log(s1*s2) # log-normal
		return(logpr)
	}

	# proposal on sig1 or sig2
	propose.sig<-function(sig,scale){
		# if normal
		sig.prime<-abs(sig+rnorm(n=1,sd=scale)) # normal proposal distribution
		# if cauchy
		# sig.prime<-abs(sig+rcauchy(n=1,scale=scale))
		return(sig.prime)
	}

	# proposal on a
	propose.a<-function(a,scale){
		# if normal
		a.prime<-a+rnorm(n=1,sd=scale) # normal proposal distribution
		return(a.prime)
	}

	# proposal on loc
	propose.loc<-function(phy,loc,k,r){
		loc.prime<-list()
		loc.prime$flip=FALSE
		if(runif(1)>r){
			loc.prime<-tree.step(phy,loc$node,loc$bp,step=k*rexp(n=1,rate=1)) # update node & bp by random walk
		} else {
			loc.prime$node<-random.node(phy) # pick random branch
			loc.prime$bp<-runif(1)*phy$edge.length[match(loc.prime$node,phy$edge[,2])]
			if((runif(1)>0.5)) loc.prime$flip=TRUE
		}
		return(loc.prime)
	}

	# obtain remaining starting values for the MCMC
	location<-list()
	location$node<-random.node(tree)
	location$bp<-runif(1)*tree$edge.length[match(location$node,tree$edge[,2])]
	location$flip<-FALSE
	descendants<-compute.descendant.species(tree) # compute descendants
	logL<-likelihood(x,tree,C,descendants,sig1,sig2,a,location)$logL
	
	# create matrix for results
	results<-matrix(NA,(ngen+1),7,dimnames=list(c(0:ngen),c("state","sig1","sig2","a","node","bp","likelihood")))
	results[1,]<-c(0,sig1,sig2,a,location$node,location$bp,logL) # populate the first row
	group.tips<-list()
	group.tips[[1]]<-likelihood(x,tree,C,descendants,sig1,sig2,a,location)$tips
	print(results[1,])

	# now run Markov-chain
	for(i in 1:ngen){
		if(i%%4==1) sig1.prime<-propose.sig(sig1,scale=con$sd1) # update sig1
		else sig1.prime<-sig1
		if(i%%4==2) sig2.prime<-propose.sig(sig2,scale=con$sd2) # update sig2
		else sig2.prime<-sig2
		if(i%%4==3) a.prime<-propose.a(a,scale=con$sda) # update a
		else a.prime<-a
		if(i%%4==0){ 
			location.prime<-propose.loc(phy=tree,loc=location,k=con$kloc,r=con$rand.shift)
			if(location.prime$flip==TRUE){
				sig2.prime<-sig1; sig1.prime<-sig2 # flip the sigmas
			}
		} else location.prime<-location
		temp<-likelihood(x,tree,C,descendants,sig1.prime,sig2.prime,a.prime,location.prime)
		if(exp(temp$logL+log.prior(sig1.prime,sig2.prime,a.prime,location.prime)-results[i,"likelihood"]-log.prior(sig1,sig2,a,location))>runif(1)){
			sig1<-sig1.prime; sig2<-sig2.prime; a<-a.prime; location<-location.prime; logL<-temp$logL
			group.tips[[i+1]]<-temp$tips
		} else group.tips[[i+1]]<-group.tips[[i]]
		rm(temp)
		results[i+1,]<-c(i,sig1,sig2,a,location$node,location$bp,logL)
		if(i%%con$print==0)
			print(results[i+1,])
	}

	return(list(mcmc=results,tips=group.tips))

}