# this function fits a "diversity-dependent-evolutionary-diversification" model (similar to Mahler et al. 2010) # written by Liam Revell, 2010/2011/2012 fitDiversityModel<-function(tree,x,d=NULL,showTree=TRUE,tol=1e-6){ # some minor error checking if(class(tree)!="phylo") stop("tree object must be of class 'phylo.'") if(is.data.frame(x)) x<-as.matrix(x) if(is.matrix(x)) x<-x[,1] if(is.null(names(x))){ if(length(x)==length(tree$tip)){ message("x has no names; assuming x is in the same order as tree$tip.label") names(x)<-tree$tip.label } else stop("x has no names and is a different length than tree$tip.label") } if(any(is.na(match(tree$tip.label,names(x))))){ message("some species in tree are missing from data, dropping missing taxa from the tree") tree<-drop.tip(tree,tree$tip.label[-match(names(x),tree$tip.label)]) } if(any(is.na(match(names(x),tree$tip.label)))){ message("some species in data are missing from tree, dropping missing taxa from the data") x<-x[tree$tip.label] } if(any(is.na(x))){ message("some data given as 'NA', dropping corresponding species from tree") tree<-drop.tip(tree,names(which(is.na(x)))) } if(!is.null(d)){ if(is.data.frame(d)) d<-as.matrix(d) if(is.matrix(d)) d<-d[,1] if(is.null(names(d))){ if(length(d)==tree$Nnode){ message("d has no names; assuming d is in node number order of the resolved tree") names(d)<-c(length(tree$tip)+tol:tree$Nnode) } else stop("d has no names and is a different length than tree$Nnode for the resolved tree") } } else { message("no values for lineage density provided; computing assuming single biogeographic region") # compute lineage diversity at each node ages<-branching.times(tree) d<-vector() for(i in 1:length(ages)) d[i]<-sum(ages>ages[i]) names(d)<-names(ages) } maxd<-max(d) d<-d/(maxd+tol) # likelihood function lik<-function(theta,y,phy,diversity){ scaled.psi<-theta for(i in 1:nrow(phy$edge)){ vi<-phy$edge.length[i] phy$edge.length[i]<-vi+vi*scaled.psi*diversity[as.character(phy$edge[i,1])] } D<-vcv(phy) D<-D[names(y),names(y)] Dinv<-solve(D) a<-as.numeric(colSums(Dinv)%*%y/sum(Dinv)) sig0<-as.numeric(t(y-a)%*%Dinv%*%(y-a)/nrow(D)) Dinv<-Dinv/sig0; D<-D*sig0 logL<-as.numeric(-t(y-a)%*%Dinv%*%(y-a)/2-determinant(D)$modulus[1]/2-length(y)*log(2*pi)/2) if(showTree) plot(phy) return(logL) } # optimize res<-optimize(lik,c(-1,1),y=x,phy=tree,diversity=d,maximum=TRUE) # compute condition sig0 compute_sig0<-function(scaled.psi,y,phy,diversity){ for(i in 1:nrow(phy$edge)){ vi<-phy$edge.length[i] phy$edge.length[i]<-vi+vi*scaled.psi*diversity[as.character(phy$edge[i,1])] } D<-vcv(phy) D<-D[names(y),names(y)] Dinv<-solve(D) a<-as.numeric(colSums(Dinv)%*%y/sum(Dinv)) sig0<-as.numeric(t(y-a)%*%Dinv%*%(y-a)/nrow(D)) return(sig0) } sig0=compute_sig0(res$maximum,x,tree,d) # compute the Hessian compute_Hessian<-function(scaled.psi,sig0,y,phy,d,maxd){ psi<-scaled.psi*sig0/(maxd+tol) likHessian<-function(theta,y,phy,d,maxd){ sig0<-theta[1] psi<-theta[2] for(i in 1:nrow(phy$edge)){ vi<-phy$edge.length[i] phy$edge.length[i]<-vi*(sig0+psi*d[as.character(phy$edge[i,1])]*(maxd+tol)) } D<-vcv(phy) D<-D[names(y),names(y)] Dinv<-solve(D) a<-as.numeric(colSums(Dinv)%*%y/sum(Dinv)) logL<-as.numeric(-t(y-a)%*%Dinv%*%(y-a)/2-determinant(D)$modulus[1]/2-length(y)*log(2*pi)/2) return(logL) } H<-hessian(likHessian,c(sig0,psi),y=y,phy=phy,d=d,maxd=maxd) return(H) } H<-compute_Hessian(res$maximum,sig0,x,tree,d,maxd) # return results to user if(var(d)>0) return(list(logL=res$objective,sig0=sig0,psi=sig0*res$maximum/(maxd+tol),vcv=matrix(solve(-H),2,2,dimnames=list(c("sig0","psi"),c("sig0","psi"))))) else { message("psi not estimable because diversity is constant through time.") return(list(logL=res$objective,sig0=sig0,vcv=matrix(-1/H[1,1],1,1,dimnames=list(c("sig0"),c("sig0"))))) } }