# Function reads stochastic character mapped (or SIMMAP format) trees # Tree format is: # text<-"((A:{0,0.3:1,0.4:0,0.3},B:{0,1.0}):{0,1.0},(C:{0,0.25:1,0.75},D:{0,1.0}):{0,1.0});" # Can also read modified "nexus" formatted files (produced by SIMMAP) and multiple trees # States need not be binary and can be one or several characters long (e.g., "aqua" vs. "terr"): # text<-"((A:{aqua,0.3:terr,0.4:aqua,0.3},B:{aqua,1.0}):{aqua,1.0},(C:{aqua,0.25:terr,0.75},D:{aqua,1.0}):{aqua,1.0});" # Time spent in each state on each edge is stored in matrix phy$mapped.edge # Written by Liam J. Revell, 2010 read.simmap<-function(file="",text,format="phylip",max.char=10000,max.states=10,version=1.0){ # require dependencies require(ape) if(file!=""&&format=="nexus"){ # this is modified from read.nexus() in the ape package X <- scan(file = file, what = "", sep = "\n", quiet = TRUE) left <- grep("\\[", X) right <- grep("\\]", X) if (length(left)) { w <- left == right if (any(w)) { s <- left[w] X[s] <- gsub("\\[[^]]*\\]", "", X[s]) } w <- !w if (any(w)) { s <- left[w] X[s] <- gsub("\\[.*", "", X[s]) sb <- right[w] X[sb] <- gsub(".*\\]", "", X[sb]) if (any(s < sb - 1)) X <- X[-unlist(mapply(":", (s + 1), (sb - 1)))] } } endblock <- grep("END;|ENDBLOCK;", X, ignore.case = TRUE) semico <- grep(";", X) i1 <- grep("begin smptrees;", X, ignore.case = TRUE) i2 <- grep("translate", X, ignore.case = TRUE) translation <- if (length(i2) == 1 && i2 > i1) TRUE else FALSE if (translation) { end <- semico[semico > i2][1] x <- X[(i2 + 1):end] x <- unlist(strsplit(x, "[,; \t]")) x <- x[nzchar(x)] trans <- matrix(x, ncol = 2, byrow = TRUE) trans[, 2] <- gsub("['\"]", "", trans[, 2]) n <- dim(trans)[1] } start <- if (translation) semico[semico > i2][1] + 1 else semico[semico > i1][1] end <- endblock[endblock > i1][1] - 1 tree <- X[start:end] rm(X) tree <- gsub("^.*= *", "", tree) tree <- tree[tree != ""] semico <- grep(";", tree) Ntree <- length(semico) if (Ntree == 1 && length(tree) > 1) { STRING <- paste(tree, collapse = "") } else { if (any(diff(semico) != 1)) { STRING <- character(Ntree) s <- c(1, semico[-Ntree] + 1) j <- mapply(":", s, semico) if (is.list(j)) { for (i in 1:Ntree) STRING[i] <- paste(tree[j[[i]]], collapse = "") } else { for (i in 1:Ntree) STRING[i] <- paste(tree[j[,i]], collapse = "") } } else STRING <- tree } rm(tree); text<-STRING if(translation==TRUE){ rownames(trans)<-trans[,1]; trans<-trans[,2] } } if(file!=""&&format=="phylip"){ text<-scan(file,sep="\n",what="character") } ntrees<-length(text) phy<-list(); class(phy)<-"multiPhylo" for(h in 1:ntrees){ tree<-unlist(strsplit(text[h], NULL)) scm.tree<-array(dim=c(max.states,max.char),dimnames=list(c(1:max.states))) temp1<-vector(mode="character"); backbone<-vector(mode="character") temp2<-vector(mode="character") i<-1; j<-1; l<-1 nstates=1; branch<-vector(mode="numeric"); states<-vector(mode="character") while(tree[i]!=";"){ if(tree[i]!="{"){ for(k in 1:nstates){ scm.tree[k,j]<-tree[i] } backbone[l]<-tree[i] i<-i+1; l<-l+1; j<-j+1 } else if(tree[i]=="{"){ branch[1:nstates]<-0 i=i+1; count.new<-0; new.state<-vector(mode="character") while(tree[i]!="}"){ temp1<-NULL; m<-1 while(tree[i]!=","){ temp1[m]<-tree[i] i=i+1; m=m+1 } temp1<-paste(temp1,collapse="") if(is.na(match(temp1,states))){ curr.state<-temp1 if(length(states)>=1) nstates=nstates+1 dimnames(scm.tree)[[1]][nstates]<-curr.state states[nstates]<-curr.state branch[nstates]<-0; names(branch)[nstates]<-curr.state count.new=count.new+1; new.state[count.new]<-curr.state } else { curr.state<-temp1 } temp2<-NULL; m<-1; i=i+1 while(tree[i]!=":"&&tree[i]!="}"){ temp2[m]<-tree[i] i=i+1; m=m+1 } branch[curr.state]<-branch[curr.state]+as.numeric(paste(temp2,collapse="")) if(tree[i]==":") i=i+1 } for(k in 1:nstates){ if(is.na(match(states,new.state)[k])==FALSE){ scm.tree[k,1:l]<-backbone[1:l] } scm.tree[k,j]<-as.character(branch[k]) } backbone[l]<-0; l=l+1; i=i+1; j=j+1 } } scm.tree[1:nstates,j]<-";" scm.tree<-scm.tree[1:nstates,1:j] scm.tree<-apply(scm.tree,1,function(x) x<-paste(x,collapse="")) tree.list<-list() for(i in 1:nstates) tree.list[[i]]<-read.tree(text=scm.tree[i]) tree<-read.tree(text=paste(c(backbone,";"),collapse="")) tree$mapped.edge<-matrix(NA,length(tree$edge.length),nstates,dimnames=list(edge=apply(tree$edge,1,function(x) paste(x,collapse=",")),state=states)) for(i in 1:length(tree$edge.length)){ for(j in 1:nstates){ tree$edge.length[i]<-tree$edge.length[i]+tree.list[[j]]$edge.length[i] tree$mapped.edge[i,j]<-tree.list[[j]]$edge.length[i] } } # untranslate if(format=="nexus"&&translation==TRUE) tree$tip.label<-trans[tree$tip.label] phy[[h]]<-tree } if(length(phy)==1) phy<-phy[[1]] return(phy) }