Creating treechart from tabbed text in R

人盡茶涼 提交于 2019-12-02 09:18:07

This is a rather convoluted way using igraph. We need to arrange your data into two columns, from and to, where each row represents an arrow from -> to.

library(zoo)
library(igraph)

# Read the tab-delimited file. Empty cells are mapped to NA (na.strings = "")
# so the indentation structure of the text survives as missing values.
# (It would have been great if you had given this in a format easier to use.)

dat <- read.table(
  "test.txt",
  sep = "\t",
  header = FALSE,
  fill = TRUE,
  na.strings = "",
  strip.white = TRUE,
  stringsAsFactors = FALSE
)

# Preview the first seven rows; each name sits in the column for its depth.
head(dat, n = 7)
#             V1        V2        V3
#1   Vertebrates      <NA>      <NA>
#2          <NA>      fish      <NA>
#3          <NA>      <NA>  goldfish
#4          <NA>      <NA> clownfish
#5          <NA> amphibian      <NA>
#6          <NA>      <NA>      frog
#7          <NA>      <NA>      toad

Prepare data to graph

# Carry the last seen value forward in the first two columns so that every
# leaf row names its full path (root, level 2, leaf). lapply() keeps each
# column a plain vector instead of going through the matrix sapply() builds.
dat[1:2] <- lapply(dat[1:2], na.locf, na.rm = FALSE)

# Rows that still contain an NA are the heading rows of inner nodes; after the
# fill above their information is already present on the leaf rows.
dat <- na.omit(dat)

# Build the edge list: two columns (from, to), one row per parent -> child
# link. The first pair of columns is repeated once per leaf, so unique() is
# used to keep a single copy of each edge rather than a multigraph.
edges <- unique(
  rbind(dat[1:2], setNames(dat[2:3], names(dat)[1:2]))
)

# Create the graph from the edge list.
g <- graph_from_data_frame(edges)

# Plot the graph with straight edges and no vertex markers or arrow heads.
# The tree layout is computed top-down; swapping its two coordinate columns
# and negating them is what the original code did to orient the tree sideways.
E(g)$curved <- 0
plot(
  g,
  vertex.size = 0,
  edge.arrow.size = 0,
  layout = -layout_as_tree(g)[, 2:1]
)

There will be better ways to do this! The example data used above:

# Example data: 21 rows, one per node. A node's name is stored in the column
# matching its depth (V1 = root, V2 = second level, V3 = leaves); every other
# cell in the row is NA. Rows are grouped below by second-level node.
dat <- data.frame(
  V1 = c("Vertebrates", rep(NA_character_, 20)),
  V2 = c(
    NA, "fish", NA, NA,
    "amphibian", NA, NA,
    "reptiles", NA, NA, NA, NA,
    "birds", NA, NA, NA,
    "mammals", NA, NA, NA, NA
  ),
  V3 = c(
    NA, NA, "goldfish", "clownfish",
    NA, "frog", "toad",
    NA, "snake", "lizard", "turtle", "tortoise",
    NA, "sparrow", "crow", "parrot",
    NA, "dog", "cat", "horse", "whale"
  ),
  stringsAsFactors = FALSE
)


EDIT : Update following new data

Calling your updated data dat2

# To prepare the data

# An edge list needs at least a parent column and a child column.
stopifnot(is.data.frame(dat2), ncol(dat2) >= 2)

# The first column is carried forward unconditionally.
dat2[[1]] <- na.locf(dat2[[1]], na.rm = FALSE)

# Carry forward the last value in a column only on rows where the lower level
# (the column to its right) is non-missing. Columns are visited right-to-left
# so that a cell filled at one level makes the level to its left eligible on
# the next pass. rev(seq_len(n)[-1]) is n, n - 1, ..., 2 and is empty when
# there is nothing to do, unlike n:2.
for (i in rev(seq_len(ncol(dat2))[-1])) {
  parent <- dat2[[i - 1]]
  dat2[[i - 1]] <- ifelse(
    !is.na(dat2[[i]]),
    na.locf(parent, na.rm = FALSE),
    parent
  )
}

# Get edges for the graph: for every pair of adjacent columns (i, i + 1) keep
# the complete rows as (from, to) pairs, all under the names of the first two
# columns so they can be stacked. seq_len() keeps this correct for a
# two-column input, where 1:(ncol(dat2) - 2) would count down through 1, 0.
level_pairs <- lapply(seq_len(ncol(dat2) - 1), function(i) {
  na.omit(setNames(dat2[i:(i + 1)], names(dat2)[1:2]))
})

# A parent -> child pair appears once per descendant row after the fill, so
# drop the repeats to leave one row per edge.
edges <- unique(do.call(rbind, level_pairs))

Then continue as before (create the graph from edges and plot it) to give the tree chart.

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!