问题
I have data on a company that I want to visualize as a tree diagram. That is, its brands are nested in classes, which are nested in subgroups, which are nested in divisions (http://www.bayer.com/en/products-from-a-to-z.aspx#B).
Even though there are quite a few threads on Stack Overflow about how to create dendrograms, I did not find a code snippet that worked with my data (e.g. how to convert a data.frame to a tree structure object such as a dendrogram).
EDIT: If I am right, "dendrogram" is not the right term in this case, as I don't have or consider any kind of distance measure. So I am using the more generic term "tree diagram".
# Example data: one row per brand (149 rows, see row.names below), with four
# factor columns -- Brand, Class, Subgroup and Division -- describing where
# each brand sits in the company hierarchy.
#
# This is a dput()-style literal: every column is stored as integer codes plus
# a `.Label` vector holding the factor levels.
#
# NOTE(review): Subgroup has 3 levels while Division has 12, so Subgroup looks
# like the coarser grouping even though the question text lists divisions as
# the outermost level -- confirm the intended nesting order.
# NOTE(review): several Class levels differ only in capitalisation or plural
# ("Agricultural Seed" / "Agricultural seed", "Diagnostic Imaging" /
# "Diagnostic imaging", "Companion Animal" / "Companion animals") and will be
# treated as distinct nodes -- confirm whether they should be merged.
# NOTE(review): the "ÿ" characters inside some Brand labels look like a
# mis-encoded symbol (possibly a trademark sign) -- verify against the
# original data before relying on these labels.
df <- structure(list(Brand = structure(c(73L, 122L, 131L, 44L, 6L,
7L, 8L, 27L, 52L, 84L, 95L, 101L, 121L, 142L, 17L, 21L, 53L,
86L, 99L, 112L, 139L, 4L, 97L, 76L, 47L, 113L, 146L, 71L, 109L,
147L, 148L, 149L, 14L, 80L, 93L, 114L, 3L, 15L, 25L, 26L, 35L,
36L, 37L, 39L, 42L, 56L, 57L, 59L, 60L, 61L, 62L, 63L, 65L, 66L,
67L, 68L, 69L, 72L, 90L, 92L, 117L, 28L, 91L, 9L, 16L, 81L, 45L,
51L, 10L, 130L, 138L, 46L, 74L, 116L, 128L, 137L, 11L, 77L, 82L,
83L, 94L, 111L, 123L, 124L, 134L, 136L, 141L, 18L, 23L, 48L,
75L, 79L, 87L, 88L, 100L, 102L, 126L, 129L, 50L, 54L, 115L, 5L,
22L, 85L, 98L, 118L, 127L, 19L, 38L, 107L, 132L, 58L, 120L, 96L,
2L, 49L, 55L, 20L, 106L, 135L, 143L, 145L, 1L, 133L, 125L, 108L,
119L, 12L, 24L, 33L, 78L, 103L, 104L, 105L, 13L, 29L, 30L, 31L,
32L, 34L, 40L, 41L, 43L, 70L, 89L, 110L, 144L, 64L, 140L), .Label = c("3.0T Prostate eCoil",
"A1CNow", "Acclaim Polyol", "Adalat", "Admire", "Advantage",
"Advantix", "Advocate", "Aleve", "Alka-Seltzer", "Antracol",
"Apec", "Arcol", "Arize", "Artwalk", "Aspirin", "Aspirin Cardio",
"Atlantis", "Attribut", "Avanta", "Avelox", "Bariton", "Basta",
"Bayblend", "Baybond", "Baycoll", "Baycox", "Baycusan", "Baydur",
"Bayfill", "Bayfit", "Bayflex", "Bayfol", "Baygalÿ/Baymidur",
"Bayhydrol", "Bayhydur", "Bayhytherm", "Bayleton", "Baymer",
"Baynat", "Baypreg", "Baypren", "Baytec", "Baytril", "Bepanthenÿ/Bepanthol",
"Berocca", "Betaferonÿ/ Betaseron", "Betanal", "Breezeÿ2", "Calypso",
"Canesten", "Catosal", "Cipro", "Confidor", "Contourÿ/Contour Linkÿ/Contour TS",
"CreKat", "Crelan", "Decis", "Desavin", "Desmocap", "Desmocoll",
"Desmoderm", "Desmodur", "Desmoflex", "Desmolac", "Desmolith",
"Desmolux", "Desmomelt", "Desmopan", "Desmophen", "Diane", "Dispercoll",
"Drontal", "Elevit", "Equip", "Eylea", "Fandango", "Fantasia",
"Fenikan", "FiberMax", "Flanaxÿ/ÿApronax", "Flint", "Folicur",
"Gadovist", "Gaucho", "Glucobay", "Hoestar", "Husar", "Hyperlite",
"Imprafix", "Impranil", "Impraperm", "InVigor", "Input", "Iopamiron",
"K-Othrine", "Kogenate", "Lamardor", "Levitra", "Liberty", "Magnevist",
"MaisTer", "Makroblend", "Makrofol", "Makrolon", "Mark V ProVis",
"Maxforce", "Microlet 2", "Mirena", "Multitec", "Nativo", "Nebido",
"Nexavar", "Nunhems", "Oberon", "One-A-Day", "Pergut", "Poncho",
"Possis Angio Jet", "Premise", "Primovist", "Profender", "Proline",
"Prosaro", "Pulsar", "Puma", "Raxil", "Redoxon", "Rely", "Rennie",
"Rompun", "Ronstar", "Solaris", "Sphere", "Stellant", "Stratego",
"Supradyn", "Talcid", "Testogel", "Texin", "Twist", "Ultravist",
"Vistron CT", "Vulkollan", "XDS", "Xarelto", "Yasmin", "Yasminelle",
"Yaz"), class = "factor"), Class = structure(c(6L, 6L, 6L, 7L,
8L, 8L, 8L, 13L, 13L, 10L, 11L, 11L, 10L, 11L, 12L, 17L, 17L,
17L, 18L, 18L, 18L, 20L, 20L, 29L, 33L, 34L, 39L, 43L, 43L, 43L,
43L, 43L, 1L, 2L, 2L, 41L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
31L, 37L, 3L, 3L, 3L, 9L, 14L, 16L, 16L, 16L, 42L, 42L, 42L,
42L, 42L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 26L, 26L,
26L, 27L, 32L, 32L, 32L, 32L, 32L, 21L, 15L, 19L, 21L, 26L, 36L,
40L, 4L, 4L, 4L, 22L, 22L, 23L, 23L, 23L, 24L, 24L, 25L, 28L,
38L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 31L, 31L, 31L, 31L, 31L,
31L, 31L, 31L, 31L, 31L, 31L, 31L, 31L, 35L, 35L), .Label = c("Agricultural Seed",
"Agricultural seed", "Analgesics", "Blood Glucose Monitoring Systems",
"Coatings, adhesives, specialties", "Companion Animal", "Companion Animal / Food Animal Product",
"Companion animals", "Dermatologicals", "Diagnostic Imaging",
"Diagnostic imaging", "Drug product for protection against heart attack",
"Food Animal Product", "Fungal infections", "Fungicides", "Gastrointestinals",
"General Medicine", "General Medicine / Mens HealthCare", "General insect control",
"Hematology/cardiology", "Herbicides", "Injection Systems / Cardiology",
"Injection Systems / Computer Tomography", "Injection Systems / Magnetic Resonance Tomography",
"Injections Systems Ultrasound", "Insecticides", "Insecticides/seed treatment",
"Lancing Devices", "Ophthalmology", "Polycarbonates", "Polyurethanes",
"Seed treatment", "Specialty Medicine", "Specialty Medicine / Onkology",
"TPU granules", "Termiticides", "Textile coating", "Thrombectomy",
"Thromboembolic diseases", "Vector and locust control", "Vegetable seed",
"Vitamins", "Women's healthcare"), class = "factor"), Subgroup = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), .Label = c("Bayer CropScience", "Bayer HealthCare",
"Bayer MaterialScience"), class = "factor"), Division = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L), .Label = c("Animal Health",
"Bayer HealthCare Pharmaceuticals, Germany", "BioScience", "Coatings, Adhesives, Specialties",
"Consumer Care", "Crop Protection", "Crop Protection/Environmental Science",
"Environmental Science", "Medical Care", "Polycarbonates", "Polyurethanes",
"Thermoplastic Polyurethanes"), class = "factor")), .Names = c("Brand",
"Class", "Subgroup", "Division"), class = "data.frame", row.names = c(NA,
-149L))
I appreciate any help.
回答1:
Using the package ape, and assuming your data is olddat:
# ape supplies as.phylo() and the plot method for "phylo" objects.
library(ape)

# Build a tree object from the hierarchy columns of `olddat` (the data frame
# the question calls `df`). The formula lists the nesting levels separated by
# "/", in the order the question describes them: Division, Subgroup, Class,
# Brand.
# NOTE(review): ape's formula interface presumably expects the most general
# level first -- confirm against the as.phylo.formula documentation.
newdata <- as.phylo(
  ~ Division / Subgroup / Class / Brand,
  data = olddat
)

# Draw the tree with both tip and internal-node labels, using the full
# plotting region (no margins).
plot(
  newdata,
  show.tip.label = TRUE,
  show.node.label = TRUE,
  no.margin = TRUE
)
You'll need to play with your various plot options, but I think this will get you going in the right direction. I'll see about updating the answer if I can get the plot to look okay.
来源:https://stackoverflow.com/questions/17024626/create-tree-diagram