R: spread function on data frame with duplicates

Deadly 提交于 2019-12-09 03:43:01

问题


I have a data frame that I need to pivot but the data frame has duplicate identifiers, so spread function gives an error Error: Duplicate identifiers for rows (5, 6)

Dimension = c("A","A","B","B","A","A")   
Date = c("Mon","Tue","Mon","Wed","Fri","Fri")    
Metric = c(23,25,7,9,7,8)
df = data.frame(Dimension,Date,Metric)
df

  Dimension Date Metric
1         A  Mon     23
2         A  Tue     25
3         B  Mon      7
4         B  Wed      9
5         A  Fri      7
6         A  Fri      8

library(tidyr)
df1 = spread(df, Date, Metric, fill = " ")

Error: Duplicate identifiers for rows (5, 6)

I then consolidated the rows and pasted the Metric:

dfa = aggregate(df[3], df[-3], 
                FUN = function(X) paste(unique(X), collapse=", "))

  Dimension Date Metric
1         A  Fri   7, 8
2         A  Mon     23
3         B  Mon      7
4         A  Tue     25
5         B  Wed      9

Then repeat and of course it works now:

df1 = spread(dfa, Date, Metric, fill = " ")
df1
  Dimension  Fri Mon Tue Wed
1         A 7, 8  23  25    
2         B        7       9

Question: is there an "easier" way of doing this, or is my method above efficient enough so I don't need to lose sleep over it? Thanks!

EDIT. All codes - mine and 2 akrun's work fine with this small dataset. However, akrun's dplyr version breaks on my real dataset. Here's dput.

structure(list(Dimension = c(10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
10994030020, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
12303485675, 12303485675, 12303485675, 12303485675, 12303485675, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
12701670437), Date = structure(c(1L, 3L, 5L, 7L, 9L, 10L, 11L, 
12L, 13L, 14L, 16L, 18L, 19L, 20L, 22L, 23L, 24L, 26L, 27L, 28L, 
30L, 32L, 33L, 34L, 40L, 41L, 42L, 47L, 48L, 49L, 51L, 52L, 53L, 
54L, 55L, 58L, 59L, 60L, 61L, 62L, 63L, 65L, 66L, 68L, 69L, 70L, 
74L, 75L, 76L, 2L, 3L, 5L, 7L, 8L, 10L, 11L, 15L, 17L, 20L, 21L, 
24L, 25L, 28L, 30L, 31L, 34L, 36L, 42L, 43L, 46L, 48L, 49L, 53L, 
54L, 56L, 65L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 7L, 24L, 30L, 
38L, 65L, 4L, 6L, 7L, 24L, 28L, 29L, 30L, 35L, 37L, 39L, 44L, 
45L, 50L, 57L, 64L, 65L), .Label = c("16", "analog tuner", "aspect ratio", 
"assembled in country of origin", "backlight technology", "battery type", 
"brand", "brightness", "color class", "component video", "composite video", 
"country of origin", "depth w/ stand", "digital audio output", 
"digital tuner", "display technology", "features", "green compliance certificate/authority", 
"green compliant", "hdmi", "headphone jack", "height w/ stand", 
"limited warranty", "manufacturer", "maximum resolution", "media player", 
"motion interpolation technology", "mpn", "multi pack indicator", 
"name", "native contrast ratio", "number of hdmi ports", "number of usb ports", 
"operating power consumption", "origin of components", "package contents", 
"primary color", "product dimensions", "product in in (l x w x h)", 
"product model", "product series", "product type", "remote control incl", 
"remote included", "resolution", "response time", "rms output power", 
"scan format", "screen size", "shipping weight (in lb)", "sound system", 
"speaker output power (w)", "speakers", "standard refresh rate", 
"standby power consumption", "total number of hdmi ports", "tv definition", 
"tv features", "tv refresh rate (hz)", "tv resolution", "tv screen size (in)", 
"tv screen size range", "tv speakers", "tv technology", "unspsc", 
"usb", "vertical viewing angle", "vesa mount standard", "vga", 
"video signal standard", "viewing angle", "warranty length", 
"wattage", "weight (approx)", "weight w/ stand (approx)", "width w/ stand"
), class = "factor"), Metric = structure(c(40L, 13L, 57L, 69L, 
43L, 72L, 72L, 45L, 38L, 72L, 55L, 44L, 72L, 72L, 15L, 3L, 69L, 
72L, 46L, 26L, 70L, 27L, 1L, 29L, 26L, 54L, 58L, 12L, 39L, 25L, 
42L, 11L, 72L, 37L, 28L, 52L, 36L, 39L, 24L, 19L, 72L, 33L, 72L, 
18L, 72L, 49L, 6L, 10L, 23L, 62L, 13L, 48L, 64L, 31L, 72L, 72L, 
41L, 66L, 72L, 72L, 64L, 16L, 63L, 65L, 4L, 32L, 21L, 58L, 71L, 
35L, 8L, 20L, 72L, 37L, 17L, 33L, 14L, 7L, 72L, 50L, 14L, 2L, 
34L, 59L, 59L, 60L, 5L, 33L, 51L, 47L, 67L, 67L, 53L, 61L, 68L, 
51L, 43L, 30L, 72L, 9L, 22L, 49L, 56L, 33L), .Label = c("1", 
"1-year limited", "1 Year", "1,000:1", "1,140 x 145 x 705 in ; 65.6 lb", 
"10.40 lb", "100 x 100", "1080p", "1080p (HDTV)", "11.20 lb", 
"14", "14 W", "16:9", "178 degrees", "18.30 in", "1920 x 1080", 
"2", "200 x 100", "21", "22 in", "22 in FHD LED TV; Remote Control", 
"25.4", "26.20 in", "29", "29 in", "29L1350U", "3", "300 mW", 
"33.80 W", "36.5 x 6.5 x 23.0", "365 Nit", "50 W", "52161505", 
"6 W", "6.50 ms", "60", "60 Hz", "7.10 in", "720p", "9", "ATSC", 
"Audyssey EQ", "Black", "CEC", "China", "ClearScan 120 Hz", "Does Not Contain a Battery", 
"Edge LED", "HDTV", "HDTV 1080p", "Imported", "Internet Apps", 
"KDL40W600B", "L1350U", "LCD", "LCD, Internet Connected, LED", 
"LED", "LED-LCD TV", "LG", "LG 47LY340C - 47 in - commercial use LED-backlit L", 
"No", "NTSC", "PLED2243A", "ProScan", "PROSCAN PLED2243A 22 in 1080p 60 Hz LED HDTV - PTR", 
"Sleep Timer; Auto Program", "Sony", "Sony KDL40W600B 40 in 1080p 60 Hz Smart LED TV (20", 
"Toshiba", "Toshiba 29L1350U 29 in 720p LED-LCD TV - 16:9 - HD", 
"yes", "Yes"), class = "factor")), .Names = c("Dimension", "Date", 
"Metric"), class = c("data.table", "data.frame"), row.names = c(NA, 
-104L), .internal.selfref = <pointer: 0x00000000003d0788>)

回答1:


You could use dcast from the devel version of data.table ie. v1.9.5. Instructions to install are here

library(data.table)#v1.9.5+
dcast(setDT(df), Dimension~Date, value.var='Metric', 
               fun.aggregate=function(x) toString(unique(x)))
#   Dimension  Fri Mon Tue Wed
#1:         A 7, 8  23  25    
#2:         B        7       9

Or

library(dplyr)
library(tidyr)
df %>%
   group_by(Dimension, Date) %>% 
   summarise(Metric=toString(unique(Metric))) %>% 
   spread(Date, Metric, fill='')
#   Dimension  Fri Mon Tue Wed
#1         A 7, 8  23  25    
#2         B        7       9

Update

Using the new dataset from `OP's post

 setDF(df2)
 df2 %>% 
     group_by(Dimension, Date) %>% 
     summarise(Metric=toString(unique(Metric))) %>%
     spread(Date, Metric, fill='') %>%
     head(2) %>%
     select(1:3)
 #    Dimension 16 analog tuner
 #1 10994030020  9             
 #2 12300245685            NTSC



回答2:


I fixed all problems, and all 3 solutions run now: 1 is mine, 2 and 3 are @akrun's. The code is fully reproducible and is shown below. Indeed, as @akrun envisioned, version 3 with dplyr and tidyr runs fastest on larger datasets (test one is 300 rows to fit in 30000 characters in body), diff is more pronounced on even larger sets. My own version "1" was fastest on a smaller datasets (100 rows or so), at least on my machine. Hope this helps someone!

library(microbenchmark)
library(data.table)#v1.9.5+
library(reshape2)
library(tidyr)
library(dplyr)

df = structure(list(GTIN = c(10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 10994030020, 10994030020, 10994030020, 10994030020, 
                            10994030020, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12300245685, 12300245685, 12300245685, 12300245685, 12300245685, 
                            12303485675, 12303485675, 12303485675, 12303485675, 12303485675, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 12701670437, 12701670437, 12701670437, 12701670437, 
                            12701670437, 13201067215, 13201067215, 13201067215, 13201067215, 
                            13201067215, 13964253832, 13964253832, 13964253832, 13964253832, 
                            13964253832, 14818899589, 14818899589, 14818899589, 14818899589, 
                            14818899589, 14818899589, 19748359455, 19748359455, 19748359455, 
                            19748359455, 19748359455, 19748359455, 19748359455, 19748359455, 
                            19748359455, 19748383566, 19748383566, 19748383566, 19748383566, 
                            19748383566, 19748383566, 19748383566, 19748383566, 19748383566, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003435, 22265003435, 22265003435, 22265003435, 
                            22265003435, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003459, 22265003459, 22265003459, 22265003459, 22265003459, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265003947, 
                            22265003947, 22265003947, 22265003947, 22265003947, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012, 22265004012, 22265004012, 22265004012, 
                            22265004012, 22265004012), 
                   Key = structure(c(1L, 10L, 12L, 14L, 
                                     16L, 20L, 21L, 24L, 26L, 29L, 35L, 43L, 44L, 45L, 49L, 56L, 57L, 
                                     59L, 61L, 62L, 64L, 67L, 68L, 69L, 77L, 78L, 79L, 84L, 85L, 87L, 
                                     92L, 93L, 96L, 99L, 100L, 110L, 111L, 112L, 113L, 114L, 115L, 
                                     118L, 119L, 122L, 123L, 125L, 129L, 131L, 134L, 6L, 9L, 12L, 
                                     14L, 15L, 20L, 21L, 30L, 41L, 45L, 47L, 57L, 58L, 62L, 64L, 65L, 
                                     69L, 71L, 79L, 80L, 83L, 85L, 86L, 96L, 99L, 108L, 118L, 121L, 
                                     122L, 123L, 125L, 126L, 127L, 128L, 14L, 57L, 64L, 75L, 118L, 
                                     11L, 13L, 14L, 57L, 62L, 63L, 64L, 70L, 74L, 76L, 81L, 82L, 89L, 
                                     109L, 116L, 118L, 14L, 57L, 62L, 64L, 118L, 14L, 57L, 62L, 64L, 
                                     118L, 14L, 17L, 52L, 57L, 64L, 118L, 14L, 17L, 53L, 57L, 62L, 
                                     62L, 64L, 75L, 118L, 14L, 17L, 52L, 53L, 57L, 62L, 64L, 75L, 
                                     118L, 5L, 7L, 14L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 34L, 38L, 
                                     42L, 46L, 48L, 50L, 57L, 62L, 64L, 72L, 73L, 79L, 82L, 88L, 91L, 
                                     95L, 94L, 98L, 97L, 106L, 117L, 118L, 120L, 124L, 130L, 133L, 
                                     5L, 7L, 14L, 18L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 34L, 36L, 
                                     39L, 42L, 46L, 48L, 50L, 57L, 60L, 62L, 64L, 72L, 73L, 79L, 82L, 
                                     88L, 91L, 95L, 94L, 98L, 97L, 106L, 117L, 118L, 120L, 124L, 130L, 
                                     133L, 5L, 7L, 8L, 14L, 18L, 23L, 25L, 27L, 28L, 31L, 33L, 32L, 
                                     34L, 38L, 39L, 42L, 46L, 48L, 55L, 57L, 62L, 64L, 66L, 72L, 73L, 
                                     79L, 88L, 91L, 95L, 98L, 97L, 101L, 102L, 117L, 118L, 120L, 124L, 
                                     132L, 133L, 2L, 3L, 4L, 5L, 7L, 14L, 18L, 19L, 22L, 23L, 25L, 
                                     27L, 28L, 31L, 33L, 32L, 34L, 36L, 37L, 39L, 40L, 42L, 46L, 48L, 
                                     50L, 51L, 54L, 55L, 57L, 60L, 62L, 64L, 72L, 73L, 79L, 82L, 88L, 
                                     90L, 91L, 98L, 97L, 102L, 103L, 104L, 105L, 107L, 117L, 118L), 
                                   .Label = c("16","16:9 Mode", "24p Technology", "3D", "Additional Features", "Analog Tuner", 
                                              "Analog TV Tuner", "Analog Video Input Signals", "aspect ratio", 
                                              "Aspect Ratio", "Assembled in Country of Origin", "Backlight Technology", 
                                              "Battery Type", "Brand", "Brightness", "Color Class", "Color Name", 
                                              "Color Temperature Control", "Compatible with Windows 7", "Component Video", 
                                              "Composite Video", "Connectivity", "Connector Type", "Country of Origin", 
                                              "Depth (Shipping)", "Depth with Stand", "Diagonal Size", "Diagonal Size (cm)", 
                                              "Digital Audio Output", "Digital Tuner", "Digital TV Tuner", 
                                              "Dimensions", "Dimensions & Weight Details", "Display Format", 
                                              "Display Technology", "DLNA", "Dynamic Contrast Ratio", "Enclosure Color", 
                                              "ENERGY STAR Qualified", "Expansion Slots", "Features", "Flat Panel Mount Interface", 
                                              "Green Compliance Certificate/Authority", "Green Compliant", 
                                              "HDMI", "HDMI Ports Qty", "headphone jack", "Height (Shipping)", 
                                              "Height with Stand", "Image Aspect Ratio", "Internet Streaming Services", 
                                              "Item Package Quantity", "Item Weight", "LAN Protocol", "LCD Backlight Technology", 
                                              "Limited Warranty", "Manufacturer", "maximum resolution", "Media Player", 
                                              "Motion Enhancement Technology", "Motion Interpolation Technology", 
                                              "MPN", "Multi Pack Indicator", "Name", "Native Contrast Ratio", 
                                              "Nominal Voltage", "Number of HDMI Ports", "Number of USB Ports", 
                                              "Operating Power Consumption", "Origin of Components", "package contents", 
                                              "PC Interface", "Power Device", "Primary Color", "Product Dimensions", 
                                              "Product in Inches (L x W x H)", "Product Model", "Product Series", 
                                              "Product Type", "remote control included", "Remote Included", 
                                              "Resolution", "response time", "RMS Output Power", "Scan Format", 
                                              "screen size", "Screen Size", "Series", "Shipping Weight (in pounds)", 
                                              "Sound Effects", "Sound Output Mode", "Sound System", "Speaker Output Power (W)", 
                                              "Speaker System", "Speaker(s)", "Speakers", "Stand", "Stand Design", 
                                              "Standard Refresh Rate", "Standby Power Consumption", "Stereo Reception System", 
                                              "Supported Audio Formats", "Supported Memory Cards", "Supported Pictures Formats", 
                                              "Supported Video Formats", "Surround Mode", "Timer Functions", 
                                              "Total Number of HDMI Ports", "Tv Definition", "TV Features", 
                                              "TV Refresh Rate (Hz)", "TV Resolution", "TV Screen Size (inches)", 
                                              "TV Screen Size Range", "TV Speakers", "Tv Technology", "TV Tuner", 
                                              "UNSPSC", "USB", "USB Port", "Vertical Viewing Angle", "VESA Mount Standard", 
                                              "VGA", "Video Interface", "Video Signal Standard", "viewing angle", 
                                              "warranty length", "wattage", "Weight (Approximate)", "Weight (Shipping)", 
                                              "Weight with Stand (Approximate)", "Widescreen Modes", "Width (Shipping)", 
                                              "Width with Stand"), class = "factor"), 
                   Value = structure(c(83L, 
                                       19L, 118L, 156L, 90L, 176L, 176L, 92L, 78L, 176L, 115L, 91L, 
                                       176L, 176L, 21L, 5L, 156L, 176L, 97L, 39L, 157L, 40L, 1L, 46L, 
                                       39L, 114L, 120L, 17L, 80L, 38L, 88L, 16L, 176L, 77L, 42L, 109L, 
                                       76L, 80L, 37L, 30L, 176L, 68L, 176L, 28L, 176L, 105L, 8L, 14L, 
                                       35L, 133L, 19L, 103L, 141L, 51L, 176L, 176L, 86L, 150L, 176L, 
                                       176L, 141L, 22L, 138L, 142L, 6L, 66L, 33L, 120L, 175L, 74L, 11L, 
                                       32L, 176L, 77L, 24L, 68L, 20L, 9L, 176L, 106L, 20L, 2L, 72L, 
                                       122L, 122L, 123L, 7L, 68L, 107L, 100L, 151L, 151L, 113L, 132L, 
                                       152L, 107L, 90L, 50L, 176L, 13L, 34L, 105L, 116L, 68L, 122L, 
                                       122L, 60L, 124L, 68L, 146L, 146L, 165L, 147L, 68L, 170L, 90L, 
                                       1L, 170L, 171L, 68L, 146L, 90L, 75L, 146L, 166L, 167L, 163L, 
                                       47L, 68L, 146L, 89L, 1L, 73L, 146L, 164L, 162L, 48L, 68L, 110L, 
                                       133L, 156L, 55L, 81L, 56L, 10L, 87L, 137L, 173L, 12L, 90L, 29L, 
                                       53L, 36L, 19L, 156L, 59L, 159L, 169L, 140L, 117L, 22L, 143L, 
                                       153L, 27L, 26L, 154L, 108L, 176L, 23L, 68L, 178L, 99L, 62L, 61L, 
                                       129L, 133L, 156L, 176L, 54L, 81L, 63L, 15L, 87L, 134L, 174L, 
                                       12L, 176L, 176L, 57L, 53L, 45L, 19L, 156L, 95L, 64L, 160L, 169L, 
                                       140L, 117L, 22L, 144L, 153L, 27L, 26L, 154L, 108L, 176L, 23L, 
                                       68L, 178L, 99L, 71L, 67L, 111L, 133L, 133L, 156L, 176L, 3L, 65L, 
                                       43L, 82L, 87L, 136L, 173L, 80L, 90L, 176L, 29L, 25L, 31L, 121L, 
                                       156L, 44L, 158L, 85L, 169L, 140L, 119L, 145L, 153L, 125L, 154L, 
                                       108L, 131L, 126L, 23L, 68L, 178L, 99L, 155L, 52L, 176L, 94L, 
                                       177L, 101L, 133L, 156L, 176L, 98L, 172L, 41L, 84L, 69L, 18L, 
                                       87L, 135L, 173L, 12L, 176L, 79L, 176L, 4L, 58L, 53L, 49L, 19L, 
                                       130L, 104L, 121L, 156L, 96L, 70L, 161L, 168L, 139L, 119L, 22L, 
                                       93L, 102L, 153L, 154L, 108L, 127L, 148L, 112L, 128L, 149L, 23L, 
                                       68L), .Label = c("1", "1-year limited", "1 x composite video/audio input ( RCA phono x 3 ) - rear 1 x USB ( 4 pin USB Type A ) - side 1 x component video input ( RCA phono x 3 ) - rear 2 x audio line-in ( RCA phono x 2 ) 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) 2 x HDMI ( 19 pin HDMI Type A ) 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) 1 x audio input ( mini-phone 3.5 mm ) 1 x digital audio input (optical) 1 x antenna", 
                                                        "1 x SD Memory Card", "1 Year", "1,000:1", "1,140 x 145 x 705 inches ; 65.6 pounds", 
                                                        "10.40 lb", "100 x 100", "102 cm", "1080p", "1080p (FullHD)", 
                                                        "1080p (HDTV)", "11.20 lb", "117 cm", "14", "14 W", "140 cm ( 138.7 cm viewable )", 
                                                        "16:09", "178°", "18.30\"", "1920 x 1080", "1x analog, 1x digital", 
                                                        "2", "2 port(s)", "2 speakers", "2 x main channel speaker - built-in", 
                                                        "200 x 100", "200 x 200 mm", "21", "21.8 in", "22\"", "22\" FHD LED TV; Remote Control", 
                                                        "25.4", "26.20\"", "28.9 in", "29", "29\"", "29L1350U", "3", 
                                                        "3 x HDMI input ( 19 pin HDMI Type A ) - rear 1 x HDMI input ( 19 pin HDMI Type A ) - side 1 x component video input - rear 2 x USB ( 4 pin USB Type A ) - side 1 x network ( RJ-45 ) - side 1 x VGA input ( 15 pin HD D-Sub (HD-15) ) - side 1 x antenna - rear 1 x digital audio output (optical) - rear 1 x composite video/audio input ( RCA phono x 3 ) - side 1 x audio input ( mini-phone stereo 3.5 mm ) - side 1 x audio input ( mini-phone stereo 3.5 mm ) - rear", 
                                                        "300 mW", "32\"", "32SL400", "33.1 in", "33.80 W", "34 x 7.8 x 7.6 inches", 
                                                        "34.5 x 7.4 x 7.2 inches", "34.6 in", "36.5 x 6.5 x 23.0", "365 Nit", 
                                                        "38 in", "4 port(s)", "4 x HDMI input ( 19 pin HDMI Type A ) 1 x component video input 1 x HD component video / RGB input 1 x composite video/audio input 1 x digital audio output (optical) 1 x Ethernet ( RJ-45 )", 
                                                        "4 x HDMI input ( 19 pin HDMI Type A ) 1 x component video input 1 x HD component video / RGB input 1 x composite video/audio input 1 x digital audio output (optical) 1 x USB ( 4 pin USB Type A )", 
                                                        "40\"", "400 x 200 mm", "400 x 400 mm", "40E200U", "42LN5400", 
                                                        "44.3 in", "45.2 lbs", "46\"", "46G300U", "5.4 in", "50 W", "50.9 in", 
                                                        "52161505", "55\" Class ( 54.6\" viewable )", "55WX800", "57.3 lbs", 
                                                        "6 W", "6.2 pounds", "6.50 ms", "6.7 pounds", "60", "60 Hz", 
                                                        "7.10\"", "7000000:1", "720p", "8.8 in", "82 cm", "9", "9 in", 
                                                        "AC 120/230 V ( 50/60 Hz )", "ATSC", "ATSC, QAM", "Audyssey EQ", 
                                                        "black", "Black", "CEC", "China", "Cinema", "Cinema Mode 24 fps", 
                                                        "ClearFrame 120Hz", "ClearFrame 240Hz", "ClearScan 120Hz", "Compatible with Windows 7 software and devices carry Microsoft’s assurance that these products have passed tests for compatibility and reliability with 32-bit and 64-bit Windows 7.", 
                                                        "Component, composite, HDMI", "Does Not Contain a Battery", "Dolby Volume, Invisible Speaker System, Audyssey EQ", 
                                                        "Dynamic Bass Boost (DBB)", "Edge LED", "Ethernet", "HDTV", "HDTV 1080p", 
                                                        "Imported", "Included", "Internet Apps", "Invisible Speaker System", 
                                                        "JPEG photo playback, sleep timer, CrystalCoat, DynaLight, ColorStream HD Component Video Inputs, Digital Noise Reduction (DNR)", 
                                                        "JPG", "KDL40W600B", "L1350U", "LCD", "LCD , Internet Connected , LED", 
                                                        "LCD TV", "LED", "LED-backlit LCD TV", "LED-LCD TV", "LED backlight", 
                                                        "LG", "LG 47LY340C - 47\" - commercial use LED-backlit LCD flat panel display - 1080p (FullHD) - direct-lit LED - dark titan", 
                                                        "Lg LG 42-Inch LED-Backlit LCD TV - 42LN5400 1080p 120Hz HDTV (42LN5400)", 
                                                        "Main channel speaker", "MP3", "MP3, AAC, LPCM", "MPEG-2, MPEG-4, AVCHD", 
                                                        "Mute button, Invisible Speaker System, Audyssey EQ", "Net TV", 
                                                        "NICAM", "No", "NTSC", "Panel with stand - 44.3 in x 13.7 in x 30.2 in x 46.3 lbs", 
                                                        "Panel with stand - 50.4 in x 14 in x 33.5 in x 71 lbs Panel without stand - 50.4 in x 1.1 in x 30.4 in x 59.3 lbs", 
                                                        "Panel without stand - 30.5 in x 1.4 in x 18.9 in", "Panel without stand - 39.1 in x 3.5 in x 25.3 in x 33.5 lbs", 
                                                        "PLED2243A", "Power adapter", "Power supply - internal", "ProScan", 
                                                        "PROSCAN PLED2243A 22\" 1080p 60Hz LED HDTV - PTR-PLED2243A", 
                                                        "REGZA E Series", "REGZA G Series", "REGZA SL Series", "Samsung", 
                                                        "Samsung UN46D7900 46-Inch 1080p 240HZ 3D LED HDTV Bundle with 3D Starter Kit and 3D Blu-Ray Player (Silver)", 
                                                        "SD Memory Card", "Sleep", "Sleep Timer; Auto Program", "Sony", 
                                                        "Sony KDL40W600B 40-Inch 1080p 60Hz Smart LED TV (2014 Model)", 
                                                        "Stereo", "Tabletop", "TheaterWide", "Toshiba", "Toshiba 29L1350U 29\" 720p LED-LCD TV - 16:9 - HDTV - Audyssey EQ, Audyssey ABX - 3 x HDMI - USB - Media PlayerShow More +", 
                                                        "Toshiba 32SL400U 32-Inch 720p Ultra Thin LED HDTV, Black", "Toshiba 40E200U 40-Inch 1080p LCD HDTV (Black Gloss)", 
                                                        "Toshiba 46G300U 46-Inch 1080p 120 Hz LCD HDTV (Black Gloss)", 
                                                        "Toshiba 55WX800U - 55\" LED TV - 1080p (FullHD)", "TV Wall Mount Kit for Samsung 40\" TV for UN40H6350, UN40H5500, UN40FH6030, UN40H5203, UN40H6400, UN40H4005, UN40H6203, UN40H5003, UN40EH5300, UN40EH5000, UN40HU6950, RM40D, UN40F6300, UN40H5203AF, UN40H5203AFXZA, UN40F5500, UN40EH6000, UN40F6400, UN40ES6100, DB40D, UN40ES6500, UN40B6000, LN40A550, H40B, HG40NA577LF, LN40E550, UN40C6300, LN40C530, UN40H5003AF, UA-40H5100, DM40D, LN40D630, MD40C, LN40D550, UN40EH6030, PE40C, LN40B530, LN40B650, LN40D503, ME40C, LN-S4052D, LN40A530, UN40EH5300FXZAB, LNT4065F, LNT4061F, LN40A750, LNT4071F, LNT4069FX Tvs. Includes 32\"-60\" Flat TV Wall Mount + 2 High Speed Gold Plated HDMI Cables + TV Cleaner Set + Microfiber Cleaning Cloth.", 
                                                        "Ultra Slim Flat Wall Mount for Samsung 55\" for (ME55C, UN55H6350, UN55H6400, UN55H7150, UN55FH6030, UN55F8000, UN55HU8550, UN55H8000, UN55F9000, UN55H6203, UN55HU9000, UN55F7500, UN55FH6200, UN55FH6003, UN55HU7250, UN55FH6003, UN55F7100, UN55HU8700, UN55HU6950, UN55F6300, UN55F6400, UN55D8000, UN55ES6100, UN55EH6000, UN55ES7500, UN55ES8000, UN55D7000, MD55C, 55UB8500, UN55ES6600, UN55ES7100, UN55B8000, UN55C6300, UN55C7000, UN55C8000, UN55ES6500, UN55D6300, UN55D6400, UN55ES6580, UN55EH6070, UN55B7000, UN55B6000, UN55HU6840, 55UB8200, UN55D6500, UN55ES6003, UN55D6000, UN55B8500, LN55C630, UN55D6050, UN55HU7200, UN55HU7200H, UN55D7050, UN55ES6150, UN55HU6950FXZA, UN55JS9000, UE55D, UN55HU8550FXZA, UN55HU7250FXZA) Tvs. Includes Tilt Wall Mount + 2 HDMI Cables + TV Cleaner Set + Microfiber Cleaning Cloth", 
                                                        "UN40H5003", "UN46D7900", "UN55ES6003", "UN55FH6003", "VGA (HD-15)", 
                                                        "VGA (HD-15), HDMI", "Vizio", "Vizio M220NV M221NV HDTV Vesa Mount Adapter 1712-0101-7920", 
                                                        "Wi-Fi, LAN", "With stand", "Without stand", "yes", "Yes", "Yes ( 3D glasses sold separately )", 
                                                        "Yes , 1 port(s)"), class = "factor")), 
              .Names = c("GTIN", "Key", 
                         "Value"), class = "data.frame", row.names = c(NA, -300L))

df = setDF(df)

############################
dfa = aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse=", "))
test1 = spread(dfa, Key, Value, fill = '')

print("My solution")
print(microbenchmark(aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse=", ")),spread(dfa, Key, Value, fill = " ")),times=100)
cat("\n")

############################
#akrun's solution #1
#Dcast and data.table
test2 = dcast(setDT(df), GTIN~Key, value.var='Value', 
              fun.aggregate=function(x) toString(unique(x)))

############################
#akrun's solution #2
#dplyr 
test3 = df %>%
  group_by(GTIN, Key) %>% 
  summarise(Value=toString(unique(Value))) %>% 
  spread(Key, Value, fill='')

#timing
print("dcast and aggregate")
print(microbenchmark(dcast(setDT(df), GTIN~Key, value.var='Value', 
                           fun.aggregate=function(x) toString(unique(x))),times=100))
cat("\n")

print("dplyr and tidyr")
print(microbenchmark(df %>%
                       group_by(GTIN, Key) %>% 
                       summarise(Value=toString(unique(Value))) %>% 
                       spread(Key, Value, fill=''),times=100))

Results:

[1] "My solution"
Unit: milliseconds
                                                                          expr       min        lq      mean    median
 aggregate(df[3], df[-3], FUN = function(X) paste(unique(X), collapse = ", ")) 40.423792 41.549188 44.496005 43.840846
                                           spread(dfa, Key, Value, fill = " ")  1.421713  1.533474  1.717194  1.626987
       uq       max neval cld
 44.84422 75.155029   100   b
  1.71860  4.422517   100  a 

[1] "dcast and aggregate"
Unit: milliseconds
                                                                                               expr      min       lq
 dcast(setDT(df), GTIN ~ Key, value.var = "Value", fun.aggregate = function(x) toString(unique(x))) 45.48038 47.35578
    mean  median       uq      max neval
 50.2578 48.9037 50.40563 67.42152   100

[1] "dplyr and tidyr"
Unit: milliseconds
                                                                                                             expr      min
 df %>% group_by(GTIN, Key) %>% summarise(Value = toString(unique(Value))) %>%      spread(Key, Value, fill = "") 40.70889
       lq     mean   median       uq      max neval
 41.41405 43.97719 43.06974 43.97275 93.33509   100

Results on 1000 rows show that solution #3 is 10% faster than #1. The more rows - the more it gets faster



来源:https://stackoverflow.com/questions/31209388/r-spread-function-on-data-frame-with-duplicates

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!