Create count per item by year/decade

后端 未结 2 1791
我在风中等你
我在风中等你 2021-01-24 02:04

I have data in a data.table that is as follows:

> x<-df[sample(nrow(df), 10),]
> x      

>                   Importer                 Exporter               


        
2条回答
  •  深忆病人
    2021-01-24 02:48

    I think this will work using with() and aggregate() in base R:

    # Reproducible sample of trade records (one row per record). strip.white
    # drops the padding around each field; string columns are read as factors,
    # which the counting step below relies on.
    my.data <- read.csv(
        header = TRUE, stringsAsFactors = TRUE, strip.white = TRUE,
        text = '
            Importer,             Exporter,           Date
             Ecuador,       United Kingdom,     2004-01-13
              Mexico,        United States,     2013-11-19
           Australia,        United States,     2006-08-11
       United States,        United States,     2009-05-04
               India,        United States,     2007-07-16
           Guatemala,            Guatemala,     2014-07-02
              Israel,               Israel,     2000-02-22
               India,        United States,     2014-02-11
                Peru,                 Peru,     2007-03-26
              Poland,               France,     2014-09-15
    ')

    # Parse the date column and break it into numeric year / month / day parts.
    my.data$my.Date <- as.Date(my.data$Date, format = "%Y-%m-%d")
    my.data$year    <- as.numeric(format(my.data$my.Date, format = "%Y"))
    my.data$month   <- as.numeric(format(my.data$my.Date, format = "%m"))
    my.data$day     <- as.numeric(format(my.data$my.Date, format = "%d"))

    # Floor every year to the first year of its decade (2004 -> 2000).
    my.data$my.decade <- 10 * (my.data$year %/% 10)

    # Number of rows per (decade, country), once for the importer role and once
    # for the exporter role. The cbind() on the left-hand side only supplies a
    # named response column for aggregate() to count.
    importer.count <- aggregate(cbind(count = Importer) ~ my.decade + Importer,
                                data = my.data, FUN = NROW)
    exporter.count <- aggregate(cbind(count = Exporter) ~ my.decade + Exporter,
                                data = my.data, FUN = NROW)

    # Give both tables the same key columns so they can be joined.
    importer.count <- setNames(importer.count,
                               c("my.decade", "country", "importer.count"))
    exporter.count <- setNames(exporter.count,
                               c("my.decade", "country", "exporter.count"))

    # Full outer join: keep countries that appear in only one of the two roles.
    my.counts <- merge(importer.count, exporter.count,
                       by = c("my.decade", "country"), all = TRUE)

    # A missing count after the join means the country never had that role in
    # that decade, so report it as zero.
    my.counts$importer.count <- replace(my.counts$importer.count,
                                        is.na(my.counts$importer.count), 0)
    my.counts$exporter.count <- replace(my.counts$exporter.count,
                                        is.na(my.counts$exporter.count), 0)

    my.counts
    
    #    my.decade        country importer.count exporter.count
    # 1       2000      Australia              1              0
    # 2       2000        Ecuador              1              0
    # 3       2000          India              1              0
    # 4       2000         Israel              1              1
    # 5       2000           Peru              1              1
    # 6       2000  United States              1              3
    # 7       2000 United Kingdom              0              1
    # 8       2010      Guatemala              1              1
    # 9       2010          India              1              0
    # 10      2010         Mexico              1              0
    # 11      2010         Poland              1              0
    # 12      2010  United States              0              2
    # 13      2010         France              0              1
    

提交回复
热议问题