Why is mosaic::derivedFactor twice as slow as a base function?

女生的网名这么多〃 提交于 2019-12-23 19:17:36

问题


I'm trying to use derivedFactor from the mosaic package in R to create a factor variable, but it's surprisingly slow. When I coded the same logic as a series of if statements and ran that instead, it ran almost twice as quickly.

Here's a reproducible example (sorry for the length):

library(microbenchmark)
library(mosaic)
library(lubridate)
library(data.table)
library(dplyr)

# Reproducible fixture: 163 consecutive weekly observations.
#
# The original dput() output could not be evaluated as written: it carried a
# `.Names` attribute listing 12 columns ("SupSID", "sid", "event", "n.posts",
# "cum.posts", ...) for a list that only holds 7, which makes structure() stop
# with a names-length error, and a `sorted = "SupSID"` key attribute naming
# one of the missing columns. Both are dropped here; the column names now come
# from the list itself. Repeated literals are generated rather than spelled
# out, but every value is the same as in the original dump.
df <- local({
  n_rows <- 163L
  one_week <- 604800 # seconds in 7 days

  # Wrap seconds-since-epoch in a UTC POSIXct vector of length n_rows.
  utc_time <- function(secs) {
    structure(
      rep_len(secs, n_rows),
      class = c("POSIXct", "POSIXt"),
      tzone = "UTC"
    )
  }

  structure(
    list(
      # Regular weekly grid: 1299369600, 1299974400, ..., 1397347200.
      study.week = utc_time(1299369600 + one_week * (seq_len(n_rows) - 1L)),
      # The remaining time columns are constant across all rows.
      time.min = utc_time(1389227642),
      time.max = utc_time(1390345694),
      adopt = factor(
        rep("experiment", n_rows),
        levels = c("experiment", "abandon", "adopt")
      ),
      floor.min = utc_time(1388880000),
      # TRUE only for the three weeks lying between floor.min and time.max.
      sup.using = replace(logical(n_rows), 149:151, TRUE),
      sup.use = factor(
        rep("never used", n_rows),
        levels = c("never used", "experimented", "abandoned")
      )
    ),
    class = c("tbl_dt", "tbl", "data.table", "data.frame"),
    row.names = c(NA, -n_rows)
  )
})

# base R function:
# Classify each observation week relative to a usage window.
#
# Arguments (matched element-wise by Vectorize(), i.e. via mapply()):
#   floor.min   start of the usage window; NA means there is no window.
#   study.week  the week being classified.
#   time.max    end of the usage window.
#   adopt       outcome label; only "experiment" and "abandon" change the
#               result for weeks after time.max.
#
# Returns one of "never used", "experimented", "abandoned" or
# "currently using" per element.
#
# Note: Vectorize() invokes the scalar function once per element, so this is
# an R-level loop rather than a truly vectorised computation.
recodeTimes <- Vectorize(function(floor.min, study.week, time.max, adopt) {
  # `||` (not `|`): the condition is scalar, and short-circuiting skips the
  # comparison entirely when floor.min is NA.
  if (is.na(floor.min) || study.week < floor.min) {
    return("never used")
  }
  if (study.week > time.max) {
    if (adopt == "experiment") {
      return("experimented")
    }
    if (adopt == "abandon") {
      return("abandoned")
    }
  }
  # Inside the window, or past it with any other `adopt` value.
  "currently using"
})

# Time two pipelines that are identical except for how `sup.use` is derived:
# the first uses mosaic's derivedFactor(), the second the recodeTimes()
# helper defined above. Both recompute floor.min and sup.using first, so that
# shared work is included in each timing.
microbenchmark(
    {
    # Variant 1: derivedFactor() with one logical rule per level.
    df1 <- df %>%
      mutate(
          floor.min = floor_date(time.min, "week"),
          sup.using = study.week %within% interval(floor.min, time.max),
          # Replace NA membership results with FALSE.
          sup.using = ifelse(is.na(sup.using), FALSE, sup.using),
          sup.use = derivedFactor(  
            "never used" = (is.na(floor.min) | study.week < floor.min),
            "experimented" = (study.week > time.max & adopt == "experiment"),
            "abandoned" = (study.week > time.max & adopt == "abandon"),
            # NOTE(review): presumably "first" means the first matching rule
            # wins when several are TRUE - confirm against the mosaic docs.
            .method = "first",
            .default = "currently using"
          )
      )
    }, {
    # Variant 2: same preparation, but sup.use comes from recodeTimes().
    df2 <- df %>% 
      mutate(
          floor.min = floor_date(time.min, "week"),
          sup.using = study.week %within% interval(floor.min, time.max),
          sup.using = ifelse(is.na(sup.using), FALSE, sup.using),
          sup.use = recodeTimes(floor.min, study.week, time.max, adopt)
          )
    }
    )

# results:
#       min       lq     mean  median        uq      max neval
#  57.41792 62.77737 87.01017 72.6734 104.12907 242.4751   100
#  32.77108 34.84122 50.51734 43.2975  60.34229 122.6671   100

Any guesses what's causing the large time difference?

来源:https://stackoverflow.com/questions/33787691/why-is-mosaicderivedfactor-twice-as-slow-as-a-base-function

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!