问题
I'm trying to use `derivedFactor` from the `mosaic` package in R to create a factor variable, but it's surprisingly slow. When I coded the same logic as a function built from a series of `if` statements and ran that, it ran almost twice as quickly.
Here's a reproducible example (sorry for the length):
library(microbenchmark)
library(mosaic)
library(lubridate)
library(data.table)
library(dplyr)
# Example data for the benchmark: 163 rows, one per consecutive study week.
#
# Columns (all of length 163):
#   study.week - UTC timestamps spaced exactly one week (604800 s) apart
#   time.min   - constant UTC timestamp
#   time.max   - constant UTC timestamp
#   adopt      - factor, every row "experiment"
#   floor.min  - constant UTC timestamp
#   sup.using  - logical, TRUE only for rows 149-151
#   sup.use    - factor, every row "never used"
#
# The column names come from the list itself, so they always match the seven
# columns that are present. No `sorted` key attribute is set, because the
# example contains no key column. The class vector is kept so that the object
# is dispatched on the same way as the data it was dumped from.
df <- local({
  n_rows <- 163L
  one_week <- 604800 # seconds in seven days

  # Tag seconds-since-epoch values as a UTC POSIXct vector.
  as_utc <- function(secs) {
    structure(secs, class = c("POSIXct", "POSIXt"), tzone = "UTC")
  }

  structure(
    list(
      # Double arithmetic keeps the storage mode of the timestamps as double.
      study.week = as_utc(1299369600 + one_week * (seq_len(n_rows) - 1)),
      time.min = as_utc(rep(1389227642, n_rows)),
      time.max = as_utc(rep(1390345694, n_rows)),
      adopt = factor(
        rep("experiment", n_rows),
        levels = c("experiment", "abandon", "adopt")
      ),
      floor.min = as_utc(rep(1388880000, n_rows)),
      # 148 FALSE, then 3 TRUE (rows 149-151), then 12 FALSE.
      sup.using = rep(c(FALSE, TRUE, FALSE), times = c(148L, 3L, 12L)),
      sup.use = factor(
        rep("never used", n_rows),
        levels = c("never used", "experimented", "abandoned")
      )
    ),
    class = c("tbl_dt", "tbl", "data.table", "data.frame"),
    row.names = c(NA, -n_rows)
  )
})
# Base R function: label each observation using scalar `if` logic.
#
# The inner function handles a single observation; Vectorize() applies it
# element by element across all four arguments and collects the labels.
#
# Arguments (parallel vectors):
#   floor.min  - lower bound compared against study.week; may be NA
#   study.week - the time point being classified
#   time.max   - upper bound compared against study.week
#   adopt      - compared against the strings "experiment" and "abandon"
#
# Result, per element:
#   "never used"      if floor.min is NA or study.week < floor.min
#   "experimented"    if study.week > time.max and adopt == "experiment"
#   "abandoned"       if study.week > time.max and adopt == "abandon"
#   "currently using" in every other case
recodeTimes <- Vectorize(function(floor.min, study.week, time.max, adopt) {
  # Scalar, short-circuiting `||` is the right operator inside `if`: when
  # floor.min is NA the comparison on the right is never evaluated.
  if (is.na(floor.min) || study.week < floor.min) {
    "never used"
  } else if (study.week > time.max) {
    # Past the upper bound: the label depends on the adopt value.
    if (adopt == "experiment") {
      "experimented"
    } else if (adopt == "abandon") {
      "abandoned"
    } else {
      "currently using"
    }
  } else {
    "currently using"
  }
})
# Benchmark: time two expressions that add the same three derived columns
# (floor.min, sup.using, sup.use) to `df`. They differ only in how sup.use is
# built, so the floor.min and sup.using steps are included in both timings.
#   Expression 1 (df1): sup.use via derivedFactor().
#   Expression 2 (df2): sup.use via recodeTimes().
# NOTE(review): recodeTimes() yields character labels, while derivedFactor()
# presumably yields a factor, so df1$sup.use and df2$sup.use may differ in
# type - confirm if the results are compared.
microbenchmark(
{
df1 <- df %>%
mutate(
# Round time.min down to a week boundary (lubridate::floor_date).
floor.min = floor_date(time.min, "week"),
# Is study.week inside the interval from floor.min to time.max?
sup.using = study.week %within% interval(floor.min, time.max),
# Treat an NA result as FALSE.
sup.using = ifelse(is.na(sup.using), FALSE, sup.using),
# Each named argument is a level name paired with the condition selecting it.
sup.use = derivedFactor(
"never used" = (is.na(floor.min) | study.week < floor.min),
"experimented" = (study.week > time.max & adopt == "experiment"),
"abandoned" = (study.week > time.max & adopt == "abandon"),
# Presumably: when several conditions hold, the first listed one is used,
# and .default is the level when none holds - confirm in the mosaic docs.
.method = "first",
.default = "currently using"
)
)
}, {
df2 <- df %>%
mutate(
# Same floor.min / sup.using steps as in the first expression.
floor.min = floor_date(time.min, "week"),
sup.using = study.week %within% interval(floor.min, time.max),
sup.using = ifelse(is.na(sup.using), FALSE, sup.using),
# Element-by-element recoding through the Vectorize()-wrapped function.
sup.use = recodeTimes(floor.min, study.week, time.max, adopt)
)
}
)
# results:
# min lq mean median uq max neval
# 57.41792 62.77737 87.01017 72.6734 104.12907 242.4751 100
# 32.77108 34.84122 50.51734 43.2975 60.34229 122.6671 100
Any guesses what's causing the large time difference?
来源:https://stackoverflow.com/questions/33787691/why-is-mosaicderivedfactor-twice-as-slow-as-a-base-function