问题
I am trying to run a fitting function on a rather large data frame, grouped by a variable named "big_group"
and 'small_group'
.
In particular, I am trying to get predictions and coefs values of every small_group
inside of big_group
.
That is, I'm trying to add these new columns to my new data.frame at the end of do({
function.
Some of the groups of this data cannot be fitted due to either lack of data points or "singular gradient matrix at initial parameter estimates" error.
So, I used tryCatch
method from this post of how-do-i-ignore-errors-and-continue-processing-list-items and I used following answer of @Koshke
R : catching errors in `nls`
OTH, after solving this issue I come to encounter an error is saying
Error: Results are not data frames at positions: 3
There is some discussions about this error but I could not figure it how to implement to my problem.
Here is my reproducible example; (This example is similar to my real data that's why I built the example like this)
library(minpack.lm)
library(dplyr)
set.seed(100)
data.list <- lapply(1:2, function(big_group) {
xx <- c(sort(runif(5,1,5)),sort(runif(5,-8,-2)), rep(5,2)) ##I intentionall added the last two 5 to get unfitted groups
yy<- sort(runif(12,0,10))
small_group <- rep(c('a','b','c'),times=c(5,5,2)) ##small groups in under the big_group
df <- data.frame(xx,yy,small_group,big_group)
df <- df%>%
group_by(big_group,small_group)%>%
do({
#fitting part
fit <- tryCatch(nlsLM(yy~k*xx/2+U, start=c(k=1,U=5), data = ., trace=T,
control = nls.lm.control(maxiter=100)),error=function(e) NULL)
if(!("NULL" %in% class(fit))){
new.range<- data.frame(xx=seq(1,10,length.out=nrow(.)))
predicted <- predict(fit, newdata =new.range)
coefs <- data.frame(k=coef(fit)[1],U=coef(fit)[2])
data.frame(., new.range,predicted,coefs,row.names=NULL) ##This is the part the error came from I guess!
}})
})
This is what the data looks like; @RomanLuštrik
data.list <- lapply(1:2, function(big_group) {
xx <- c(sort(runif(5,1,5)),sort(runif(5,-8,-2)), rep(5,2)) ##I intentionall added the last two 5 to get unfitted groups
yy<- sort(runif(12,0,10))
small_group <- rep(c('a','b','c'),times=c(5,5,2)) ##small groups in under the big_group
df <- data.frame(xx,yy,small_group,big_group)
})
df <- bind_rows(data.list)
> df
xx yy small_group big_group
1 1.685681 1.302889 a 1
2 2.680406 1.804072 a 1
3 3.153395 3.306605 a 1
4 3.995889 3.486920 a 1
5 4.081206 6.293909 a 1
6 -6.333657 6.952741 b 1
7 -5.070164 7.775844 b 1
8 -4.705420 8.273034 b 1
9 -2.708278 8.651205 b 1
10 -2.428970 8.894535 b 1
11 5.000000 9.541577 c 1
12 5.000000 9.895641 c 1
13 1.830856 1.234872 a 2
14 2.964927 2.114086 a 2
15 3.413297 2.299059 a 2
16 4.121434 2.533907 a 2
17 4.536908 3.577738 a 2
18 -6.807926 4.451480 b 2
19 -6.585834 4.637012 b 2
20 -6.350680 5.913211 b 2
21 -6.157485 5.975753 b 2
22 -6.016821 6.471012 b 2
23 5.000000 6.763982 c 2
24 5.000000 9.605731 c 2
回答1:
How about this? The trouble seemed to be forcing the traditional R code to work with the %>%
pipe, so I just worked around it.
# Libraries and Options ---------------------------------------------------
library(minpack.lm)
library(dplyr)
set.seed(100)
# Create the data ---------------------------------------------------------
data.list <- lapply(1:2, function(big_group) {
xx <- c(sort(runif(5,1,5)),sort(runif(5,-8,-2)), rep(5,2)) ##I intentionall added the last two 5 to get unfitted groups
yy<- sort(runif(12,0,10))
small_group <- rep(c('a','b','c'),times=c(5,5,2)) ##small groups in under the big_group
df <- data.frame(xx,yy,small_group,big_group)
})
df <- bind_rows(data.list)
# Fit the Model -----------------------------------------------------------
print("My understanding here is that you want a separate model fit for each combination of big group and small group")
# Create fit-level groups
df$big_small <- paste0(df$big_group, df$small_group)
# Create results object
df1 <- structure(list(xx = numeric(0), yy = numeric(0), small_group = structure(integer(0), .Label = c("a",
"b", "c"), class = "factor"), big_group = integer(0), big_small = character(0),
xx.1 = numeric(0), predicted = numeric(0), k = numeric(0),
U = numeric(0)), .Names = c("xx", "yy", "small_group", "big_group",
"big_small", "xx.1", "predicted", "k", "U"), row.names = integer(0), class = "data.frame")
# Fit model, get results
for(b_s in unique(df$big_small)){
fit <- tryCatch(nlsLM(yy~k*xx/2+U, start=c(k=1,U=5), data = df[df$big_small==b_s,], trace=T,
control = nls.lm.control(maxiter=100)),error=function(e) NULL)
if(!("NULL" %in% class(fit))){
new.range<- data.frame(xx=seq(1,10,length.out=nrow(df[df$big_small==b_s,])))
predicted <- predict(fit, newdata =new.range)
coefs <- data.frame(k=coef(fit)[1],U=coef(fit)[2])
df1 <- rbind(df1, data.frame(df[df$big_small==b_s,], new.range,predicted,coefs,row.names=NULL))
}
}
It. 0, RSS = 44.4318, Par. = 1 5 It. 1, RSS = 0.259895, Par. = 1.89421 1.00916 It. 2, RSS = 0.259895, Par. = 1.89421 1.00916 It. 0, RSS = 81.5517, Par. = 1 5 It. 1, RSS = 0.256959, Par. = 0.912615 8.80728 It. 2, RSS = 0.256959, Par. = 0.912615 8.80728 It. 0, RSS = 1.76253, Par. = 1 5 It. 1, RSS = 0.715381, Par. = -156.969 400.646 It. 2, RSS = 0.715381, Par. = -156.969 400.646 It. 0, RSS = 64.766, Par. = 1 5 It. 1, RSS = 4.27941, Par. = 3.32947 -1.95395 It. 2, RSS = 4.27941, Par. = 3.32947 -1.95395 It. 0, RSS = 137.22, Par. = 1 5 It. 1, RSS = 0.209219, Par. = 0.893139 10.0071 It. 2, RSS = 0.209219, Par. = 0.893139 10.0071 It. 0, RSS = 9.90713, Par. = 1 5 It. 1, RSS = 0.0626808, Par. = -156.67 401.394 It. 2, RSS = 0.0626808, Par. = -156.67 401.394
df1
xx yy small_group big_group big_small xx.1 predicted k U 1 1.225533 2.046122 a 1 1a 1.00 1.9562669 1.8942075 1.009163 2 2.030690 2.803538 a 1 1a 3.25 4.0872502 1.8942075 1.009163 3 2.231064 3.575249 a 1 1a 5.50 6.2182336 1.8942075 1.009163 4 2.874197 3.594751 a 1 1a 7.75 8.3492170 1.8942075 1.009163 5 3.209290 3.984879 a 1 1a 10.00 10.4802004 1.8942075 1.009163 6 -6.978428 5.358112 b 1 1b 1.00 9.2635844 0.9126145 8.807277 7 -5.778077 6.249965 b 1 1b 3.25 10.2902757 0.9126145 8.807277 8 -5.097376 6.690217 b 1 1b 5.50 11.3169671 0.9126145 8.807277 9 -4.720648 6.902905 b 1 1b 7.75 12.3436585 0.9126145 8.807277 10 -3.125584 7.108038 b 1 1b 10.00 13.3703498 0.9126145 8.807277 11 1.685681 1.302889 a 2 2a 1.00 -0.2892182 3.3294688 -1.953953 12 2.680406 1.804072 a 2 2a 3.25 3.4564342 3.3294688 -1.953953 13 3.153395 3.306605 a 2 2a 5.50 7.2020866 3.3294688 -1.953953 14 3.995889 3.486920 a 2 2a 7.75 10.9477390 3.3294688 -1.953953 15 4.081206 6.293909 a 2 2a 10.00 14.6933913 3.3294688 -1.953953 16 -6.333657 6.952741 b 2 2b 1.00 10.4536476 0.8931386 10.007078 17 -5.070164 7.775844 b 2 2b 3.25 11.4584286 0.8931386 10.007078 18 -4.705420 8.273034 b 2 2b 5.50 12.4632095 0.8931386 10.007078 19 -2.708278 8.651205 b 2 2b 7.75 13.4679905 0.8931386 10.007078 20 -2.428970 8.894535 b 2 2b 10.00 14.4727715 0.8931386 10.007078
来源:https://stackoverflow.com/questions/45220231/error-results-are-not-data-frames-at-positions