Is there any method to combine data frames properly?

后端 未结 3 473
野趣味
野趣味 2021-01-29 11:14

I have a problem with combining data frames which share the same rows. I imported rasters from different folders and converted them into data frames and tried to combine them to

相关标签:
3条回答
  • 2021-01-29 11:30

    I'm not sure if you need more than just these 4 data frames, but the main idea is shown here. It can be adjusted for more than these 4, or for the case where the data frames are stored in a list.

    Data:

    dput(df1)
    structure(list(x = c(696060L, 696090L, 696120L, 696150L, 696180L, 
    696210L, 696240L, 696270L, 696300L, 696330L), y = c(-3327450L, 
    -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, 
    -3327450L, -3327450L, -3327450L), R1 = c(66L, 71L, 69L, 65L, 
    67L, 68L, 67L, 68L, 69L, 0L)), class = "data.frame", row.names = c(NA, 
    -10L))
    dput(df2)
    structure(list(x = c(696060L, 696090L, 696120L, 696150L, 696180L, 
    696210L, 696240L, 696270L, 696300L, 696330L), y = c(-3327450L, 
    -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, 
    -3327450L, -3327450L, -3327450L), R2 = c(66L, 71L, 69L, 65L, 
    67L, 68L, 67L, 68L, 69L, 0L)), class = "data.frame", row.names = c(NA, 
    -10L))
    dput(df3)
    structure(list(x = c(753810L, 753840L, 753870L, 753900L, 753930L, 
    753960L, 753990L, 754020L, 754050L, 754080L), y = c(-3339930L, 
    -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, 
    -3339930L, -3339930L, -3339930L), R1 = c(109L, 108L, 108L, 109L, 
    108L, 109L, 109L, 109L, 110L, 109L)), class = "data.frame", row.names = c(NA, 
    -10L))
    dput(df4)
    structure(list(x = c(753810L, 753840L, 753870L, 753900L, 753930L, 
    753960L, 753990L, 754020L, 754050L, 754080L), y = c(-3339930L, 
    -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, 
    -3339930L, -3339930L, -3339930L), R2 = c(109L, 108L, 108L, 109L, 
    108L, 109L, 109L, 109L, 110L, 109L)), class = "data.frame", row.names = c(NA, 
    -10L))
    

    First, combine the data frames in pairs using dplyr's left_join():

    combined1 <- left_join(df1, df2, by = c("x" = "x", "y" = "y"))
    combined1
            x        y R1 R2
    1  696060 -3327450 66 66
    2  696090 -3327450 71 71
    3  696120 -3327450 69 69
    4  696150 -3327450 65 65
    5  696180 -3327450 67 67
    6  696210 -3327450 68 68
    7  696240 -3327450 67 67
    8  696270 -3327450 68 68
    9  696300 -3327450 69 69
    10 696330 -3327450  0  0
    combined2 <- left_join(df3, df4, by = c("x" = "x", "y" = "y"))
    combined2
            x        y  R1  R2
    1  753810 -3339930 109 109
    2  753840 -3339930 108 108
    3  753870 -3339930 108 108
    4  753900 -3339930 109 109
    5  753930 -3339930 108 108
    6  753960 -3339930 109 109
    7  753990 -3339930 109 109
    8  754020 -3339930 109 109
    9  754050 -3339930 110 110
    10 754080 -3339930 109 109
    

    Then you can rbind these into the full dataframe you want:

    allCombined <- rbind(combined1, combined2)
    allCombined
            x        y  R1  R2
    1  696060 -3327450  66  66
    2  696090 -3327450  71  71
    3  696120 -3327450  69  69
    4  696150 -3327450  65  65
    5  696180 -3327450  67  67
    6  696210 -3327450  68  68
    7  696240 -3327450  67  67
    8  696270 -3327450  68  68
    9  696300 -3327450  69  69
    10 696330 -3327450   0   0
    11 753810 -3339930 109 109
    12 753840 -3339930 108 108
    13 753870 -3339930 108 108
    14 753900 -3339930 109 109
    15 753930 -3339930 108 108
    16 753960 -3339930 109 109
    17 753990 -3339930 109 109
    18 754020 -3339930 109 109
    19 754050 -3339930 110 110
    20 754080 -3339930 109 109
    

    Does this make sense?

    0 讨论(0)
  • 2021-01-29 11:37

    In dplyr you will find the inner_join or left_join functions (using the by = c() parameter to specify the join column).

    I'd recommend those as an easy way of linking data, especially if you're familiar with SQL.

    0 讨论(0)
  • 2021-01-29 11:41

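    You may adapt this solution. You can get the desired row order by adding an id column to each data frame before merging. Note that the ids are taken from names(All), so All must be a named list.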

    res <- Reduce(function(...) merge(..., all=TRUE), 
           Map(`[<-`, All, "id", value=substring(names(All), 2)))
    res[order(res$id), -3]  # order here by `id`
    #         x        y  R2  R1
    # 1  696060 -3327450  NA  66
    # 3  696090 -3327450  NA  71
    # 5  696120 -3327450  NA  69
    # 7  696150 -3327450  NA  65
    # 9  696180 -3327450  NA  67
    # 11 696210 -3327450  NA  68
    # 13 696240 -3327450  NA  67
    # 15 696270 -3327450  NA  68
    # 17 696300 -3327450  NA  69
    # 19 696330 -3327450  NA   0
    # 2  696060 -3327450  66  NA
    # 4  696090 -3327450  71  NA
    # 6  696120 -3327450  69  NA
    # 8  696150 -3327450  65  NA
    # 10 696180 -3327450  67  NA
    # 12 696210 -3327450  68  NA
    # 14 696240 -3327450  67  NA
    # 16 696270 -3327450  68  NA
    # 18 696300 -3327450  69  NA
    # 20 696330 -3327450   0  NA
    # 21 753810 -3339930  NA 109
    # 23 753840 -3339930  NA 108
    # 25 753870 -3339930  NA 108
    # 27 753900 -3339930  NA 109
    # 29 753930 -3339930  NA 108
    # 31 753960 -3339930  NA 109
    # 33 753990 -3339930  NA 109
    # 35 754020 -3339930  NA 109
    # 37 754050 -3339930  NA 110
    # 39 754080 -3339930  NA 109
    # 22 753810 -3339930 109  NA
    # 24 753840 -3339930 108  NA
    # 26 753870 -3339930 108  NA
    # 28 753900 -3339930 109  NA
    # 30 753930 -3339930 108  NA
    # 32 753960 -3339930 109  NA
    # 34 753990 -3339930 109  NA
    # 36 754020 -3339930 109  NA
    # 38 754050 -3339930 110  NA
    # 40 754080 -3339930 109  NA
    

    Data

    All <- list(structure(list(x = c(696060L, 696090L, 696120L, 696150L, 
    696180L, 696210L, 696240L, 696270L, 696300L, 696330L), y = c(-3327450L, 
    -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, 
    -3327450L, -3327450L, -3327450L), R1 = c(66L, 71L, 69L, 65L, 
    67L, 68L, 67L, 68L, 69L, 0L)), row.names = c(NA, -10L), class = "data.frame"), 
        structure(list(x = c(696060L, 696090L, 696120L, 696150L, 
        696180L, 696210L, 696240L, 696270L, 696300L, 696330L), y = c(-3327450L, 
        -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, -3327450L, 
        -3327450L, -3327450L, -3327450L), R2 = c(66L, 71L, 69L, 65L, 
        67L, 68L, 67L, 68L, 69L, 0L)), row.names = c(NA, -10L), class = "data.frame"), 
        structure(list(x = c(753810L, 753840L, 753870L, 753900L, 
        753930L, 753960L, 753990L, 754020L, 754050L, 754080L), y = c(-3339930L, 
        -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, 
        -3339930L, -3339930L, -3339930L), R1 = c(109L, 108L, 108L, 
        109L, 108L, 109L, 109L, 109L, 110L, 109L)), row.names = c(NA, 
        -10L), class = "data.frame"), structure(list(x = c(753810L, 
        753840L, 753870L, 753900L, 753930L, 753960L, 753990L, 754020L, 
        754050L, 754080L), y = c(-3339930L, -3339930L, -3339930L, 
        -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, -3339930L, 
        -3339930L), R2 = c(109L, 108L, 108L, 109L, 108L, 109L, 109L, 
        109L, 110L, 109L)), row.names = c(NA, -10L), class = "data.frame"))
    
    0 讨论(0)
提交回复
热议问题