How to join list of dataframes to one dataframe, using the DF / list indices?
How to join list of dataframes to one dataframe, using the DF / list indices?
library(dplyr); library(tibble)
Here is my sample data. A list of small dataframes (listOfDFs
) I want to join to a single dataframe, (points
).
listOfDFs
points
listOfDfs
has 5 small dataframes with 7 rows total, and points
is one dataframe with 7 rows:
listOfDfs
points
points <- structure(list(EVENT_ID_CNTY = c("LBY1243", "LBY3389", "LBY3393",
"LBY3506", "LBY3822"), year = c(2013, 2015, 2015, 2015, 2015),
COUNTRY = c("Libya", "Libya", "Libya", "Libya", "Libya")), .Names = c("EVENT_ID_CNTY",
"year", "COUNTRY"), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
listOfDFs <- structure(list(`1` = structure(list(CELL_ID = c(165267, 164547
), gwno = c(620L, 620L), POP = c(751.0737, 754.5745), prio_country = c("Libya",
"Libya")), .Names = c("CELL_ID", "gwno", "POP", "prio_country"
), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
)), `2` = structure(list(CELL_ID = 172475, gwno = 620L, POP = 11676,
prio_country = "Libya"), .Names = c("CELL_ID", "gwno", "POP",
"prio_country"), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), `3` = structure(list(CELL_ID = 172475,
gwno = 620L, POP = 11676, prio_country = "Libya"), .Names = c("CELL_ID",
"gwno", "POP", "prio_country"), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), `4` = structure(list(CELL_ID = 172475,
gwno = 620L, POP = 11676, prio_country = "Libya"), .Names = c("CELL_ID",
"gwno", "POP", "prio_country"), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame")), `5` = structure(list(CELL_ID = c(165267,
164547), gwno = c(620L, 620L), POP = c(751.0737, 754.5745), prio_country = c("Libya",
"Libya")), .Names = c("CELL_ID", "gwno", "POP", "prio_country"
), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
))), .Names = c("1", "2", "3", "4", "5"))
These data look like the following:
points
#> # A tibble: 5 x 3
#> EVENT_ID_CNTY year COUNTRY
#> <chr> <dbl> <chr>
#> 1 LBY1243 2013. Libya # Corresponds to the two items in listOfDFs[[1]]
#> 2 LBY3389 2015. Libya # Corresponds to the one item in listOfDFs[[2]]
#> 3 LBY3393 2015. Libya
#> 4 LBY3506 2015. Libya
#> 5 LBY3822 2015. Libya
listOfDFs
#> $`1`
#> # A tibble: 2 x 4
#> CELL_ID gwno POP prio_country
#> <dbl> <int> <dbl> <chr>
#> 1 165267. 620 751. Libya
#> 2 164547. 620 755. Libya
#>
#> $`2`
#> # A tibble: 1 x 4
#> CELL_ID gwno POP prio_country
#> <dbl> <int> <dbl> <chr>
#> 1 172475. 620 11676. Libya
#>
#> $`3`
#> # A tibble: 1 x 4
#> CELL_ID gwno POP prio_country
#> <dbl> <int> <dbl> <chr>
#> 1 172475. 620 11676. Libya
#>
#> $`4`
#> # A tibble: 1 x 4
#> CELL_ID gwno POP prio_country
#> <dbl> <int> <dbl> <chr>
#> 1 172475. 620 11676. Libya
#>
#> $`5`
#> # A tibble: 2 x 4
#> CELL_ID gwno POP prio_country
#> <dbl> <int> <dbl> <chr>
#> 1 165267. 620 751. Libya
#> 2 164547. 620 755. Libya
How do I join these two such that they follow the pattern below? Do I have to find a way to bind them on the dataframe's row index or is there a more elegant way?:
#> EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country
#> <chr> <dbl> <chr> <dbl> <int> <dbl> <chr>
#> LBY1243 2013. Libya 165267. 620 751. Libya
#> LBY1243 2013. Libya 164547. 620 755. Libya
#> LBY3389 2015. Libya 172475. 620 11676. Libya
#> LBY3393 2015. Libya 172475. 620 11676. Libya
#> LBY3506 2015. Libya 172475. 620 11676. Libya
#> LBY3822 2015. Libya 165267. 620 751. Libya
#> LBY3822 2015. Libya 164547. 620 755. Libya
.........
3 Answers
3
library(tidyr)
points %>% mutate(mm=listOfDFs) %>% unnest
# A tibble: 7 x 7
EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country
<chr> <dbl> <chr> <dbl> <int> <dbl> <chr>
1 LBY1243 2013 Libya 165267 620 751. Libya
2 LBY1243 2013 Libya 164547 620 755. Libya
3 LBY3389 2015 Libya 172475 620 11676 Libya
4 LBY3393 2015 Libya 172475 620 11676 Libya
5 LBY3506 2015 Libya 172475 620 11676 Libya
6 LBY3822 2015 Libya 165267 620 751. Libya
7 LBY3822 2015 Libya 164547 620 755. Libya
Based on the example, we replicate the rows of the 'points' dataset with the nrow of each element of the list
and bind the columns together
list
library(tidyverse)
bind_cols(points[rep(seq_len(nrow(points)),
map_dbl(listOfDFs, nrow)),], bind_rows(listOfDFs))
# A tibble: 7 x 7
#EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country
# <chr> <dbl> <chr> <dbl> <int> <dbl> <chr>
#1 LBY1243 2013 Libya 165267 620 751. Libya
#2 LBY1243 2013 Libya 164547 620 755. Libya
#3 LBY3389 2015 Libya 172475 620 11676 Libya
#4 LBY3393 2015 Libya 172475 620 11676 Libya
#5 LBY3506 2015 Libya 172475 620 11676 Libya
#6 LBY3822 2015 Libya 165267 620 751. Libya
#7 LBY3822 2015 Libya 164547 620 755. Libya
Or using in a chain
map_dbl(listOfDFs, nrow) %>%
rep(seq_len(nrow(points)), .) %>%
magrittr::extract(points, ., ) %>%
bind_cols(bind_rows(listOfDFs))
Or set the names of the list
with 'EVENT_ID_CNTY' to create an id column and join with the 'points'
list
listOfDFs %>%
set_names(points$EVENT_ID_CNTY) %>%
bind_rows(.id = 'EVENT_ID_CNTY') %>%
left_join(points)
An option using dplyr::bind_rows
and dplyr::inner_join
can be as:
dplyr::bind_rows
dplyr::inner_join
library(dplyr)
mutate(points, rn = as.character(row_number())) %>%
inner_join(bind_rows(listOfDFs, .id="Name"), by=c("rn" = "Name")) %>%
select(-rn)
# # A tibble: 7 x 7
# EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country
# <chr> <dbl> <chr> <dbl> <int> <dbl> <chr>
# 1 LBY1243 2013 Libya 165267 620 751 Libya
# 2 LBY1243 2013 Libya 164547 620 755 Libya
# 3 LBY3389 2015 Libya 172475 620 11676 Libya
# 4 LBY3393 2015 Libya 172475 620 11676 Libya
# 5 LBY3506 2015 Libya 172475 620 11676 Libya
# 6 LBY3822 2015 Libya 165267 620 751 Libya
# 7 LBY3822 2015 Libya 164547 620 755 Libya
By clicking "Post Your Answer", you acknowledge that you have read our updated terms of service, privacy policy and cookie policy, and that your continued use of the website is subject to these policies.
Comments
Post a Comment