How to join list of dataframes to one dataframe, using the DF / list indices?

library(dplyr); library(tibble)

Here is my sample data. A list of small dataframes (listOfDFs) I want to join to a single dataframe, (points).

listOfDFs

points

listOfDfs has 5 small dataframes with 7 rows total, and points is one dataframe with 7 rows:

listOfDfs

points

points <- structure(list(EVENT_ID_CNTY = c("LBY1243", "LBY3389", "LBY3393", "LBY3506", "LBY3822"), year = c(2013, 2015, 2015, 2015, 2015), COUNTRY = c("Libya", "Libya", "Libya", "Libya", "Libya")), .Names = c("EVENT_ID_CNTY", "year", "COUNTRY"), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame")) listOfDFs <- structure(list(`1` = structure(list(CELL_ID = c(165267, 164547 ), gwno = c(620L, 620L), POP = c(751.0737, 754.5745), prio_country = c("Libya", "Libya")), .Names = c("CELL_ID", "gwno", "POP", "prio_country" ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame" )), `2` = structure(list(CELL_ID = 172475, gwno = 620L, POP = 11676, prio_country = "Libya"), .Names = c("CELL_ID", "gwno", "POP", "prio_country"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")), `3` = structure(list(CELL_ID = 172475, gwno = 620L, POP = 11676, prio_country = "Libya"), .Names = c("CELL_ID", "gwno", "POP", "prio_country"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")), `4` = structure(list(CELL_ID = 172475, gwno = 620L, POP = 11676, prio_country = "Libya"), .Names = c("CELL_ID", "gwno", "POP", "prio_country"), row.names = c(NA, -1L), class = c("tbl_df", "tbl", "data.frame")), `5` = structure(list(CELL_ID = c(165267, 164547), gwno = c(620L, 620L), POP = c(751.0737, 754.5745), prio_country = c("Libya", "Libya")), .Names = c("CELL_ID", "gwno", "POP", "prio_country" ), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame" ))), .Names = c("1", "2", "3", "4", "5"))

These data look like the following:

points #> # A tibble: 5 x 3 #> EVENT_ID_CNTY year COUNTRY #> <chr> <dbl> <chr> #> 1 LBY1243 2013. Libya # Corresponds to the two items in listOfDFs[[1]] #> 2 LBY3389 2015. Libya # Corresponds to the one item in listOfDFs[[2]] #> 3 LBY3393 2015. Libya #> 4 LBY3506 2015. Libya #> 5 LBY3822 2015. Libya listOfDFs #> $`1` #> # A tibble: 2 x 4 #> CELL_ID gwno POP prio_country #> <dbl> <int> <dbl> <chr> #> 1 165267. 620 751. Libya #> 2 164547. 620 755. Libya #> #> $`2` #> # A tibble: 1 x 4 #> CELL_ID gwno POP prio_country #> <dbl> <int> <dbl> <chr> #> 1 172475. 620 11676. Libya #> #> $`3` #> # A tibble: 1 x 4 #> CELL_ID gwno POP prio_country #> <dbl> <int> <dbl> <chr> #> 1 172475. 620 11676. Libya #> #> $`4` #> # A tibble: 1 x 4 #> CELL_ID gwno POP prio_country #> <dbl> <int> <dbl> <chr> #> 1 172475. 620 11676. Libya #> #> $`5` #> # A tibble: 2 x 4 #> CELL_ID gwno POP prio_country #> <dbl> <int> <dbl> <chr> #> 1 165267. 620 751. Libya #> 2 164547. 620 755. Libya

How do I join these two such that they follow the pattern below? Do I have to find a way to bind them on the dataframe's row index or is there a more elegant way?:

#> EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country #> <chr> <dbl> <chr> <dbl> <int> <dbl> <chr> #> LBY1243 2013. Libya 165267. 620 751. Libya #> LBY1243 2013. Libya 164547. 620 755. Libya #> LBY3389 2015. Libya 172475. 620 11676. Libya #> LBY3393 2015. Libya 172475. 620 11676. Libya #> LBY3506 2015. Libya 172475. 620 11676. Libya #> LBY3822 2015. Libya 165267. 620 751. Libya #> LBY3822 2015. Libya 164547. 620 755. Libya .........

3 Answers
3

library(tidyr) points %>% mutate(mm=listOfDFs) %>% unnest # A tibble: 7 x 7 EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country <chr> <dbl> <chr> <dbl> <int> <dbl> <chr> 1 LBY1243 2013 Libya 165267 620 751. Libya 2 LBY1243 2013 Libya 164547 620 755. Libya 3 LBY3389 2015 Libya 172475 620 11676 Libya 4 LBY3393 2015 Libya 172475 620 11676 Libya 5 LBY3506 2015 Libya 172475 620 11676 Libya 6 LBY3822 2015 Libya 165267 620 751. Libya 7 LBY3822 2015 Libya 164547 620 755. Libya

Based on the example, we replicate the rows of the 'points' dataset with the nrow of each element of the list and bind the columns together

list

library(tidyverse) bind_cols(points[rep(seq_len(nrow(points)), map_dbl(listOfDFs, nrow)),], bind_rows(listOfDFs)) # A tibble: 7 x 7 #EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country # <chr> <dbl> <chr> <dbl> <int> <dbl> <chr> #1 LBY1243 2013 Libya 165267 620 751. Libya #2 LBY1243 2013 Libya 164547 620 755. Libya #3 LBY3389 2015 Libya 172475 620 11676 Libya #4 LBY3393 2015 Libya 172475 620 11676 Libya #5 LBY3506 2015 Libya 172475 620 11676 Libya #6 LBY3822 2015 Libya 165267 620 751. Libya #7 LBY3822 2015 Libya 164547 620 755. Libya

Or using in a chain

map_dbl(listOfDFs, nrow) %>% rep(seq_len(nrow(points)), .) %>% magrittr::extract(points, ., ) %>% bind_cols(bind_rows(listOfDFs))

Or set the names of the list with 'EVENT_ID_CNTY' to create an id column and join with the 'points'

list

listOfDFs %>% set_names(points$EVENT_ID_CNTY) %>% bind_rows(.id = 'EVENT_ID_CNTY') %>% left_join(points)

An option using dplyr::bind_rows and dplyr::inner_join can be as:

dplyr::bind_rows

dplyr::inner_join

library(dplyr) mutate(points, rn = as.character(row_number())) %>% inner_join(bind_rows(listOfDFs, .id="Name"), by=c("rn" = "Name")) %>% select(-rn) # # A tibble: 7 x 7 # EVENT_ID_CNTY year COUNTRY CELL_ID gwno POP prio_country # <chr> <dbl> <chr> <dbl> <int> <dbl> <chr> # 1 LBY1243 2013 Libya 165267 620 751 Libya # 2 LBY1243 2013 Libya 164547 620 755 Libya # 3 LBY3389 2015 Libya 172475 620 11676 Libya # 4 LBY3393 2015 Libya 172475 620 11676 Libya # 5 LBY3506 2015 Libya 172475 620 11676 Libya # 6 LBY3822 2015 Libya 165267 620 751 Libya # 7 LBY3822 2015 Libya 164547 620 755 Libya

By clicking "Post Your Answer", you acknowledge that you have read our updated terms of service, privacy policy and cookie policy, and that your continued use of the website is subject to these policies.

Search This Blog

Mgiyuk