join.tbl_sql {dbplyr} | R Documentation |
See join for a description of the general purpose of the functions.
## S3 method for class 'tbl_lazy' inner_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ...) ## S3 method for class 'tbl_lazy' left_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ...) ## S3 method for class 'tbl_lazy' right_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ...) ## S3 method for class 'tbl_lazy' full_join(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), auto_index = FALSE, ...) ## S3 method for class 'tbl_lazy' semi_join(x, y, by = NULL, copy = FALSE, auto_index = FALSE, ...) ## S3 method for class 'tbl_lazy' anti_join(x, y, by = NULL, copy = FALSE, auto_index = FALSE, ...)
x |
tbls to join |
y |
tbls to join |
by |
a character vector of variables to join by. If To join by different variables on x and y use a named vector.
For example, |
copy |
If This allows you to join tables across srcs, but it's potentially expensive operation so you must opt into it. |
suffix |
If there are non-joined duplicate variables in |
auto_index |
if |
... |
other parameters passed onto methods |
Semi-joins are implemented using WHERE EXISTS
, and anti-joins with
WHERE NOT EXISTS
. Support for semi-joins is somewhat partial: you
can only create semi joins where the x
and y
columns are
compared with =
not with more general operators.
## Not run: library(dplyr) if (has_lahman("sqlite")) { # Left joins ---------------------------------------------------------------- lahman_s <- lahman_sqlite() batting <- tbl(lahman_s, "Batting") team_info <- select(tbl(lahman_s, "Teams"), yearID, lgID, teamID, G, R:H) # Combine player and whole team statistics first_stint <- select(filter(batting, stint == 1), playerID:H) both <- left_join(first_stint, team_info, type = "inner", by = c("yearID", "teamID", "lgID")) head(both) explain(both) # Join with a local data frame grid <- expand.grid( teamID = c("WAS", "ATL", "PHI", "NYA"), yearID = 2010:2012) top4a <- left_join(batting, grid, copy = TRUE) explain(top4a) # Indices don't really help here because there's no matching index on # batting top4b <- left_join(batting, grid, copy = TRUE, auto_index = TRUE) explain(top4b) # Semi-joins ---------------------------------------------------------------- people <- tbl(lahman_s, "Master") # All people in half of fame hof <- tbl(lahman_s, "HallOfFame") semi_join(people, hof) # All people not in the hall of fame anti_join(people, hof) # Find all managers manager <- tbl(lahman_s, "Managers") semi_join(people, manager) # Find all managers in hall of fame famous_manager <- semi_join(semi_join(people, manager), hof) famous_manager explain(famous_manager) # Anti-joins ---------------------------------------------------------------- # batters without person covariates anti_join(batting, people) } ## End(Not run)