Skip to content
Merged
1 change: 1 addition & 0 deletions r/.lintr
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@ linters: linters_with_defaults(
)
exclusions: list(
"R/arrowExports.R",
"R/dplyr-funcs-doc.R",
"data-raw/codegen.R"
)
1 change: 1 addition & 0 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ Collate:
'dplyr-join.R'
'dplyr-mutate.R'
'dplyr-select.R'
'dplyr-slice.R'
'dplyr-summarize.R'
'dplyr-union.R'
'record-batch.R'
Expand Down
3 changes: 3 additions & 0 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,8 @@ importFrom(rlang,as_quosure)
importFrom(rlang,call2)
importFrom(rlang,call_args)
importFrom(rlang,caller_env)
importFrom(rlang,check_dots_empty)
importFrom(rlang,dots_list)
importFrom(rlang,dots_n)
importFrom(rlang,enexpr)
importFrom(rlang,enexprs)
Expand Down Expand Up @@ -472,6 +474,7 @@ importFrom(stats,na.fail)
importFrom(stats,na.omit)
importFrom(stats,na.pass)
importFrom(stats,quantile)
importFrom(stats,runif)
importFrom(tidyselect,all_of)
importFrom(tidyselect,contains)
importFrom(tidyselect,ends_with)
Expand Down
4 changes: 2 additions & 2 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,7 @@ stop_cant_convert_array <- function(x, type) {
"Can't create Array from object of type %s",
paste(class(x), collapse = " / ")
),
call = rlang::caller_env()
call = caller_env()
)
} else {
abort(
Expand All @@ -358,7 +358,7 @@ stop_cant_convert_array <- function(x, type) {
format(type$code()),
paste(class(x), collapse = " / ")
),
call = rlang::caller_env()
call = caller_env()
)
}
}
Expand Down
6 changes: 6 additions & 0 deletions r/R/arrow-datum.R
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@ head.ArrowDatum <- function(x, n = 6L, ...) {
} else {
n <- min(len, n)
}
if (!is.integer(n)) {
n <- floor(n)
}
if (n == len) {
return(x)
}
Expand All @@ -310,6 +313,9 @@ head.ArrowDatum <- function(x, n = 6L, ...) {
tail.ArrowDatum <- function(x, n = 6L, ...) {
assert_is(n, c("numeric", "integer"))
assert_that(length(n) == 1)
if (!is.integer(n)) {
n <- floor(n)
}
len <- NROW(x)
if (n < 0) {
# tail(x, negative) means all but the first n rows
Expand Down
27 changes: 26 additions & 1 deletion r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure
#' @importFrom rlang is_list call2 is_empty as_function as_label arg_match is_symbol is_call call_args
#' @importFrom rlang quo_set_env quo_get_env is_formula quo_is_call f_rhs parse_expr f_env new_quosure
#' @importFrom rlang new_quosures expr_text
#' @importFrom rlang new_quosures expr_text caller_env check_dots_empty dots_list
#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
#' @importFrom glue glue
#' @useDynLib arrow, .registration = TRUE
Expand Down Expand Up @@ -64,6 +64,31 @@ supported_dplyr_methods <- list(
rename_with = NULL,
union = NULL,
union_all = NULL,
slice_head = c(
"slicing within groups not supported;",
"Arrow datasets do not have row order, so head is non-deterministic;",
"`prop` only supported on queries where `nrow()` is knowable without evaluating"
),
slice_tail = c(
"slicing within groups not supported;",
"Arrow datasets do not have row order, so tail is non-deterministic;",
"`prop` only supported on queries where `nrow()` is knowable without evaluating"
),
slice_min = c(
"slicing within groups not supported;",
"`with_ties = TRUE` (dplyr default) is not supported;",
"`prop` only supported on queries where `nrow()` is knowable without evaluating"
),
slice_max = c(
"slicing within groups not supported;",
"`with_ties = TRUE` (dplyr default) is not supported;",
"`prop` only supported on queries where `nrow()` is knowable without evaluating"
),
slice_sample = c(
"slicing within groups not supported;",
"`replace = TRUE` and the `weight_by` argument not supported;",
"`n` only supported on queries where `nrow()` is knowable without evaluating"
),
glimpse = NULL,
show_query = NULL,
explain = NULL
Expand Down
16 changes: 13 additions & 3 deletions r/R/dataset-scan.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,14 @@ names.Scanner <- function(x) names(x$schema)

#' @export
head.Scanner <- function(x, n = 6L, ...) {
  # Validates `n` and delegates to dataset___Scanner__head().
  # `n` must be a single, non-negative numeric or integer value; a
  # non-integer `n` is rounded down before it is passed on.
  assert_is(n, c("numeric", "integer"))
  assert_that(length(n) == 1)
  # Negative n requires knowing nrow(x), which requires a scan itself
  assert_that(n >= 0)
  if (!is.integer(n)) {
    n <- floor(n)
  }
  # Call the binding exactly once, after `n` has been normalised; an earlier
  # call whose result is discarded would only repeat the work.
  dataset___Scanner__head(x, n)
}

#' @export
Expand All @@ -168,8 +173,13 @@ tail.Scanner <- function(x, n = 6L, ...) {
}

tail_from_batches <- function(batches, n) {
assert_is(n, c("numeric", "integer"))
assert_that(length(n) == 1)
# Negative n requires knowing nrow(x), which requires a scan itself
assert_that(n >= 0) # For now
assert_that(n >= 0)
if (!is.integer(n)) {
n <- floor(n)
}
result <- list()
batch_num <- 0
# Given a list of batches, iterate from the back
Expand Down Expand Up @@ -224,7 +234,7 @@ map_batches <- function(X, FUN, ..., .schema = NULL, .lazy = FALSE, .data.frame
}
FUN <- as_mapper(FUN)
reader <- as_record_batch_reader(X)
dots <- rlang::list2(...)
dots <- list2(...)

# If no schema is supplied, we have to evaluate the first batch here
if (is.null(.schema)) {
Expand Down
17 changes: 11 additions & 6 deletions r/R/dplyr-funcs-doc.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

#' Functions available in Arrow dplyr queries
#'
#' The `arrow` package contains methods for 32 `dplyr` table functions, many of
#' The `arrow` package contains methods for 37 `dplyr` table functions, many of
#' which are "verbs" that do transformations to one or more tables.
#' The package also has mappings of 205 R functions to the corresponding
#' The package also has mappings of 207 R functions to the corresponding
#' functions in the Arrow compute library. These allow you to write code inside
#' of `dplyr` methods that call R functions, including many in packages like
#' `stringr` and `lubridate`, and they will get translated to Arrow and run
Expand Down Expand Up @@ -62,6 +62,11 @@
#' * [`select()`][dplyr::select()]
#' * [`semi_join()`][dplyr::semi_join()]
#' * [`show_query()`][dplyr::show_query()]
#' * [`slice_head()`][dplyr::slice_head()]: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating
#' * [`slice_max()`][dplyr::slice_max()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating
#' * [`slice_min()`][dplyr::slice_min()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating
#' * [`slice_sample()`][dplyr::slice_sample()]: slicing within groups not supported; `replace = TRUE` and the `weight_by` argument not supported; `n` only supported on queries where `nrow()` is knowable without evaluating
#' * [`slice_tail()`][dplyr::slice_tail()]: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating
#' * [`summarise()`][dplyr::summarise()]
#' * [`tally()`][dplyr::tally()]
#' * [`transmute()`][dplyr::transmute()]
Expand All @@ -78,7 +83,7 @@
#' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
#' `str_sub()` and `stringr::str_sub()` work.
#'
#' In addition to these functions, you can call any of Arrow's 243 compute
#' In addition to these functions, you can call any of Arrow's 244 compute
#' functions directly. Arrow has many functions that don't map to an existing R
#' function. In other cases where there is an R function mapping, you can still
#' call the Arrow function directly if you don't want the adaptations that the R
Expand Down Expand Up @@ -185,13 +190,13 @@
#'
#' ## dplyr
#'
#' * [`across()`][dplyr::across()]: not yet supported inside `filter()`;
#' purrr-style lambda functions
#' and use of `where()` selection helper not yet supported
#' * [`across()`][dplyr::across()]: Use of `where()` selection helper not yet supported
#' * [`between()`][dplyr::between()]
#' * [`case_when()`][dplyr::case_when()]
#' * [`coalesce()`][dplyr::coalesce()]
#' * [`desc()`][dplyr::desc()]
#' * [`if_all()`][dplyr::if_all()]
#' * [`if_any()`][dplyr::if_any()]
#' * [`if_else()`][dplyr::if_else()]
#' * [`n()`][dplyr::n()]
#' * [`n_distinct()`][dplyr::n_distinct()]
Expand Down
4 changes: 2 additions & 2 deletions r/R/dplyr-funcs-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ register_bindings_type_cast <- function() {
# it is difficult to replicate the .name_repair semantics and expanding of
# unnamed data frame arguments in the same way that the tibble() constructor
# does.
args <- rlang::dots_list(..., .named = TRUE, .homonyms = "error")
args <- dots_list(..., .named = TRUE, .homonyms = "error")

build_expr(
"make_struct",
Expand All @@ -151,7 +151,7 @@ register_bindings_type_cast <- function() {
if (!is.null(row.names)) arrow_not_supported("row.names")
if (!is.null(check.rows)) arrow_not_supported("check.rows")

args <- rlang::dots_list(..., .named = fix.empty.names)
args <- dots_list(..., .named = fix.empty.names)
if (is.null(names(args))) {
names(args) <- rep("", length(args))
}
Expand Down
14 changes: 13 additions & 1 deletion r/R/dplyr-funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,9 @@ call_binding_agg <- function(fun_name, ...) {
agg_funcs[[fun_name]](...)
}

# Called in .onLoad()
#' @importFrom stats runif
create_binding_cache <- function() {
# Called in .onLoad()
.cache$docs <- list()

# Register all available Arrow Compute functions, namespaced as arrow_fun.
Expand All @@ -160,6 +161,17 @@ create_binding_cache <- function() {
register_bindings_type()
register_bindings_augmented()

# HACK because random() doesn't work (ARROW-17974)
register_scalar_function(
"_random_along",
function(context, x) {
Array$create(runif(length(x)))
},
in_type = schema(x = boolean()),
out_type = float64(),
auto_convert = FALSE
)

# We only create the cache for nse_funcs and not agg_funcs
.cache$functions <- c(as.list(nse_funcs), arrow_funcs)
}
Expand Down
Loading