From 10a2b621f7d52ccd9c88793c66cab4336f3009d7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 28 Nov 2019 23:52:05 +0800 Subject: [PATCH 1/3] remove set2key,v, key2 --- NAMESPACE | 1 - NEWS.md | 2 ++ R/setkey.R | 5 ----- inst/tests/tests.Rraw | 4 +--- man/set2key.Rd | 22 ------------------- ...le-secondary-indices-and-auto-indexing.Rmd | 4 +++- 6 files changed, 6 insertions(+), 32 deletions(-) delete mode 100644 man/set2key.Rd diff --git a/NAMESPACE b/NAMESPACE index 49bd3a35d6..7689afe383 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,7 +7,6 @@ exportClasses(data.table, IDate, ITime) export(data.table, tables, setkey, setkeyv, key, "key<-", haskey, CJ, SJ, copy) export(setindex, setindexv, indices) -export(set2key, set2keyv, key2) # deprecated with helpful error; remove after May 2019 (see #3399) export(as.data.table,is.data.table,test.data.table) export(last,first,like,"%like%","%ilike%","%flike%",between,"%between%",inrange,"%inrange%") export(timetaken) diff --git a/NEWS.md b/NEWS.md index 4a0033fcfa..4d8dd9e9fb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,6 +18,8 @@ 3. C internals have been standardized to use `PRI[u|d]64` to print `[u]int64_t`. This solves new warnings from `gcc-8` on Windows with `%lld`, [#4062](https://github.com/Rdatatable/data.table/issues/4062), in many cases already working around `snprintf` on Windows not supporting `%zu`. Release procedures have been augmented to prevent any internal use of `llu`, `lld`, `zu` or `zd`. +4. `set2key`, `set2keyv`, and `key2` have been completely removed. Reminder that note 11 in v1.11.0 (May 2018) warned that `set2key()` and `key2()` will be removed in May 2019. They have been warning since v1.9.8 (Nov 2016) and their warnings were upgraded to errors in v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental'. 
+ # data.table [v1.12.6](https://github.com/Rdatatable/data.table/milestone/18?closed=1) (18 Oct 2019) diff --git a/R/setkey.R b/R/setkey.R index 42a4355d5c..5caefcf68f 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -18,11 +18,6 @@ setindexv = function(x, cols, verbose=getOption("datatable.verbose")) { } } -# remove these 3 after May 2019; see discussion in #3399 and notes in v1.12.2. They were marked experimental after all. -set2key = function(...) stop("set2key() is now deprecated. Please use setindex() instead.") -set2keyv = function(...) stop("set2keyv() is now deprecated. Please use setindexv() instead.") -key2 = function(...) stop("key2() is now deprecated. Please use indices() instead.") - # upgrade to error after Mar 2020. Has already been warning since 2012, and stronger warning in Mar 2019 (note in news for 1.12.2); #3399 "key<-" = function(x,value) { warning("key(x)<-value is deprecated and not supported. Please change to use setkey() with perhaps copy(). Has been warning since 2012 and will be an error in future.") diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f10fe26ce1..1157907be5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12454,9 +12454,7 @@ test(1897.2, attributes(attr(DT, 'index')), list(`__a` = c(3L, 2L, 4L, 1L, 5L), `__a__b` = c(3L, 4L, 2L, 1L, 5L))) -test(1898.1, set2key(DT, a), error="deprecated. Please use setindex() instead.") -test(1898.2, set2keyv(DT, "a"), error="deprecated. Please use setindexv() instead.") -test(1898.3, key2(DT), error="deprecated. Please use indices() instead.") +# tests 1898.{1,2,3} for set2key etc. 
deprecation were removed along with those functions # Allow column to be used as rownames when converting to matrix #2702 DT = data.table(id = letters[1:4], X = 1:4, Y = 5:8) diff --git a/man/set2key.Rd b/man/set2key.Rd deleted file mode 100644 index 4acf847696..0000000000 --- a/man/set2key.Rd +++ /dev/null @@ -1,22 +0,0 @@ -\name{set2key} -\alias{set2key} -\alias{set2keyv} -\alias{key2} -\alias{key<-} -\title{ Deprecated. } -\keyword{internal} -\description{ - These functions are deprecated. They will be removed in future. Please use the functions in \code{\link{setkey}}. -} -\usage{ -set2key(...) # DEPRECATED; helpful error since May 2018 and warning since Nov 2016 -set2keyv(...) # DEPRECATED; helpful error since May 2018 and warning since Nov 2016 -key2(...) # DEPRECATED; helpful error since May 2018 and warning since Nov 2016 -key(x) <- value # warning since 2012; DEPRECATED since Mar 2019 -} -\arguments{ -\item{\dots}{ Deprecated. } -\item{x}{ Deprecated. } -\item{value}{ Deprecated. } -} - diff --git a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd index 64bc04877f..869cfea54b 100644 --- a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd +++ b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd @@ -73,13 +73,15 @@ names(attributes(flights)) ``` * `setindex` and `setindexv()` allows adding a secondary index to the data.table. -* Originally it was `set2key` until data.table 1.9.6, then [changed to current names](https://github.com/Rdatatable/data.table/issues/1442). + * Note that `flights` is **not** physically reordered in increasing order of `origin`, as would have been the case with `setkey()`. * Also note that the attribute `index` has been added to `flights`. * `setindex(flights, NULL)` would remove all secondary indices. +* Historical note: `setindex` was originally called `set2key` through `data.table` 1.9.6,. 
+ #### -- How can we get all the secondary indices set so far in `flights`? ```{r} From 16ee8a6dae8c3b68ecee843cebcabf7a924a08e6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 29 Nov 2019 04:17:57 +0800 Subject: [PATCH 2/3] restore man page which also had key<- (not removed) --- man/deprecated.Rd | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 man/deprecated.Rd diff --git a/man/deprecated.Rd b/man/deprecated.Rd new file mode 100644 index 0000000000..c1bb9afc16 --- /dev/null +++ b/man/deprecated.Rd @@ -0,0 +1,13 @@ +\name{key<-} +\alias{key<-} +\title{ Deprecated. } +\keyword{internal} +\description{ + This function is deprecated. It will be removed in future. Please use \code{\link{setkey}}. +} +\usage{ +key(x) <- value # warning since 2012; DEPRECATED since Mar 2019 +} +\arguments{ +\item{x}{ Deprecated. } +} From 5e63fee0ea829cca1473813443f0701ebbe07491 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 17 Dec 2019 15:13:24 -0800 Subject: [PATCH 3/3] news item tweak, and removed historical reference to set2key in vignette --- NEWS.md | 2 +- ...le-secondary-indices-and-auto-indexing.Rmd | 30 +++++++++---------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/NEWS.md b/NEWS.md index 1837cb6aa5..b3bb563e99 100644 --- a/NEWS.md +++ b/NEWS.md @@ -18,7 +18,7 @@ 1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). -2. `set2key`, `set2keyv`, and `key2` have been removed, as they have been warning since v1.9.8 (Nov 2016) and failing with error since v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' and quickly superceded by `setindex` and `indices`. +2. 
`set2key`, `set2keyv`, and `key2` have been removed, as they have been warning since v1.9.8 (Nov 2016) and halting with helpful message since v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' and quickly superseded by `setindex` and `indices`. # data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) diff --git a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd index 869cfea54b..ef506605c3 100644 --- a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd +++ b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd @@ -1,7 +1,7 @@ --- title: "Secondary indices and auto indexing" date: "`r Sys.Date()`" -output: +output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Secondary indices and auto indexing} @@ -76,12 +76,10 @@ names(attributes(flights)) * Note that `flights` is **not** physically reordered in increasing order of `origin`, as would have been the case with `setkey()`. -* Also note that the attribute `index` has been added to `flights`. +* Also note that the attribute `index` has been added to `flights`. * `setindex(flights, NULL)` would remove all secondary indices. -* Historical note: `setindex` was originally called `set2key` through `data.table` 1.9.6,. - #### -- How can we get all the secondary indices set so far in `flights`? ```{r} @@ -113,15 +111,15 @@ a) computing the order vector for the column(s) provided, here, `origin`, and b) reordering the entire data.table, by reference, based on the order vector computed. -# +# -Computing the order isn't the time consuming part, since data.table uses true radix sorting on integer, character and numeric vectors. However reordering the data.table could be time consuming (depending on the number of rows and columns).
+Computing the order isn't the time consuming part, since data.table uses true radix sorting on integer, character and numeric vectors. However reordering the data.table could be time consuming (depending on the number of rows and columns). Unless our task involves repeated subsetting on the same column, fast key based subsetting could effectively be nullified by the time to reorder, depending on our data.table dimensions. #### -- There can be only one `key` at the most -Now if we would like to repeat the same operation but on `dest` column instead, for the value "LAX", then we have to `setkey()`, *again*. +Now if we would like to repeat the same operation but on `dest` column instead, for the value "LAX", then we have to `setkey()`, *again*. ```{r, eval = FALSE} ## not run @@ -129,7 +127,7 @@ setkey(flights, dest) flights["LAX"] ``` -And this reorders `flights` by `dest`, *again*. What we would really like is to be able to perform the fast subsetting by eliminating the reordering step. +And this reorders `flights` by `dest`, *again*. What we would really like is to be able to perform the fast subsetting by eliminating the reordering step. And this is precisely what *secondary indices* allow for! @@ -147,11 +145,11 @@ As we will see in the next section, the `on` argument provides several advantage * allows easy reuse of existing indices by just checking the attributes. -* allows for a cleaner syntax by having the columns on which the subset is performed as part of the syntax. This makes the code easier to follow when looking at it at a later point. +* allows for a cleaner syntax by having the columns on which the subset is performed as part of the syntax. This makes the code easier to follow when looking at it at a later point. Note that `on` argument can also be used on keyed subsets as well. In fact, we encourage to provide the `on` argument even when subsetting using keys for better readability. -# +# ## 2. 
Fast subsetting using `on` argument and secondary indices @@ -163,7 +161,7 @@ As we will see in the next section, the `on` argument provides several advantage flights["JFK", on = "origin"] ## alternatively -# flights[.("JFK"), on = "origin"] (or) +# flights[.("JFK"), on = "origin"] (or) # flights[list("JFK"), on = "origin"] ``` @@ -278,9 +276,9 @@ flights[.(c("LGA", "JFK", "EWR"), "XNA"), mult = "last", on = c("origin", "dest" ## 3. Auto indexing -First we looked at how to fast subset using binary search using *keys*. Then we figured out that we could improve performance even further and have more cleaner syntax by using secondary indices. +First we looked at how to fast subset using binary search using *keys*. Then we figured out that we could improve performance even further and have more cleaner syntax by using secondary indices. -That is what *auto indexing* does. At the moment, it is only implemented for binary operators `==` and `%in%`. An index is automatically created *and* saved as an attribute. That is, unlike the `on` argument which computes the index on the fly each time (unless one already exists), a secondary index is created here. +That is what *auto indexing* does. At the moment, it is only implemented for binary operators `==` and `%in%`. An index is automatically created *and* saved as an attribute. That is, unlike the `on` argument which computes the index on the fly each time (unless one already exists), a secondary index is created here. Let's start by creating a data.table big enough to highlight the advantage. @@ -314,15 +312,15 @@ The time to subset the first time is the time to create the index + the time to system.time(dt[x %in% 1989:2012]) ``` -* Running the first time took `r sprintf("%.3f", t1["elapsed"])` seconds where as the second time took `r sprintf("%.3f", t2["elapsed"])` seconds. +* Running the first time took `r sprintf("%.3f", t1["elapsed"])` seconds where as the second time took `r sprintf("%.3f", t2["elapsed"])` seconds. 
* Auto indexing can be disabled by setting the global argument `options(datatable.auto.index = FALSE)`. * Disabling auto indexing still allows to use indices created explicitly with `setindex` or `setindexv`. You can disable indices fully by setting global argument `options(datatable.use.index = FALSE)`. -# +# -In recent version we extended auto indexing to expressions involving more than one column (combined with `&` operator). In the future, we plan to extend binary search to work with more binary operators like `<`, `<=`, `>` and `>=`. +In recent version we extended auto indexing to expressions involving more than one column (combined with `&` operator). In the future, we plan to extend binary search to work with more binary operators like `<`, `<=`, `>` and `>=`. We will discuss fast *subsets* using keys and secondary indices to *joins* in the next vignette, *"Joins and rolling joins"*.