Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ if (getRversion() >= "3.6.0") {

# IDateTime support:
export(as.IDate,as.ITime,IDateTime)
export(second,minute,hour,yday,wday,mday,week,isoweek,month,quarter,year)
export(second,minute,hour,yday,wday,mday,week,isoweek,month,quarter,year,yearmon,yearqtr)

S3method("[", ITime)
S3method("+", IDate)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,8 @@
# 2: 2 10
```

40. New functions `yearmon()` and `yearqtr()` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month`, `quarter` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR.

## BUG FIXES

1. `by=.EACHI` when `i` is keyed but `on=` different columns than `i`'s key could create an invalidly keyed result, [#4603](https://github.com/Rdatatable/data.table/issues/4603) [#4911](https://github.com/Rdatatable/data.table/issues/4911). Thanks to @myoung3 and @adamaltmejd for reporting, and @ColeMiller1 for the PR. An invalid key is where a `data.table` is marked as sorted by the key columns but the data is not sorted by those columns, leading to incorrect results from subsequent queries.
Expand Down
20 changes: 13 additions & 7 deletions R/IDateTime.R
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,10 @@ hour = function(x) {
if (inherits(x, 'ITime')) return(as.integer(x) %/% 3600L %% 24L)
as.POSIXlt(x)$hour
}
yday = function(x) as.POSIXlt(x)$yday + 1L
wday = function(x) (unclass(as.IDate(x)) + 4L) %% 7L + 1L
mday = function(x) as.POSIXlt(x)$mday
week = function(x) yday(x) %/% 7L + 1L
# Date-part accessors. Each one coerces its argument with as.IDate() and then
# asks convertDate() for a single named field.
yday = function(x) {
  d = as.IDate(x)
  convertDate(d, "yday")
}
wday = function(x) {
  d = as.IDate(x)
  convertDate(d, "wday")
}
mday = function(x) {
  d = as.IDate(x)
  convertDate(d, "mday")
}
week = function(x) {
  d = as.IDate(x)
  convertDate(d, "week")
}
isoweek = function(x) {
# ISO 8601-conformant week, as described at
# https://en.wikipedia.org/wiki/ISO_week_date
Expand All @@ -356,7 +356,13 @@ isoweek = function(x) {
1L + (nearest_thurs - year_start) %/% 7L
}

month = function(x) as.POSIXlt(x)$mon + 1L
quarter = function(x) as.POSIXlt(x)$mon %/% 3L + 1L
year = function(x) as.POSIXlt(x)$year + 1900L
# Month-, quarter- and year-level accessors. Each one coerces its argument with
# as.IDate() and then asks convertDate() for a single named field.
month = function(x) {
  d = as.IDate(x)
  convertDate(d, "month")
}
quarter = function(x) {
  d = as.IDate(x)
  convertDate(d, "quarter")
}
year = function(x) {
  d = as.IDate(x)
  convertDate(d, "year")
}
yearmon = function(x) {
  d = as.IDate(x)
  convertDate(d, "yearmon")
}
yearqtr = function(x) {
  d = as.IDate(x)
  convertDate(d, "yearqtr")
}

# Internal helper: checks `type` against the list of supported field names
# (via match.arg) and passes `x` and the resolved name to the C routine.
convertDate = function(x, type) {
  fields = c("yday", "wday", "mday", "week", "month", "quarter", "year", "yearmon", "yearqtr")
  type = match.arg(type, fields)
  .Call(CconvertDate, x, type)
}
46 changes: 37 additions & 9 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,19 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
last = data.table::last # xts
first = data.table::first # xts, S4Vectors
copy = data.table::copy # bit64 v4; bit64 offered to rename though so this is just in case bit64 unoffers
second = data.table::second # lubridate #1135
minute = data.table::minute # lubridate
hour = data.table::hour # lubridate
yday = data.table::yday # lubridate
wday = data.table::wday # lubridate
mday = data.table::mday # lubridate
week = data.table::week # lubridate
isoweek = data.table::isoweek # lubridate
month = data.table::month # lubridate
quarter = data.table::quarter # lubridate
year = data.table::year # lubridate
yearmon = data.table::yearmon # zoo
yearqtr = data.table::yearqtr # zoo
}

# Load optional Suggests packages, which are tested by Travis for code coverage, and on CRAN
Expand Down Expand Up @@ -10449,15 +10462,17 @@ test(1692, capture.output(as.data.table(structure(57600L, class = "ITime"))),

# testing all time part extraction routines (subsumes #874)
t <- "2016-08-03 01:02:03.45"
test(1693.1, second(t), 3L)
test(1693.2, minute(t), 2L)
test(1693.3, hour(t), 1L)
test(1693.4, yday(t), 216L)
test(1693.5, wday(t), 4L)
test(1693.6, week(t), 31L)
test(1693.7, month(t), 8L)
test(1693.8, quarter(t), 3L)
test(1693.9, year(t), 2016L)
test(1693.01, second(t), 3L)
test(1693.02, minute(t), 2L)
test(1693.03, hour(t), 1L)
test(1693.04, yday(t), 216L)
test(1693.05, wday(t), 4L)
test(1693.06, week(t), 31L)
test(1693.07, month(t), 8L)
test(1693.08, quarter(t), 3L)
test(1693.09, year(t), 2016L)
test(1693.10, yearmon(t), 2016+7/12)
test(1693.11, yearqtr(t), 2016.5)

# fix for #1740 - sub-assigning NAs for factors
dt = data.table(x = 1:5, y = factor(c("","a","b","a", "")), z = 5:9)
Expand Down Expand Up @@ -18769,3 +18784,16 @@ test(2234.9, DT[, min(.SD), by=somefun(.I)], error="by.*contains .I.*supported")
DT = data.table(x = 1)
test(2235.1, copy(DT)[, c("z", "x") := {x = NULL; list(2, NULL)}], data.table(z = 2))
test(2235.2, copy(DT)[, c("z", "x") := {list(2, NULL)}], data.table(z = 2))

# move IDate date-part extraction from POSIXlt to C, add yearmon and yearqtr; #649
x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "2020-02-29", "2020-03-01", "2020-12-31", "2040-01-01", "2040-12-31", "2100-03-01")
test(2236.1, yday(x), c(315L, 1L, 59L, 60L, 365L, 60L, 61L, 366L, 1L, 366L, 60L))
test(2236.2, mday(x), c(11L, 1L, 28L, 1L, 31L, 29L, 1L, 31L, 1L, 31L, 1L))
test(2236.3, wday(x), c(7L, 3L, 5L, 6L, 3L, 7L, 1L, 5L, 1L, 2L, 2L))
test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L))
test(2236.5, month(x), c(11L, 1L, 2L, 3L, 12L, 2L, 3L, 12L, 1L, 12L, 3L))
test(2236.6, quarter(x), c(4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 1L))
test(2236.7, year(x), c(1111L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2040L, 2040L, 2100L))
test(2236.8, yearmon(x), c(1111+10/12, 2019, 2019+1/12, 2019+2/12, 2019+11/12, 2020+1/12, 2020+2/12, 2020+11/12, 2040, 2040+11/12, 2100+2/12))
test(2236.9, yearqtr(x), c(1111.75, 2019, 2019, 2019, 2019.75, 2020, 2020, 2020.75, 2040, 2040.75, 2100))

23 changes: 16 additions & 7 deletions man/IDateTime.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
\alias{month}
\alias{quarter}
\alias{year}
\alias{yearmon}
\alias{yearqtr}
\alias{IDate-class}
\alias{ITime-class}

Expand Down Expand Up @@ -93,6 +95,8 @@ isoweek(x)
month(x)
quarter(x)
year(x)
yearmon(x)
yearqtr(x)

}

Expand Down Expand Up @@ -164,11 +168,11 @@ functions \code{weekdays}, \code{months}, and \code{quarters} can also
be used, but these return character values, so they must be converted to
factors for use with data.table. \code{isoweek} is ISO 8601-consistent.

The \code{round} method for IDate's is useful for grouping and plotting.
The \code{round} method for IDate's is useful for grouping and plotting.
It can round to weeks, months, quarters, and years. Similarly, the \code{round}
and \code{trunc} methods for ITime's are useful for grouping and plotting.
They can round or truncate to hours and minutes.
Note for ITime's with 30 seconds, rounding is inconsistent due to rounding off a 5.
They can round or truncate to hours and minutes.
Note for ITime's with 30 seconds, rounding is inconsistent due to rounding off a 5.
See 'Details' in \code{\link{round}} for more information.

}
Expand All @@ -188,9 +192,14 @@ See 'Details' in \code{\link{round}} for more information.
and \code{year} return integer values
for second, minute, hour, day of year, day of week,
day of month, week, month, quarter, and year, respectively.

These values are all taken directly from the \code{POSIXlt} representation
of \code{x}, with the notable difference that while \code{yday}, \code{wday},
\code{yearmon} and \code{yearqtr} return double values representing
respectively \code{year + (month-1) / 12} and \code{year + (quarter-1) / 4}.

\code{second}, \code{minute}, \code{hour} are taken directly from
the \code{POSIXlt} representation.
All other values are computed from the underlying integer representation
of \code{x} and are comparable with the values of its \code{POSIXlt}
representation, with the notable difference that while \code{yday}, \code{wday},
and \code{mon} are all 0-based in \code{POSIXlt}, here they are 1-based.

}
Expand Down Expand Up @@ -253,7 +262,7 @@ round(seqdates, "months")
(seqtimes <- seq(as.ITime("07:00"), as.ITime("08:00"), by = 20))
round(seqtimes, "hours")
trunc(seqtimes, "hours")

}
\keyword{utilities}

154 changes: 154 additions & 0 deletions src/idatetime.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#include "data.table.h"

#define YEARS400 146097
#define YEARS100 36524
#define YEARS4 1461
#define YEARS1 365

typedef enum { YDAY, WDAY, MDAY, WEEK, MONTH, QUARTER, YEAR, YEARMON, YEARQTR} datetype;

// Gregorian leap-year test: divisible by 4, except century years, which must
// also be divisible by 400.
static inline bool isLeapYear(int year) {
  if (year % 4 != 0) return false;
  if (year % 100 != 0) return true;
  return year % 400 == 0;
}

/*
 * Extract one calendar field from a single day count.
 *
 *   x    : day number; the arithmetic below treats 0 as 1970-01-01
 *          (11017 is then 2000-03-01).
 *   type : which field to compute.
 *   out  : destination; written as double for YEARMON/YEARQTR and as int for
 *          every other type. Nothing is written for an unrecognised type.
 *
 * The date is decomposed relative to 2000-03-01 so that the leap day is the
 * last day of each 4/100/400-year cycle.
 */
void convertSingleDate(int x, datetype type, void *out)
{
  // month and quarter lengths for a year that starts on 1 March
  static const char months[] = {31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29};
  static const int quarter[] = {31, 91, 92, 92, 60};

  // day of week needs no calendar decomposition
  if (type == WDAY) {
    int dow = (x + 4) % 7;
    if (dow < 0) dow += 7; // C remainder is negative for negative x
    *(int *)out = dow + 1;
    return;
  }

  int rem = x - 11017; // days relative to 2000-03-01

  int n400 = rem / YEARS400;
  rem %= YEARS400;
  if (rem < 0) { // floor rather than truncate for dates before 2000-03-01
    rem += YEARS400;
    n400--;
  }

  const int n100 = rem / YEARS100;
  rem %= YEARS100;

  const int n4 = rem / YEARS4;
  rem %= YEARS4;

  const int n1 = rem / YEARS1;
  rem %= YEARS1;

  // rem is now the 0-based day within the March-based year
  int year = 2000 + n1 + 4*n4 + 100*n100 + 400*n400;
  if (rem > 305) // January or February belong to the next calendar year
    ++year;

  // whether the calendar year containing this March-based year's 1 March is a leap year
  const int leap = !n1 && (n4 || !n100);

  switch (type) {
  case YEAR:
    *(int *)out = year;
    break;

  case YDAY:
  case WEEK: {
    int yday = rem + 31 + 28 + leap; // shift from March-based to January-based
    if (yday >= YEARS1 + leap)
      yday -= YEARS1 + leap;
    ++yday; // 1-based
    *(int *)out = (type == WEEK) ? (yday / 7) + 1 : yday;
    break;
  }

  case MONTH:
  case YEARMON: {
    int mon; // 0-based calendar month
    if (rem == 0 && !leap && isLeapYear(year)) {
      // the last day of a 4- or 400-year cycle divides out to rem==0: it is 29 February
      mon = 1;
    } else {
      int k = 0;
      while (months[k] <= rem)
        rem -= months[k++];
      mon = k + 2; // table starts at March
    }
    if (mon >= 12)
      mon -= 12;
    if (type == MONTH)
      *(int *)out = mon + 1;
    else
      *(double *)out = year + mon / 12.0;
    break;
  }

  case MDAY: {
    if (rem == 0 && !leap && isLeapYear(year)) {
      // same end-of-cycle 29 February case as for MONTH
      *(int *)out = 29;
      break;
    }
    int k = 0;
    while (months[k] <= rem)
      rem -= months[k++];
    *(int *)out = rem + 1;
    break;
  }

  case QUARTER:
  case YEARQTR: {
    int q = 0; // 0-based quarter; slot 4 is Jan-Feb, which wraps back to the first quarter
    while (quarter[q] <= rem) {
      rem -= quarter[q];
      q++;
    }
    if (q >= 4)
      q -= 4;
    if (type == QUARTER)
      *(int *)out = q + 1;
    else
      *(double *)out = year + (q / 4.0);
    break;
  }

  default:
    break;
  }
}

/*
 * .Call entry point: compute one calendar field for every element of x.
 *
 *   x    : integer vector of day counts (as passed from R's convertDate()).
 *   type : length-1 character vector naming the field; one of "yday", "wday",
 *          "mday", "week", "month", "quarter", "year", "yearmon", "yearqtr".
 *
 * Returns a new vector of the same length as x: REALSXP for "yearmon" and
 * "yearqtr", INTSXP otherwise. Missing inputs yield missing outputs; they are
 * handled here and never passed to convertSingleDate(), whose arithmetic would
 * overflow on NA_INTEGER.
 */
SEXP convertDate(SEXP x, SEXP type)
{
  if (!isInteger(x)) error(_("x must be an integer vector"));
  const int *ix = INTEGER(x);
  const R_xlen_t n = xlength(x);
  if (!isString(type) || length(type) != 1)
    error(_("Internal error: invalid type for convertDate(), should have been caught before. please report to data.table issue tracker")); // # nocov
  const char *typeStr = CHAR(STRING_ELT(type, 0)); // fetched once instead of per comparison
  datetype ctype;
  bool ansint = true;
  if (!strcmp(typeStr, "yday")) ctype = YDAY;
  else if (!strcmp(typeStr, "wday")) ctype = WDAY;
  else if (!strcmp(typeStr, "mday")) ctype = MDAY;
  else if (!strcmp(typeStr, "week")) ctype = WEEK;
  else if (!strcmp(typeStr, "month")) ctype = MONTH;
  else if (!strcmp(typeStr, "quarter")) ctype = QUARTER;
  else if (!strcmp(typeStr, "year")) ctype = YEAR;
  else if (!strcmp(typeStr, "yearmon")) { ctype = YEARMON; ansint = false; }
  else if (!strcmp(typeStr, "yearqtr")) { ctype = YEARQTR; ansint = false; }
  else error(_("Internal error: invalid type for convertDate, should have been caught before. please report to data.table issue tracker")); // # nocov

  SEXP ans;
  if (ansint) {
    ans = PROTECT(allocVector(INTSXP, n));
    int *ansp = INTEGER(ans);
    for (R_xlen_t i=0; i < n; ++i) {
      if (ix[i] == NA_INTEGER) {
        ansp[i] = NA_INTEGER; // propagate missingness
      } else {
        convertSingleDate(ix[i], ctype, &ansp[i]);
      }
    }
  } else {
    ans = PROTECT(allocVector(REALSXP, n));
    double *ansp = REAL(ans);
    for (R_xlen_t i=0; i < n; ++i) {
      if (ix[i] == NA_INTEGER) {
        ansp[i] = NA_REAL; // propagate missingness
      } else {
        convertSingleDate(ix[i], ctype, &ansp[i]);
      }
    }
  }
  UNPROTECT(1);
  return ans;
}
2 changes: 2 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ SEXP allNAR();
SEXP test_dt_win_snprintf();
SEXP dt_zlib_version();
SEXP startsWithAny();
SEXP convertDate();

// .Externals
SEXP fastmean();
Expand Down Expand Up @@ -228,6 +229,7 @@ R_CallMethodDef callMethods[] = {
{"Cdt_zlib_version", (DL_FUNC)&dt_zlib_version, -1},
{"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1},
{"CstartsWithAny", (DL_FUNC)&startsWithAny, -1},
{"CconvertDate", (DL_FUNC)&convertDate, -1},
{NULL, NULL, 0}
};

Expand Down