1

I would like to create a column which flags whether a value is the last day of the month. Consider my initial datatable and code:

      Date       Companyvar variable value DOW Month EndOfMonth
 1: 1992-07-31     VAHXRI       MV    NA Friday   Jul          0
 2: 1992-07-31     VAHXTD       MV    NA Friday   Jul          0
 3: 1992-07-31     VAHXND       MV    NA Friday   Jul          0
 4: 1992-07-31      ASXMV       MV    NA Friday   Jul          0
 5: 1992-07-31      ASXRI       MV    NA Friday   Jul          1
 6: 1992-08-03     AIZXMV       MV    NA Monday   Aug          0
 7: 1992-08-03     AIZXRI       MV    NA Monday   Aug          0
 8: 1992-08-03     AIZXTD       MV    NA Monday   Aug          0
 9: 1992-08-03     AIZXND       MV    NA Monday   Aug          0
10: 1992-08-03     AQZXMV       MV    NA Monday   Aug          0

sapply(trans, class) 
       Date  Companyvar    variable       value         DOW       Month  EndOfMonth 
     "Date"    "factor"    "factor"   "numeric" "character" "character"   "numeric" 

And i created the "EndOfMonth" column doing:

trans <- trans[, EndOfMonth := ifelse(shift(Month, type = "lead", n = 1) != Month, 1, 0)]

However, as you can see, I have multiple dates on July 31st, yet only the last one is flagged as "EndOfMonth". I understand that my code is only looking for when there is a change in the Month column, which by definition will only pick up the singular change from July to August.

I was wondering whether there is a way so that all 5 of the dates on July 31st will be flagged as "EndOfMonth", instead of just the last one.

I.e the desired output is:

          Date Companyvar variable value    DOW Month EndOfMonth
 1: 1992-07-31     VAHXRI       MV    NA Friday   Jul          1
 2: 1992-07-31     VAHXTD       MV    NA Friday   Jul          1
 3: 1992-07-31     VAHXND       MV    NA Friday   Jul          1
 4: 1992-07-31      ASXMV       MV    NA Friday   Jul          1
 5: 1992-07-31      ASXRI       MV    NA Friday   Jul          1
 6: 1992-08-03     AIZXMV       MV    NA Monday   Aug          0
 7: 1992-08-03     AIZXRI       MV    NA Monday   Aug          0
 8: 1992-08-03     AIZXTD       MV    NA Monday   Aug          0
 9: 1992-08-03     AIZXND       MV    NA Monday   Aug          0
10: 1992-08-03     AQZXMV       MV    NA Monday   Aug          0

Where the first 5 rows are flagged as "EndOfMonth".

Here's 30 rows of the data:

 structure(list(Date = structure(c(8247, 8247, 8247, 8247, 8247, 
8247, 8247, 8247, 8247, 8247, 8247, 8247, 8247, 8247, 8247, 8247, 
8247, 8247, 8247, 8247, 8247, 8250, 8250, 8250, 8250, 8250, 8250, 
8250, 8250, 8250, 8250), class = "Date"), Companyvar = structure(c(50L, 
51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 
64L, 65L, 66L, 67L, 68L, 69L, 70L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L), .Label = c("AIZXMV", "AIZXRI", "AIZXTD", "AIZXND", 
"AQZXMV", "AQZXRI", "AQZXTD", "AQZXND", "AIAYMV", "AIAYRI", "AIAYTD", 
"AIAYND", "AZJXMV", "AZJXRI", "AZJXTD", "AZJXND", "CHRXMV", "CHRXRI", 
"CHRXTD", "CHRXND", "CLXXMV", "CLXXRI", "CLXXTD", "CLXXND", "KSCXMV", 
"KSCXRI", "KSCXTD", "KSCXND", "LAUXMV", "LAUXRI", "LAUXTD", "LAUXND", 
"MQAXMV", "MQAXRI", "MQAXTD", "MQAXND", "MRMXMV", "MRMXRI", "MRMXTD", 
"MRMXND", "QANXMV", "QANXRI", "QANXTD", "QANXND", "QUBXMV", "QUBXRI", 
"QUBXTD", "QUBXND", "REXXMV", "REXXRI", "REXXTD", "REXXND", "SYDXMV", 
"SYDXRI", "SYDXTD", "SYDXND", "TTIXMV", "TTIXRI", "TTIXTD", "TTIXND", 
"TCLXMV", "TCLXRI", "TCLXTD", "TCLXND", "VAHXMV", "VAHXRI", "VAHXTD", 
"VAHXND", "ASXMV", "ASXRI"), class = "factor"), variable = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), class = "factor", .Label = c("MV", 
"ND", "RI", "TD")), value = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
    DOW = c("Friday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Friday", "Friday", "Friday", "Friday", "Friday", 
    "Friday", "Friday", "Friday", "Friday", "Monday", "Monday", 
    "Monday", "Monday", "Monday", "Monday", "Monday", "Monday", 
    "Monday", "Monday"), Month = c("Jul", "Jul", "Jul", "Jul", 
    "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", 
    "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", "Jul", "Aug", 
    "Aug", "Aug", "Aug", "Aug", "Aug", "Aug", "Aug", "Aug", "Aug"
    ), EndOfMonth = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), .Names = c("Date", 
"Companyvar", "variable", "value", "DOW", "Month", "EndOfMonth"
), class = c("data.table", "data.frame"), row.names = c(NA, -31L
), .internal.selfref = <pointer: 0x000000000b000788>)

Cheers

Gin_Salmon
  • 757
  • 1
  • 6
  • 18

0 Answers0