1

This question is an extension of the answer to this and this Here's an example df:

      name score end.s time
    1    a    1    NA   1 
    2    a    2    NA   2
    3    a    3    NA   3
    4    b    4    4    1
    5    b    5    4    2
    6    b    6    4    3
    7    c    7    NA   1
    8    c    8    NA   2
    9    d    6    6    1
   10    d    7    6    3

And the output I would like:

      name score end.s time
    1    a    1    3    1 
    2    a    2    3    2
    3    a    NA   3    3
    4    b    4    4    1
    5    b    5    4    2
    6    b    6    4    3
    7    c    7    8    1
    8    c    NA   8    2
    9    d    6    6    1
   10    d    7    6    3

There are two features of the transformation - taking the last value from the 'score' column to replace the NAs in the 'end.s' column and replacing that value from the 'score' column with NA.

I thought that I could replace the NAs with using the syntax from the previous posts but it doesn't work out that way once I took a look and thought about it for a second.

I was thinking an apply function would be the way to go but I haven't even managed to muddle through the first step.

M--
  • 18,939
  • 7
  • 44
  • 76
Mik
  • 367
  • 5
  • 12

3 Answers3

2

What you want is a bit complicated, so is the answer:

library(dplyr)
df %>% group_by(name) %>% mutate(help=last(score)) %>% 
   mutate(score = ifelse(is.na(end.s), c(score[-n()], NA), score)) %>% 
   mutate_at(vars(end.s), funs(ifelse(is.na(.), help, .))) %>% select(-help)

## # A tibble: 10 x 4
## # Groups:   name [4]
##      name score end.s  time
##    <fctr> <int> <int> <int>
##  1      a     1     3     1
##  2      a     2     3     2
##  3      a    NA     3     3
##  4      b     4     4     1
##  5      b     5     4     2
##  6      b     6     4     3
##  7      c     7     8     1
##  8      c    NA     8     2
##  9      d     6     6     1
## 10      d     7     6     3

Data:

 df <- structure(list(name = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,      
     3L, 4L, 4L), .Label = c("a", "b", "c", "d"), class = "factor"),      
         score = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 6L, 7L), end.s = c(NA, 
         NA, NA, 4L, 4L, 4L, NA, NA, 6L, 6L), time = c(1L, 2L, 3L,        
         1L, 2L, 3L, 1L, 2L, 1L, 3L)), .Names = c("name", "score",        
     "end.s", "time"), row.names = c("1", "2", "3", "4", "5", "6",        
     "7", "8", "9", "10"), class = "data.frame")   
M--
  • 18,939
  • 7
  • 44
  • 76
2

Here is another option with data.table

library(data.table)
i1 <- setDT(df)[is.na(end.s), .I[.N], name]$V1
df[is.na(end.s),  end.s := score[.N], name][i1, score := NA][]
#     name score end.s time
# 1:    a     1     3    1
# 2:    a     2     3    2
# 3:    a    NA     3    3
# 4:    b     4     4    1
# 5:    b     5     4    2
# 6:    b     6     4    3
# 7:    c     7     8    1
# 8:    c    NA     8    2
# 9:    d     6     6    1
#10:    d     7     6    3
akrun
  • 674,427
  • 24
  • 381
  • 486
1

I believe the following does what you want.

test <- do.call(rbind, lapply(split(test, test$name), function(x){
    i <- is.na(x$end.s)
    x$end.s[i] <- x$score[nrow(x)]
    if(any(i)) x$score[nrow(x)] <- NA
    x
}))
row.names(test) <- NULL
test
Rui Barradas
  • 44,483
  • 8
  • 22
  • 48