0

I would like to replace the na´s in v1 to v4 with the median of the same columns

Here are some example data

id <- c(1,2,3,4)
v1 <- c(1,3,0,2)
v2 <- c(NA,1,NA,2)
v3 <- c(2,4,1,2)
v4 <- c(NA,1,0,2)
v5 <- c(5,1,NA,2)
v6 <- c(7,1,9,NA)

df <- data.frame(id, v1, v2, v3,v4,v5,v6)
df_pre <- df %>% group_by(id) %>% mutate(Median_v1_v4 = median(c(v1,v2,v3,v4), na.rm=TRUE))

This is what data looks like now:

id v1 v2 v3 v4 v5 v6  Median_v1_v4
 1  1 NA  2 NA  5  7          1.5
 2  3  1  4  1  1  1          2.0
 3  0 NA  1  0 NA  9          0.0
 4  2  2  2  2  2 NA          2.0

This is what i want the data to look like

id v1  v2 v3  v4 v5 v6 Median_v1_v4
1  1 1.5  2 1.5  5  7          1.5
2  3 1.0  4 1.0  1  1          2.0
3  0 0.0  1 0.0 NA  9          0.0
4  2 2.0  2 2.0  2 NA          2.0
Carl
  • 101
  • 6
  • 1
    https://stackoverflow.com/questions/25835643/replace-missing-values-with-column-mean...can u check this one – sai saran Nov 14 '18 at 11:52
  • Related: [Rowwise median for multiple columns using dplyr](https://stackoverflow.com/questions/47773350/rowwise-median-for-multiple-columns-using-dplyr) – markus Nov 14 '18 at 12:01

3 Answers3

0

What about this solution:

df[,2:5] <- t( apply(df[,2:5], 1, function(x) { 
  x[is.na(x)] <- median(x,na.rm=T)
  return(x)}
) )

df

  id v1  v2 v3 v4 v5 v6
1  1  1 1.0  2  1  5  7
2  2  3 1.0  4  1  1  1
3  3  0 0.5  1  0 NA  9
4  4  2 2.0  2  2  2 NA

Adjusted from: Replace NA values by row means

PS: Saw the comment too late (@Sai Saran), this is an adjustment of the solution in the link above.

Andre Elrico
  • 8,959
  • 1
  • 37
  • 61
alex_555
  • 1,122
  • 1
  • 8
  • 20
0

You can try

library(tidyverse)
df %>% 
  gather(k, v, -id) %>% 
  group_by(id) %>%   
  mutate(Median=median(v[k %in% c("v1", "v2", "v3","v4")], na.rm = T)) %>% 
  mutate(v=ifelse(is.na(v) & k %in% c("v1", "v2", "v3","v4"), Median, v)) %>% 
  spread(k, v)
# A tibble: 4 x 8
# Groups:   id [4]
     id Median    v1    v2    v3    v4    v5    v6
  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1     1    1.5     1   1.5     2   1.5     5     7
2     2    2       3   1       4   1       1     1
3     3    0       0   0       1   0      NA     9
4     4    2       2   2       2   2       2    NA
Roman
  • 14,297
  • 1
  • 25
  • 40
0

Take a look into this code.

library(tidyverse)
id <- c(1,2,3,4)
v1 <- c(1,3,0,2)
v2 <- c(NA,1,NA,2)
v3 <- c(2,4,1,2)
v4 <- c(NA,1,0,2)
v5 <- c(5,1,NA,2)
v6 <- c(7,1,9,NA)

df <- data.frame(id, v1, v2, v3,v4,v5,v6)
df_pre <- df %>% 
  group_by(id) %>% 
  mutate(Median_v1_v4 = median(c(v1,v2,v3,v4), na.rm=TRUE))

df_pre %>% 
  mutate_at(vars(v1,v2,v3,v4),  
            funs(replace(., is.na(.), Median_v1_v4))) -> df_pre
Luis
  • 978
  • 7
  • 21