2

I have a large data set that requires some converting but I am not sure what to do. Let's say I have 2 participants in my study.

football_enjoyment <- c(5,3)
basketball_enjoyment <- c(5,5)
football_participation <- c(1,2)
basketball_participation <- c(1,3)

df<- data.frame(football_enjoyment,football_participation, 
                basketball_enjoyment,basketball_participation)
df$id <- seq.int(nrow(df))
df

##  football_enjoyment football_participation basketball_enjoyment basketball_participation id
#                  5                      1                    5                        1    1
#                  3                      2                    5                        3    2

I want it to be like this

sports <- c("football","football", "basketball","basketball")
enjoyment_score <- c(5,3,5,5)
participation_score <- c(1,2,1,3)

id <- c(1,2)

df2 <- data.frame(sports, enjoyment_score,participation_score, id)
df2

##    sports    enjoyment_score    participation_score id
#   football               5                   1        1
#   football               3                   2        2
# basketball               5                   1        1
# basketball               5                   3        2

I am stuck with the structure and the column/row names are just for demonstration purpose.

Yukilia
  • 41
  • 3
  • Possible duplicate of [r - gather multiple columns in multiple key columns with tidyr](https://stackoverflow.com/questions/53146553/r-gather-multiple-columns-in-multiple-key-columns-with-tidyr) – Gabriel Silva Sep 20 '19 at 11:47

2 Answers2

1

With tidyverse you could do:

library(tidyverse)
library(reshape2)

df %>% gather("variable", "value", - id) %>%
    separate(variable, into = c("sports", "variable"), sep = "_") %>%
    dcast(id + sports ~ variable) %>% arrange(desc(sports))

#  id     sports enjoyment participation
#1  1   football         5             1
#2  2   football         3             2
#3  1 basketball         5             1
#4  2 basketball         5             3

Or, in base you could do:

df2 <- reshape(df, varying = c("football_enjoyment", "football_participation", "basketball_enjoyment", "basketball_participation"), 
   direction = "long", 
   idvar = "id", 
   sep = "_", 
   timevar = "sports", 
   times = c("football", "basketball"), v.names = c('enjoyment', 'participation'))
rownames(df2) <- NULL

#  id     sports enjoyment participation
#1  1   football         5             1
#2  2   football         3             2
#3  1 basketball         5             1
#4  2 basketball         5             3
Matt
  • 2,702
  • 1
  • 7
  • 18
0

tidyr 1.0.0 has a pivot_longer function that can do this:

library(tidyr)

football_enjoyment <- c(5,3)
basketball_enjoyment <- c(5,5)
football_participation <- c(1,2)
basketball_participation <- c(1,3)

df<- data.frame(football_enjoyment,football_participation, 
                basketball_enjoyment,basketball_participation)
df$id <- seq.int(nrow(df))
df
#>   football_enjoyment football_participation basketball_enjoyment
#> 1                  5                      1                    5
#> 2                  3                      2                    5
#>   basketball_participation id
#> 1                        1  1
#> 2                        3  2

df %>% pivot_longer(-id, names_to = c("sports",".value"), names_sep = "_")
#> # A tibble: 4 x 4
#>      id sports     enjoyment participation
#>   <int> <chr>          <dbl>         <dbl>
#> 1     1 football           5             1
#> 2     1 basketball         5             1
#> 3     2 football           3             2
#> 4     2 basketball         5             3

Created on 2019-09-20 by the reprex package (v0.3.0)

Arthur Yip
  • 4,223
  • 21
  • 42