This is a dput()sample of my data:
structure(list(ID = c("101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101"), IDA = c("1000",
"1279", "1392", "534", "835", "910", "748", "589", "675", "500",
"1243", "635", "1181", "791", "755", "1069", "640", "1229", "1856",
"116", "767", "1126", "863", "1141", "1858", "899", "5", "225",
"175", "1764", "1017", "497", "771", "41", "816", "1046", "439",
"930", "1350", "641", "1057", "1021", "503", "553", "1738", "1379",
"774", "442", "1113", "1503"), DATE = structure(c(1497315600,
1552352400, 1552957200, 1390438800, 1439427600, 1479776400, 1455757200,
1402534800, 1409187600, 1383008400, 1536022800, 1414630800, 1545094800,
1551142800, 1461805200, 1483405200, 1420506000, 1534813200, 1493600400,
1348448400, 1458176400, 1521075600, 1464656400, 1527555600, 1504573200,
1478134800, 1278378000, 1320886800, 1309395600, 1598576400, 1500512400,
1385600400, 1436403600, 1284426000, 1430960400, 1485824400, 1381971600,
1477962000, 1510023600, 1420509600, 1508806800, 1499302800, 1386205200,
1379466000, 1555290000, 1565226000, 1435798800, 1494896400, 1516064400,
1593478800), tzone = "UTC", class = c("POSIXct", "POSIXt")),
NR = c("CH-1000", " CH-1279", "CH-1392",
"CH-0534", "CH-0835", " CH-0910", "CH-0748",
"CH-0589", "CH-0675", "CH-0500", "CH-1243",
"CH-0635", "CH-1181", "CH-0791", "CH-0755",
"CH-1069", "CH-0640", "CH-1229", "CH-1856",
"CH-0116", "CH-0767", "CH-1126", "CH-0863",
"CH-1141", "CH-1858", "CH-0899", "CH-0005",
"CH-0225", "CH-0175", "CH-1764", "CH-1017",
"CH-0497", "CH-0771", "CH-0041", "CH-0816",
"CH-1046", "CH-0439", "CH-0930", "CH-1350",
"CH-0641", "CH-1057", "CH-1021", "CH-0503",
"CH-0553", "CH-1738", "CH-1379", "CH-0774",
"CH-0442", "CH-1113", "CH-1503"), PAT = c("101-1000",
"101-1279", "101-1392", "101-534", "101-835", "101-910",
"101-748", "101-589", "101-675", "101-500", "101-1243", "101-635",
"101-1181", "101-791", "101-755", "101-1069", "101-640",
"101-1229", "101-1856", "101-116", "101-767", "101-1126",
"101-863", "101-1141", "101-1858", "101-899", "101-5", "101-225",
"101-175", "101-1764", "101-1017", "101-497", "101-771",
"101-41", "101-816", "101-1046", "101-439", "101-930", "101-1350",
"101-641", "101-1057", "101-1021", "101-503", "101-553",
"101-1738", "101-1379", "101-774", "101-442", "101-1113",
"101-1503"), INT1 = c(NA, NA, NA, 280035, 280040, NA,
280040, 280040, 285030, 245040, NA, 280035, NA, NA, 280040,
NA, 220035, NA, NA, 280040, 280040, NA, 245005, NA, NA, 275005,
240070, 220035, 280040, NA, NA, 280040, 280040, 240005, 280040,
NA, 280040, 240005, 245040, 240030, NA, NA, 260010, NA, NA,
NA, 280040, NA, NA, NA), INT2 = c(NA, NA, NA, NA, NA,
NA, NA, 280040, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 240030, NA, 260005, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), INT3 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), INT4 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), INTX1 = c(NA, 280005, 220035, NA, NA, NA,
NA, NA, NA, NA, 280050, NA, 240080, 280050, NA, 240085, NA,
280050, 270010, NA, NA, 280050, NA, 280005, NA, NA, NA, NA,
NA, 275045, 280050, NA, NA, NA, NA, 245005, NA, NA, 245040,
NA, NA, 280050, NA, NA, 220035, 280050, NA, 255005, 280050,
220005), INTX2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 240085, NA, NA, NA, NA, NA, NA, NA, NA, 280050,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), INTX3 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), INTX4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), KAT = c(1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -50L
), class = c("tbl_df", "tbl", "data.frame"))
What I needed to do first was
first, to recode some values from INT1:INT4 and INTX1:INTX4 and put them in new columns. To this aim, I used long_pivot as below:
longDATA <- DATA %>%
pivot_longer(cols = c('INT1':'INTX4'),
names_to = "INT", values_to = "Code")
Then I used the long list to mutate new variables as below:
longDATA1 <- longDATA %>% mutate(palm = case_when(Code == 210025 ~ 1))
longDATA2 <- longDATA1 %>% mutate(bio = case_when(Code == 210015 ~ '12.06.25',Code == 210020 ~ '12.07.25',Code == 275015 ~ '12.06.25',Code == 275020 ~ '12.07.25'))
longDATA3 <- longDATA2 %>% mutate(EPX = case_when(Code == 280005 ~ 1, Code == 280010 ~ 1))
Then I need to return it to wide format. I used the code below:
WideDATA <- longDATA3 %>% pivot_wider(names_from = INT, values_from = Code)
Below you can see the output but it does not show the problem as I have a huge dataset. I realized the number of rows has changed from 2480 (my initial data before doing longer-pivot)to 2633 (the Wide data).I realized that the added rows are created when in one of the EPX, bio or palm columns a value has been recoded. for example if there is a row where INT1 and INT2 are filled with values other than NA (e.g., 280010 and 280040), EPX is assigned value 1, based on the mutation that we did earlier. However, EPX=1 and INT1=280010 appear in one row and 280040 appears in another row. like the photo below.
[1]: https://i.stack.imgur.com/jjEea.png
I have spent a week to figure this out but no luck. I would highly appreciate your advice.
structure(list(ID = c("101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101", "101",
"101", "101", "101", "101", "101", "101", "101", "101"), IDA = c("1000",
"1279", "1392", "534", "835", "910", "748", "589", "675", "500",
"1243", "635", "1181", "791", "755", "1069", "640", "1229", "1856",
"116", "767", "1126", "863", "1141", "1858", "899", "5", "225",
"175", "1764", "1017", "497", "771", "41", "816", "1046", "439",
"930", "1350", "641", "1057", "1021", "503", "553", "1738", "1379",
"774", "442", "1113", "1503"), DATE = structure(c(1497315600,
1552352400, 1552957200, 1390438800, 1439427600, 1479776400, 1455757200,
1402534800, 1409187600, 1383008400, 1536022800, 1414630800, 1545094800,
1551142800, 1461805200, 1483405200, 1420506000, 1534813200, 1493600400,
1348448400, 1458176400, 1521075600, 1464656400, 1527555600, 1504573200,
1478134800, 1278378000, 1320886800, 1309395600, 1598576400, 1500512400,
1385600400, 1436403600, 1284426000, 1430960400, 1485824400, 1381971600,
1477962000, 1510023600, 1420509600, 1508806800, 1499302800, 1386205200,
1379466000, 1555290000, 1565226000, 1435798800, 1494896400, 1516064400,
1593478800), tzone = "UTC", class = c("POSIXct", "POSIXt")),
NR = c("CH-1000", " CH-1279", "CH-1392",
"CH-0534", "CH-0835", " CH-0910", "CH-0748",
"CH-0589", "CH-0675", "CH-0500", "CH-1243",
"CH-0635", "CH-1181", "CH-0791", "CH-0755",
"CH-1069", "CH-0640", "CH-1229", "CH-1856",
"CH-0116", "CH-0767", "CH-1126", "CH-0863",
"CH-1141", "CH-1858", "CH-0899", "CH-0005",
"CH-0225", "CH-0175", "CH-1764", "CH-1017",
"CH-0497", "CH-0771", "CH-0041", "CH-0816",
"CH-1046", "CH-0439", "CH-0930", "CH-1350",
"CH-0641", "CH-1057", "CH-1021", "CH-0503",
"CH-0553", "CH-1738", "CH-1379", "CH-0774",
"CH-0442", "CH-1113", "CH-1503"), PAT = c("101-1000",
"101-1279", "101-1392", "101-534", "101-835", "101-910",
"101-748", "101-589", "101-675", "101-500", "101-1243", "101-635",
"101-1181", "101-791", "101-755", "101-1069", "101-640",
"101-1229", "101-1856", "101-116", "101-767", "101-1126",
"101-863", "101-1141", "101-1858", "101-899", "101-5", "101-225",
"101-175", "101-1764", "101-1017", "101-497", "101-771",
"101-41", "101-816", "101-1046", "101-439", "101-930", "101-1350",
"101-641", "101-1057", "101-1021", "101-503", "101-553",
"101-1738", "101-1379", "101-774", "101-442", "101-1113",
"101-1503"), palm = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), bio= c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), EPx = c(NA,
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), INT1 = c(NA, NA, NA, 280035, 280040, NA,
280040, 280040, 285030, 245040, NA, 280035, NA, NA, 280040,
NA, 220035, NA, NA, 280040, 280040, NA, 245005, NA, NA, 275005,
240070, 220035, 280040, NA, NA, 280040, 280040, 240005, 280040,
NA, 280040, 240005, 245040, 240030, NA, NA, 260010, NA, NA,
NA, 280040, NA, NA, NA), INT2 = c(NA, NA, NA, NA, NA,
NA, NA, 280040, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 240030, NA, 260005, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA), INT3 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), INT4 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), INTX1 = c(NA, 280005, 220035, NA, NA, NA,
NA, NA, NA, NA, 280050, NA, 240080, 280050, NA, 240085, NA,
280050, 270010, NA, NA, 280050, NA, 280005, NA, NA, NA, NA,
NA, 275045, 280050, NA, NA, NA, NA, 245005, NA, NA, 245040,
NA, NA, 280050, NA, NA, 220035, 280050, NA, 255005, 280050,
220005), INTX2 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, 240085, NA, NA, NA, NA, NA, NA, NA, NA, 280050,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), INTX3 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), INTX4 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), KAT = c(1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0)), row.names = c(NA, -50L
), class = c("tbl_df", "tbl", "data.frame"))
I copied pasted your code and both the DATA and WideDATA objects have the same number of rows (10), just with the desired variables added in WideDATA. Its possible there is something in your full data set that is creating the additional rows. I see some NAs in the sample you shared, and its worth noting that case_when doesn't treat those specially. If you need them handled as NA explicitly, you need to include is.na in a conditional statement. If sharing the full data set isn't possible, maybe randomly sampling would help.
e.g. dput(sample_n(DATA, 50))
I'm adding this as an answer though I realize it may need further editing as I couldn't usefully share the following in a comment.
In the code below I mostly kept your examples intact, but put everything into a single piped chunk of code. It might be simpler to stick with if_else in cases where there's only a single condition, but there's no harm using case_when throughout if you prefer the syntax.
You'll note I include TRUE ~ ... in each. Without a way to evaluate general cases, you get NA whenever the explicit statements aren't true.
Hopefully this lets you spot what is happening in your full data set, and if not please continue to update the question.
library(tidyverse)
DATA <- structure(list(ID = c("101", "101", "101", "101", "101", "101","101", "101", "101", "101"), IDA = c("1", "1", "2", "3", "4","5", "5", "1859", "1860", "1861"), DATE = structure(c(1300928400,1277946000, 1277946000, 1278550800, 1278550800, 1453770000, 1329958800,1506474000, 1485133200, 1485133200), tzone = "UTC", class = c("POSIXct","POSIXt")), NR = c("CH-0001", "CH-0001","CH-0002", "CH-0003", "CH-0004", "CH-0005","CH-0005", "CH-1859", "CH-1860", "CH-1861"), PAT = c("101-1", "101-1", "101-2", "101-3", "101-4", "101-5","101-5", "101-1859", "101-1860", "101-1861"), INT1 = c(245005,280040, 280040, 280040, 280040, 240040, 240040, NA, NA, NA),INT2 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), INT3 = c(NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_), INT4 = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_), INTX1 = c(NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,NA_real_), INTX2 = c(NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), INTX173 = c(NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), INTX4 = c(NA_real_, NA_real_, NA_real_, NA_real_,NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), KAT = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1)), row.names = c(NA,-10L), class = c("tbl_df", "tbl", "data.frame"))
longDATA <- DATA %>%
pivot_longer(cols = c('INT1':'INTX4'),
names_to = "INT", values_to = "Code") %>%
mutate(
palm = case_when(
Code == 210025 ~ 1,
TRUE ~ 0),
bio = case_when(
Code == 210015 ~ '12.06.25',
Code == 210020 ~ '12.07.25',
Code == 275015 ~ '12.06.25',
Code == 275020 ~ '12.07.25',
TRUE ~ ''
),
EPX = case_when(
Code == 280005 ~ 1,
Code == 280010 ~ 1,
TRUE ~ 0
)
) %>%
pivot_wider(names_from = INT,
values_from = Code)
longDATA
#> # A tibble: 10 × 17
#> ID IDA DATE NR PAT KAT palm bio EPX INT1
#> <chr> <chr> <dttm> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl>
#> 1 101 1 2011-03-24 01:00:00 CH-0001 101-1 0 0 "" 0 245005
#> 2 101 1 2010-07-01 01:00:00 CH-0001 101-1 0 0 "" 0 280040
#> 3 101 2 2010-07-01 01:00:00 CH-0002 101-2 0 0 "" 0 280040
#> 4 101 3 2010-07-08 01:00:00 CH-0003 101-3 0 0 "" 0 280040
#> 5 101 4 2010-07-08 01:00:00 CH-0004 101-4 0 0 "" 0 280040
#> 6 101 5 2016-01-26 01:00:00 CH-0005 101-5 0 0 "" 0 240040
#> 7 101 5 2012-02-23 01:00:00 CH-0005 101-5 0 0 "" 0 240040
#> 8 101 1859 2017-09-27 01:00:00 CH-1859 101-1… 1 0 "" 0 NA
#> 9 101 1860 2017-01-23 01:00:00 CH-1860 101-1… 1 0 "" 0 NA
#> 10 101 1861 2017-01-23 01:00:00 CH-1861 101-1… 1 0 "" 0 NA
#> # … with 7 more variables: INT2 <dbl>, INT3 <dbl>, INT4 <dbl>, INTX1 <dbl>,
#> # INTX2 <dbl>, INTX173 <dbl>, INTX4 <dbl>
Created on 2022-12-12 with reprex v2.0.2
Try this, check where I noted #fix
library(dplyr)
library(tidyr)
longDATA <- DATA %>%
pivot_longer(cols = c('INT1':'INTX4'),
names_to = "INT", values_to = "Code")
longDATA1 <- longDATA %>% mutate(palm = case_when(Code == 210025 ~ 1,
TRUE ~ NA_real_)) #fix
longDATA2 <- longDATA1 %>% mutate(bio = case_when(Code == 210015 ~ '12.06.25',
Code == 210020 ~ '12.07.25',
Code == 275015 ~ '12.06.25',
Code == 275020 ~ '12.07.25',
TRUE ~ NA_character_))#fix
longDATA3 <- longDATA2 %>% mutate(EPX = case_when(Code == 280005 ~ 1,
Code == 280010 ~ 1,
TRUE ~ NA_real_))#fix
WideDATA <- longDATA3 %>% pivot_wider(id_cols = ID:KAT, #fix
names_from = INT, values_from = Code)
#########
# Check #
#########
nrow(DATA)
#> [1] 50
nrow(WideDATA)
#> [1] 50
I have a shiny application with a login interface. I need to subset data based on the user login. Within the data, I have a column which marks the data whether the data belongs to them. The column is called "memberstate" and essentially contains the login username. The code I am tyring to use is borrowed from R Studio and is as follows:
user <- reactive({
session$user
})
### Code to manage row level security
isManager <- reactive({
if (user() == "manager"){
return(TRUE)
} else{
return(FALSE)
}
})
# Based on the logged in user, pull out only the data this user
# should be able to see.
data <- read.csv("data/DHA&PPTabletsClean.csv")
myData <- reactive({
if (isManager()){
# If a manager, show everything.
return(data)
} else{
# If a Member State, only show their own data.
return(data[data$memberstate == user(),])
}
})
I then try and use MyData for plotting graphs. I am getting the following error message
"Error in Mydata: could not find function "Mydata" " I am a newbie to R Shiny. Kindly assist.
Part of the data is as follows:
dput(data)
structure(list(Brand = c("Malaril", "Malaril", "Malaril", "Malaril",
"Malaril", "Malaril", "Malaril", "Malaril", "Malaril", "Malaril",
"Malaril", "Malaril", "Malaril", "Malaril", "Malaril", "Malaril",
"Malaril", "Malaril", "Malaril", "Malaril", "Malaril", "Malaril",
"Malaril", "Malaril", "Malaril", "Malaril", "Malaril", "Malaril",
"Malaril", "Malaril", "Malaril", "Malaril", "Malaril", "Malaril"
), ActiveIngredient = c("Dihydroartemisinin", "Dihydroartemisinin",
"Dihydroartemisinin", "Dihydroartemisinin", "Dihydroartemisinin",
"Dihydroartemisinin", "Piperaquine Phosphate", "Piperaquine Phosphate",
"Piperaquine Phosphate", "Piperaquine Phosphate", "Piperaquine Phosphate",
"Piperaquine Phosphate", "Dihydroartemisinin", "Dihydroartemisinin",
"Dihydroartemisinin", "Dihydroartemisinin", "Dihydroartemisinin",
"Dihydroartemisinin", "Piperaquine Phosphate", "Piperaquine Phosphate",
"Piperaquine Phosphate", "Piperaquine Phosphate", "Piperaquine Phosphate",
"Piperaquine Phosphate", "Dihydroartemisinin", "Dihydroartemisinin",
"Dihydroartemisinin", "Dihydroartemisinin", "Dihydroartemisinin",
"Dihydroartemisinin", "Piperaquine Phosphate", "Piperaquine Phosphate",
"Piperaquine Phosphate", "Piperaquine Phosphate"), Assay = c(94.9,
94.9, 94.9, 94.9, 94.9, 94.9, 101.6, 101.6, 101.6, 101.6, 101.6,
101.6, 95, 95, 95, 95, 95, 95, 100.2, 100.2, 100.2, 100.2, 100.2,
100.2, 96.4, 96.4, 96.4, 96.4, 96.4, 96.4, 100.6, 100.6, 100.6,
100.6), Assayperc = c(0.949, 0.949, 0.949, 0.949, 0.949, 0.949,
1.016, 1.016, 1.016, 1.016, 1.016, 1.016, 0.95, 0.95, 0.95, 0.95,
0.95, 0.95, 1.002, 1.002, 1.002, 1.002, 1.002, 1.002, 0.965,
0.965, 0.965, 0.965, 0.965, 0.965, 1.006, 1.006, 1.006, 1.006
), AssayLL = c(90L, 90L, 90L, 90L, 90L, 90L, 93L, 93L, 93L, 93L,
93L, 93L, 90L, 90L, 90L, 90L, 90L, 90L, 93L, 93L, 93L, 93L, 93L,
93L, 90L, 90L, 90L, 90L, 90L, 90L, 93L, 93L, 93L, 93L), AssaypercLL = c(0.9,
0.9, 0.9, 0.9, 0.9, 0.9, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93,
0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.93, 0.93, 0.93, 0.93, 0.93, 0.93,
0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.93, 0.93, 0.93, 0.93), AssayUL = c(110L,
110L, 110L, 110L, 110L, 110L, 107L, 107L, 107L, 107L, 107L, 107L,
110L, 110L, 110L, 110L, 110L, 110L, 107L, 107L, 107L, 107L, 107L,
107L, 110L, 110L, 110L, 110L, 110L, 110L, 107L, 107L, 107L, 107L
), AssaypercUL = c(1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.07, 1.07,
1.07, 1.07, 1.07, 1.07, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.07, 1.07,
1.07, 1.07, 1.07, 1.07, 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.07, 1.07,
1.07, 1.07), DateManufacture = c("1/10/2017", "1/10/2017", "1/10/2017",
"1/10/2017", "1/10/2017", "1/10/2017", "1/10/2017", "1/10/2017",
"1/10/2017", "1/10/2017", "1/10/2017", "1/10/2017", "1/6/2018",
"1/6/2018", "1/6/2018", "1/6/2018", "1/6/2018", "1/6/2018", "1/6/2018",
"1/6/2018", "1/6/2018", "1/6/2018", "1/6/2018", "1/6/2018", "1/8/2018",
"1/8/2018", "1/8/2018", "1/8/2018", "1/8/2018", "1/8/2018", "1/8/2018",
"1/8/2018", "1/8/2018", "1/8/2018"), ExpiryDate = c("1/9/2019",
"1/9/2019", "1/9/2019", "1/9/2019", "1/9/2019", "1/9/2019", "1/9/2019",
"1/9/2019", "1/9/2019", "1/9/2019", "1/9/2019", "1/9/2019", "1/5/2020",
"1/5/2020", "1/5/2020", "1/5/2020", "1/5/2020", "1/5/2020", "1/5/2020",
"1/5/2020", "1/5/2020", "1/5/2020", "1/5/2020", "1/5/2020", "1/7/2020",
"1/7/2020", "1/7/2020", "1/7/2020", "1/7/2020", "1/7/2020", "1/7/2020",
"1/7/2020", "1/7/2020", "1/7/2020"), ShelfLifeYrs = c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), ShelfLifeDysRecpt = c(-547L, -547L, -547L, -547L, -547L, -547L,
-547L, -547L, -547L, -547L, -547L, -547L, -304L, -304L, -304L,
-304L, -304L, -304L, -304L, -304L, -304L, -304L, -304L, -304L,
-243L, -243L, -243L, -243L, -243L, -243L, -243L, -243L, -243L,
-243L), DateReceiptSample = c("1/3/2021", "1/3/2021", "1/3/2021",
"1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021",
"1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021",
"1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021",
"1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021",
"1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021", "1/3/2021",
"1/3/2021"), COADateIssue = c("27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021", "27/5/2021",
"27/5/2021"), TestingOutcome = c("Pass", "Pass", "Pass", "Pass",
"Pass", "Pass", "Pass", "Pass", "Pass", "Pass", "Pass", "Pass",
"Fail", "Fail", "Fail", "Fail", "Fail", "Fail", "Fail", "Fail",
"Fail", "Fail", "Fail", "Fail", "Pass", "Pass", "Pass", "Pass",
"Pass", "Pass", "Pass", "Pass", "Pass", "Pass"), FailureReason = c("",
"", "", "", "", "", "", "", "", "", "", "", "Dihydroartemisinin Dissolution",
"Dihydroartemisinin Dissolution", "Dihydroartemisinin Dissolution",
"Dihydroartemisinin Dissolution", "Dihydroartemisinin Dissolution",
"Dihydroartemisinin Dissolution", "Dihydroartemisinin Dissolution",
"Dihydroartemisinin Dissolution", "Dihydroartemisinin Dissolution",
"Dihydroartemisinin Dissolution", "Dihydroartemisinin Dissolution",
"Dihydroartemisinin Dissolution", "", "", "", "", "", "", "",
"", "", ""), Dissolution = c(77L, 81L, 84L, 86L, 82L, 81L, 100L,
96L, 98L, 101L, 97L, 102L, 62L, 59L, 62L, 66L, 65L, 61L, 99L,
95L, 97L, 103L, 99L, 102L, 97L, 80L, 81L, 86L, 80L, 80L, 103L,
101L, 101L, 101L), Dissolutionperc = c(0.77, 0.81, 0.84, 0.86,
0.82, 0.81, 1, 0.96, 0.98, 1.01, 0.97, 1.02, 0.62, 0.59, 0.62,
0.66, 0.65, 0.61, 0.99, 0.95, 0.97, 1.03, 0.99, 1.02, 0.97, 0.8,
0.81, 0.86, 0.8, 0.8, 1.03, 1.01, 1.01, 1.01), DissolLL = c(70L,
70L, 70L, 70L, 70L, 70L, 80L, 80L, 80L, 80L, 80L, 80L, 70L, 70L,
70L, 70L, 70L, 70L, 80L, 80L, 80L, 80L, 80L, 80L, 70L, 70L, 70L,
70L, 70L, 70L, 80L, 80L, 80L, 80L), DissolutionpercLL = c(0.7,
0.7, 0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.7, 0.7,
0.7, 0.7, 0.7, 0.7, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.7, 0.7, 0.7,
0.7, 0.7, 0.7, 0.8, 0.8, 0.8, 0.8), Mass = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), pH = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA), Dosageform = c("Tablet", "Tablet", "Tablet", "Tablet", "Tablet",
"Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet",
"Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet",
"Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet",
"Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet", "Tablet",
"Tablet"), Therapeuticclass = c("Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial",
"Antimalarial", "Antimalarial", "Antimalarial", "Antimalarial"
), memberstate = c("", "", "", "", "", "ruvimbo", "ruvimbo",
"ruvimbo", "ruvimbo", "ruvimbo", "ruvimbo", "ruvimbo", "ruvimbo",
"ruvimbo", "ruvimbo", "ruvimbo", "ruvimbo", "ruvimbo", "ruvimbo",
"ruvimbo", "ruvimbo", "ruvimbo", "", "", "", "", "", "", "",
"", "", "", "", "")), class = "data.frame", row.names = c(NA,
-34L))
Regards
Chris