Skip to content

Commit 9ff0461

Browse files
committed
Test new data against dataset, curr: prints in rStudio console
1 parent 02cc825 commit 9ff0461

File tree

5 files changed

+126
-31
lines changed

5 files changed

+126
-31
lines changed

NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ export(generate_allelemtach_profile_plot)
77
export(get_id)
88
export(handle_multimatch)
99
export(import_data)
10+
export(match_new_data)

R/app.R

+59-17
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,14 @@ ui <- shiny::fluidPage(
4040

4141
shiny::tags$hr(),
4242
# Select allele mismatch value
43-
shiny::textInput(inputId = "alleleMismatchValue", label = "Allowed Allele-mismatch", value = "3"),
43+
shiny::numericInput(inputId = "alleleMismatchValue", label = "Allowed Allele-mismatch", value = 3, min = 0, step = 1),
4444
# If the user asks for the plot, generate it and show it
4545
shiny::conditionalPanel(condition = "input.generateAllelematchProfile >= 1",
4646
plotOutput(outputId = "allelematchProfilePlot"),
4747
),
48-
shiny::conditionalPanel(condition = "input.generateAllelematchProfile == 0",
49-
actionButton(inputId = "generateAllelematchProfile", "Generate Mismatch Plot"),
50-
),
51-
52-
h4("Type the column name of the specified columns."),
53-
h5("If header is deseleted, type the indexes of the columns."),
48+
shiny::actionButton(inputId = "generateAllelematchProfile", "Generate Mismatch Plot"),
49+
shiny::h4("Type the column name of the specified columns."),
50+
shiny::h5("If header is deseleted, type the indexes of the columns."),
5451

5552

5653
# Select Index Column
@@ -87,7 +84,7 @@ ui <- shiny::fluidPage(
8784
# Allow the user to select and handle all of the multiple matches that occured
8885
div(h4("Handle Multiple Matches")),
8986

90-
textInput(inputId = "multipleMatchIndex", label = "View Details (Index of Multiple Matched Sample): ", placeholder = "tex 2"),
87+
shiny::numericInput(inputId = "multipleMatchIndex", label = "View Details (Index of Multiple Matched Sample): ", value = 0, min = 0, step = 1),
9188
DT::dataTableOutput("multipleMatchesTable"),
9289

9390
# TODO:: Allow the user to handle these (similar to matching new data)
@@ -128,9 +125,11 @@ ui <- shiny::fluidPage(
128125
shiny::conditionalPanel(condition = "input.new_data_mode == 'single'",
129126
shiny::textInput(inputId = "new_data_index", label = "Index: "),
130127
shiny::dateInput(inputId = "new_data_date", label = "Date: "),
131-
shiny::textInput(inputId = "new_data_nornt", label = "North: "),
128+
shiny::textInput(inputId = "new_data_north", label = "North: "),
132129
shiny::textInput(inputId = "new_data_east", label = "East: "),
130+
shiny::h5(shiny::textOutput(outputId = "currentGenderStyle")),
133131
shiny::textInput(inputId = "new_data_gender", label = "Gender: "),
132+
shiny::h5("Make sure the order is the same as the rest of the data, in alignment with the order given to the right."),
134133
shiny::textInput(inputId = "new_data_locus", label = "Locus (separated by ' '):")
135134
),
136135
# If multiple is choosen, open those options
@@ -146,7 +145,7 @@ ui <- shiny::fluidPage(
146145
),
147146
shiny::tags$hr(),
148147
# How many mismatchs to allow when mathcing new data to the rest of the dataset
149-
shiny::textInput(inputId = "new_data_mismatch", label = "Mismatch For New Data"),
148+
shiny::numericInput(inputId = "new_data_mismatch", label = "Mismatch For New Data", value = 3, min = 0, step = 1),
150149
# Load the file or strings into data and compare with the dataset
151150
shiny::actionButton(inputId = "search_new_data", label = "Match New Data To Dataset"),
152151
shiny::tags$hr(),
@@ -176,10 +175,13 @@ server <- function(input, output, session) {
176175
})
177176

178177
groupIndividuals <- function() {
178+
req(input$file1)
179179
req(as.numeric(input$alleleMismatchValue))
180180

181181
# Reload the data incase the colmn-names have changed
182-
load_main_data()
182+
c(data_temp, am_data_temp) %<-% load_main_data(input$file1$datapath)
183+
data <<- data_temp
184+
am_data <<- am_data_temp
183185

184186
# Unpack the different data returned by our wrapper of allelematch into temp variables
185187
c(search_data_temp, multiple_matches_temp, unclassified_temp) %<-% GenotypeCheck::create_search_data(data, am_data, as.numeric(input$alleleMismatchValue))
@@ -190,9 +192,8 @@ server <- function(input, output, session) {
190192
unclassified <<- unclassified_temp
191193
}
192194

193-
load_main_data <- function() {
194-
req(input$file1)
195-
195+
# load the main data file
196+
load_main_data <- function(file) {
196197
# Read the locus data from the ui
197198
locus_columns <- strsplit(input$locusColumnNames, ",")[[1]]
198199

@@ -211,10 +212,12 @@ server <- function(input, output, session) {
211212
}
212213

213214
# Load the data, this will be the meta data
214-
data <<- GenotypeCheck::import_data(input$file1$datapath, index_column = index_column, additional_data = additional_data, locus_names = locus_columns)
215+
data <- GenotypeCheck::import_data(file, index_column = index_column, additional_data = additional_data, locus_names = locus_columns)
215216

216217
# Create allaematch dataset, ignore some meta-data as it can be read from the "data" above, the index (SEP) is the same
217-
am_data <<- GenotypeCheck::create_allelematch_dataset(data, ignore_columns = names(additional_data))
218+
am_data <- GenotypeCheck::create_allelematch_dataset(data, ignore_columns = names(additional_data))
219+
220+
list(data, am_data)
218221
}
219222

220223
update_output_preprocess_data <- function() {
@@ -247,6 +250,10 @@ server <- function(input, output, session) {
247250
output$amtUnclassified <- renderText(
248251
paste0("There were: ", length(unclassified$index), " samples that were unclassified.")
249252
)
253+
254+
output$currentGenderStyle <- renderText(
255+
paste("The datasets gender-style is: ", paste0(data$gender[!duplicated(data$gender)], sep = ", ", collapse = ""))
256+
)
250257
}
251258

252259
# Observe when user types an index to view details
@@ -339,13 +346,48 @@ server <- function(input, output, session) {
339346
req(input$file1)
340347

341348
# Reload the data incase teh colmnnames have changed
342-
load_main_data()
349+
c(data_temp, am_data_temp) %<-% load_main_data(input$file1$datapath)
350+
data <<- data_temp
351+
am_data <<- am_data_temp
343352

344353
# Render the plot to the ui
345354
output$allelematchProfilePlot <- shiny::renderPlot({
346355
GenotypeCheck::generate_allelemtach_profile_plot(am_data)
347356
})
348357
})
358+
359+
shiny::observeEvent(input$search_new_data, {
360+
if (input$new_data_mode == "single") {
361+
# Make sure the essential data is given, the rest is meta-data and it would be annoying if it were required
362+
req(input$new_data_index)
363+
req(input$new_data_locus)
364+
req(input$new_data_mismatch)
365+
366+
# Read the locus data from the ui
367+
locus_columns <- strsplit(input$locusColumnNames, ",")[[1]]
368+
369+
# Split the locus string and name the columns accordingly in the same order that have been given in the panel to the right
370+
# Order is important here
371+
multilocus <- strsplit(input$new_data_locus, " ")[[1]]
372+
373+
names(multilocus) <- locus_columns
374+
375+
# Create the new data, a dataframe with one row
376+
new_data <- data.frame(list(index = input$new_data_index), date = input$new_data_date, north = input$new_data_north,
377+
east = input$new_data_east, gender = input$new_data_gender) %>%
378+
cbind(data.frame(as.list(multilocus)))
379+
} else if (input$new_data_mode == "multiple") {
380+
# If a file is given, use the already exsiting function to load and parse it according to the specifications on the right
381+
c(new_data, new_am_data) %<-% load_main_data(input$new_data_file$datapath)
382+
}
383+
# Get the search_data-type of data for the new data
384+
c(new_search_data, new_multiple_match, new_unclassified) %<-% GenotypeCheck::match_new_data(data = data, new_data = new_data, additional_data_columns = names(additional_data), allele_mismatch = input$new_data_mismatch)
385+
386+
# DEBUG: Temp
387+
print(new_search_data)
388+
print(new_multiple_match)
389+
print(new_unclassified)
390+
})
349391
}
350392

351393
# Create a shiny app

R/import_data.R

+43-5
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,22 @@ create_search_data <- function(data, am_data, allele_mismatch) {
8181
# Group the samples together to form individuals
8282
am_unique <- allelematch::amUnique(am_data, alleleMismatch = allele_mismatch)
8383

84-
# Go through the data and create a large data.frame with all the prevoius samples, adding a column for the individ_id
8584
ind <- 0
85+
# If a override id is specified, get the largest override id and add ind from that
86+
if (!is.null(data$preset_ind)) {
87+
# Convert everything to a number
88+
preset_num <- as.numeric(data[,"preset_ind"])
89+
# Remove everything that is not a number, out generated ind cannot accidentaly assume the same value
90+
preset_num <- preset_num[!is.na(preset_num)]
91+
# Start right after the biggest one found
92+
ind_biggest <- max(preset_num) + 1
93+
# If all override_ids were NA or not numbers, we can count from zero like usual
94+
if (is.na(ind_biggest)) {
95+
ind_biggest <- 0
96+
}
97+
}
98+
99+
# Go through the data and create a large data.frame with all the prevoius samples, adding a column for the individ_id
86100
search_data <- data.frame(index = character(), multilocus = character(), individ_id = character())
87101

88102
for (pair in am_unique$pairwise) {
@@ -97,10 +111,7 @@ create_search_data <- function(data, am_data, allele_mismatch) {
97111

98112
# If a override_id column is specified, write it to the new column
99113
if (!is.null(data$preset_ind)) {
100-
for (ind in 1:length(search_data$index)) {
101-
new_id <- data[search_data$index[[ind]],"preset_ind"]
102-
search_data$override_id[[ind]] <- new_id
103-
}
114+
search_data$override_id <- data[search_data$index,"preset_ind"]
104115
}
105116

106117
# The multiple matches that have been handled by the user previously and is now in the file
@@ -211,3 +222,30 @@ handle_multimatch <- function(search_data, multiple_matches, multimatch_index, n
211222
# Return the updated data
212223
list(search_data, multiple_matches)
213224
}
225+
226+
#' Title
227+
#'
228+
#' @param data The old data, containing all samples that have already been handeled, or been choosen not to be handleh
229+
#' @param new_data The new data, the meta-data for the new sample, containing the same columns as the old data, index, locus, date, gender, etc
230+
#' @param additional_data_columns The names of the columns containing the meta-data for the \code{\link{create_allelematch_dataset}} to be able to create a new am_dataset
231+
#' @param allele_mismatch The mismatch (numbers of alleles that are allowed to differ between individuals) to use for this grouping, user could be intrested in being more strict or more loose, up to them
232+
#'
233+
#' @return Returns only the new data grouped into search_data format, which of them that matched multiple and if some of them were unclassified
234+
#' @export
235+
#'
236+
#' @examples
237+
match_new_data <- function(data, new_data, additional_data_columns, allele_mismatch) {
238+
# Combine the data under each other, create a big data.frame
239+
print(head(data))
240+
print(head(new_data))
241+
combined_data <- rbind(data, new_data)
242+
# Create an amDataset to be able to run the grouping on all of the data
243+
am_data <- create_allelematch_dataset(data = combined_data, ignore_columns = additional_data_columns)
244+
# Get the combined data for all samples, every index and group, including which fitted into multiple and which were unclassified
245+
c(combined_search_data, combined_multiple_matches, combined_unclassified) %<-% create_search_data(data = combined_data,
246+
am_data = am_data, allele_mismatch = allele_mismatch)
247+
# Return the data gathered but filter to only keep the entries whos indexes are not in the "old" data. Thereby only passing on the new data but sorted with individ_id, if they matched multiple and if they were unclassified
248+
list(combined_search_data[!combined_search_data$index %in% data$index,],
249+
combined_multiple_matches[!combined_multiple_matches %in% data$index],
250+
combined_unclassified[!combined_unclassified$index %in% data$index])
251+
}

man/AC_allKorr.Rd

-9
Large diffs are not rendered by default.

man/match_new_data.Rd

+23
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)