Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Column Renaming for Selected Columns #9282

Merged
31 changes: 25 additions & 6 deletions instat/dlgName.vb
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@ Public Class dlgName
Private clsStartwithFunction, clsEndswithFunction, clsMatchesFunction, clsContainsFunction As New RFunction
Private WithEvents grdCurrentWorkSheet As Worksheet
Private dctRowsNewNameChanged As New Dictionary(Of Integer, String)
Private dctRowsCurrentName As New Dictionary(Of Integer, String)
Private dctRowsNewLabelChanged As New Dictionary(Of Integer, String)
Private dctNameRowsValues As New Dictionary(Of Integer, String)
Private dctCaseOptions As New Dictionary(Of String, String)
@@ -94,11 +95,9 @@ Public Class dlgName
ucrPnlCase.AddRadioButton(rdoAbbreviate, "abbreviate")
ucrPnlCase.AddRadioButton(rdoReplace, "stringr::str_replace")

ucrPnlSelectData.SetParameter(New RParameter("data", 0))
ucrPnlSelectData.AddRadioButton(rdoWholeDataFrame)
ucrPnlSelectData.AddRadioButton(rdoSelectedColumn)
ucrPnlSelectData.AddParameterValuesCondition(rdoWholeDataFrame, "checked", "whole")
ucrPnlSelectData.AddParameterValuesCondition(rdoSelectedColumn, "checked", "selected")
ucrPnlSelectData.SetParameter(New RParameter("checked", 1))
ucrPnlSelectData.AddRadioButton(rdoWholeDataFrame, "whole")
ucrPnlSelectData.AddRadioButton(rdoSelectedColumn, "selected")

ucrReceiverColumns.SetParameter(New RParameter(".cols", 6))
ucrReceiverColumns.Selector = ucrSelectVariables
@@ -175,6 +174,7 @@ Public Class dlgName
ucrSelectVariables.Reset()
dctRowsNewNameChanged.Clear()
dctRowsNewLabelChanged.Clear()
dctRowsCurrentName.Clear()
bCurrentCell = False
clsNewColNameDataframeFunction.SetRCommand("data.frame")

@@ -327,10 +327,14 @@ Public Class dlgName
If e.Range.Rows > 1 Then
For iRow As Integer = iStartRowIndex To grdCurrentWorkSheet.SelectionRange.EndRow
Dim strNewData As String = ValidateRVariable(grdCurrentWorkSheet.GetCellData(row:=iRow, col:=iColIndex), iColIndex)
Dim strOldData As String = grdCurrentWorkSheet.GetCellData(row:=iRow, col:=0)
GetOldNames(iRow, strOldData)
RenameColumns(strNewData, iRow, iColIndex)
Next
Else
Dim strNewData As String = ValidateRVariable(grdCurrentWorkSheet.GetCellData(row:=e.Range.Row, col:=iColIndex), iColIndex)
Dim strOldData As String = grdCurrentWorkSheet.GetCellData(row:=e.Range.Row, col:=0)
GetOldNames(e.Range.Row, strOldData)
RenameColumns(strNewData, iStartRowIndex, iColIndex)
End If
ValidateNamesFromDictionary(iColIndex)
@@ -380,18 +384,30 @@ Public Class dlgName
Private Sub grdCurrSheet_AfterCellEdit(sender As Object, e As CellAfterEditEventArgs) Handles grdCurrentWorkSheet.AfterCellEdit
Dim iCol As Integer = e.Cell.Column
Dim strNewData As String = ValidateRVariable(e.NewData, iCol)

Dim strFirstColumnData As String = grdCurrentWorkSheet(e.Cell.Row, 0).ToString()

GetOldNames(e.Cell.Row, strFirstColumnData)
RenameColumns(strNewData, e.Cell.Row, iCol)
ValidateNamesFromDictionary(iCol)
End Sub

Private Sub GetOldNames(iRow As Integer, strOldName As String)
If Not dctRowsCurrentName.ContainsKey(iRow) Then
dctRowsCurrentName.Add(iRow, strOldName)
Else
dctRowsCurrentName(iRow) = strOldName
End If
End Sub

Private Sub GetVariables(strNewData As String, iRowIndex As Integer, iColIndex As Integer)
If rdoMultiple.Checked Then
If iColIndex = 1 Then
If strNewData <> "" Then
AddChangedNewNameRows(iRowIndex, strNewData)

clsNewColNameDataframeFunction.AddParameter("cols", GetValuesAsVector(dctRowsNewNameChanged), iPosition:=0)
clsNewColNameDataframeFunction.AddParameter("index", "c(" & String.Join(",", dctRowsNewNameChanged.Keys.ToArray) & ")", iPosition:=1)
clsNewColNameDataframeFunction.AddParameter("old_names", GetValuesAsVector(dctRowsCurrentName), iPosition:=1)
clsDefaultRFunction.AddParameter("new_column_names_df", clsRFunctionParameter:=clsNewColNameDataframeFunction, iPosition:=8)
Else
clsNewColNameDataframeFunction.RemoveParameterByName("cols")
@@ -652,6 +668,8 @@ Public Class dlgName
Dim parsedValue As Boolean
Dim strNewData As String = ValidateRVariable(e.Text, iCol)
If Not strNewData.ToLower.Equals("t") AndAlso Not strNewData.ToLower.Equals("f") AndAlso Not IsNumeric(strNewData) AndAlso Not Boolean.TryParse(strNewData, parsedValue) Then
Dim strFirstColumnData As String = grdCurrentWorkSheet(e.Cell.Row, 0).ToString()
GetOldNames(e.Cell.Row, strFirstColumnData)
RenameColumns(strNewData, e.Cell.Row, iCol)
ValidateNamesFromDictionary(iCol)
End If
@@ -677,6 +695,7 @@ Public Class dlgName
Case "Contains"
clsDefaultRFunction.AddParameter(".cols", clsRFunctionParameter:=clsContainsFunction, iPosition:=3)
End Select

Else
clsDefaultRFunction.RemoveParameterByName("pattern")
clsDefaultRFunction.RemoveParameterByName("replacement")
76 changes: 72 additions & 4 deletions instat/static/InstatObject/R/data_object_R6.R
Original file line number Diff line number Diff line change
@@ -921,6 +921,40 @@ DataSheet$set("public", "cor", function(x_col_names, y_col_name, use = "everythi
}
)

DataSheet$set("public", "update_selection", function(new_values, column_selection_name = NULL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since you set it to be an error if column_selection_name = NULL, then you can just set column_selection_name here, and run it like you have run new_values (i.e., with if(missing(column_selection_name)) ... - unless I'm missing something?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

if(missing(new_values)) stop("new_values is required")

# Find the column selection to update
if (is.null(column_selection_name)) {
stop("A column selection name must be provided to update the selection.")
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some inconsistency here. In the first instance you refer to the parameter name which is missing, in the second you refer to the objective of the missing parameter name.

Can you be consistent with this error message between them both (e.g., either say "Parameter for new values is missing", or "column_selection_name is required").

If this is code that the user wouldn't look at, then saying the parameter name makes sense. If this is code that the user (who may not be versed in R) will look at, then saying a statement like your second one makes more sense?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


column_selection_obj <- private$column_selections[[column_selection_name]]

if (is.null(column_selection_obj)) {
stop("No column selection found with the name: ", column_selection_name)
}

# Update conditions in the column selection with new values
updated_conditions <- lapply(column_selection_obj$conditions, function(condition) {
# Check if the parameters exist and replace them with new values
if ("parameters" %in% names(condition)) {
condition$parameters$x <- new_values
}
return(condition)
})

# Update the column selection object with the new conditions
column_selection_obj$conditions <- updated_conditions
private$column_selections[[column_selection_name]] <- column_selection_obj

# Optionally, mark data as changed
self$data_changed <- TRUE

message("Column selection '", column_selection_name, "' updated successfully with new values.")
})


DataSheet$set("public", "rename_column_in_data", function(curr_col_name = "", new_col_name = "", label = "", type = "single", .fn, .cols = everything(), new_column_names_df, new_labels_df, ...) {
curr_data <- self$get_data_frame(use_current_filter = FALSE, use_column_selection = FALSE)

@@ -954,9 +988,20 @@ DataSheet$set("public", "rename_column_in_data", function(curr_col_name = "", ne
purrr::map(.x = keys_to_delete, .f = ~self$remove_key(key_name = names(active_keys[.x])))
}
}
if(self$column_selection_applied()) self$remove_current_column_selection()
#if(self$column_selection_applied()) self$remove_current_column_selection()
# Need to use private$data here because changing names of data field
names(private$data)[names(curr_data) == curr_col_name] <- new_col_name

column_names <- self$get_column_names()

if (any(is.na(column_names))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and here

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

column_names[is.na(column_names)] <- new_col_name
} else {
column_names <- new_col_name
}

self$update_selection(column_names, private$.current_column_selection$name)

self$append_to_variables_metadata(new_col_name, name_label, new_col_name)
# TODO decide if we need to do these 2 lines
self$append_to_changes(list(Renamed_col, curr_col_name, new_col_name))
@@ -971,12 +1016,23 @@ DataSheet$set("public", "rename_column_in_data", function(curr_col_name = "", ne
} else if (type == "multiple") {
if (!missing(new_column_names_df)) {
new_col_names <- new_column_names_df[, 1]
cols_changed_index <- new_column_names_df[, 2]
cols_changed_index <- which(names(private$data) %in% new_column_names_df[, 2])
curr_col_names <- names(private$data)
curr_col_names[cols_changed_index] <- new_col_names
if(any(duplicated(curr_col_names))) stop("Cannot rename columns. Column names must be unique.")
if(self$column_selection_applied()) self$remove_current_column_selection()
#if(self$column_selection_applied()) self$remove_current_column_selection()
names(private$data)[cols_changed_index] <- new_col_names

column_names <- self$get_column_names()

if (any(is.na(column_names))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@N-thony can you change the any(is.na(...)) to anyNA as per @Patowhiz's suggestion?
It should perform the same, but be a fraction quicker and easier to read

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

column_names[is.na(column_names)] <- new_col_names
} else {
column_names <- new_col_names
}

self$update_selection(column_names, private$.current_column_selection$name)

for (i in seq_along(cols_changed_index)) {
self$append_to_variables_metadata(new_col_names[i], name_label, new_col_names[i])
}
@@ -995,19 +1051,31 @@ DataSheet$set("public", "rename_column_in_data", function(curr_col_name = "", ne
} else if (type == "rename_with") {
if (missing(.fn)) stop(.fn, "is missing with no default.")
curr_col_names <- names(curr_data)
column_names <- self$get_column_names()
private$data <- curr_data |>

dplyr::rename_with(
.fn = .fn,
.cols = {{ .cols }}, ...
)
if(self$column_selection_applied()) self$remove_current_column_selection()
new_col_names <- names(private$data)
if (!all(new_col_names %in% curr_col_names)) {
new_col_names <- new_col_names[!(new_col_names %in% curr_col_names)]

print(new_col_names)
for (i in seq_along(new_col_names)) {
self$append_to_variables_metadata(new_col_names[i], name_label, new_col_names[i])
}

column_names <- self$get_column_names()
if (any(is.na(column_names))) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similarly here

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

column_names[is.na(column_names)] <- new_col_names
} else {
column_names <- new_col_names
}

self$update_selection(column_names, private$.current_column_selection$name)

self$data_changed <- TRUE
self$variables_metadata_changed <- TRUE
}