diff --git a/instat/Model/RCommand/clsPrepareFunctionsForGrids.vb b/instat/Model/RCommand/clsPrepareFunctionsForGrids.vb index 61c4d09ca69..534a6a2a037 100644 --- a/instat/Model/RCommand/clsPrepareFunctionsForGrids.vb +++ b/instat/Model/RCommand/clsPrepareFunctionsForGrids.vb @@ -124,6 +124,7 @@ Public Class clsPrepareFunctionsForGrids clsGetColumnsFromData.AddParameter("col_names", Chr(34) & strColumnName & Chr(34), iPosition:=1) clsGetColumnsFromData.AddParameter("use_current_filter", "FALSE", iPosition:=2) + clsNNonNumeric.SetPackageName("instatExtras") clsNNonNumeric.SetRCommand("n_non_numeric") clsNNonNumeric.AddParameter("x", clsRFunctionParameter:=clsGetColumnsFromData, iPosition:=0) expTemp = _RLink.RunInternalScriptGetValue(clsNNonNumeric.ToScript(), bSilent:=True) diff --git a/instat/clsFileUrlUtilities.vb b/instat/clsFileUrlUtilities.vb index 7a97243bcf3..ad8f7537b3a 100644 --- a/instat/clsFileUrlUtilities.vb +++ b/instat/clsFileUrlUtilities.vb @@ -50,6 +50,7 @@ Public Class clsFileUrlUtilities If bVignette Then Dim clsGetVignetteFunction As New RFunction + clsGetVignetteFunction.SetPackageName("instatExtras") clsGetVignetteFunction.SetRCommand("get_vignette") clsGetVignetteFunction.AddParameter("package", Chr(34) & strPackageName & Chr(34), iPosition:=0) strURL = frmMain.clsRLink.RunInternalScriptGetValue(clsGetVignetteFunction.ToScript(), bSeparateThread:=False).AsCharacter(0) diff --git a/instat/clsGridROperations.vb b/instat/clsGridROperations.vb index 2527e92c8e7..c19f3085ac2 100644 --- a/instat/clsGridROperations.vb +++ b/instat/clsGridROperations.vb @@ -52,6 +52,7 @@ Public Class GridROperations clsGetColumnsFromData.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$get_columns_from_data") clsGetColumnsFromData.AddParameter("data_name", Chr(34) & strDataFrame & Chr(34), iPosition:=0) + clsNNonNumeric.SetPackageName("instatExtras") clsNNonNumeric.SetRCommand("n_non_numeric") clsConvertToNumeric.AddParameter("to_type", Chr(34) & "numeric" & 
Chr(34), iPosition:=2) diff --git a/instat/clsQualityControl.vb b/instat/clsQualityControl.vb index c12fd7a52e6..f25c813b8f0 100644 --- a/instat/clsQualityControl.vb +++ b/instat/clsQualityControl.vb @@ -66,7 +66,7 @@ Public Class clsQCJumpRCode clsPmaxFunction.bToScriptAsRString = True strCalcName = strlargestJump - clsJumpCalcFunction.SetRCommand("instat_calculation$new") + clsJumpCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsJumpCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsJumpCalcFunction.AddParameter("function_exp", clsRFunctionParameter:=clsPmaxFunction, iPosition:=1) clsJumpCalcFunction.AddParameter("result_name", Chr(34) & strCalcName & Chr(34), iPosition:=4) @@ -75,7 +75,7 @@ Public Class clsQCJumpRCode strTestName = strJumpTest clsJumpListFunc.SetRCommand("list") clsJumpListFunc.AddParameter("sub1", clsRFunctionParameter:=clsJumpCalcFunction, bIncludeArgumentName:=False) - clsJumpTestFunction.SetRCommand("instat_calculation$new") + clsJumpTestFunction.SetRCommand("instatCalculations::instat_calculation$new") clsJumpTestFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsJumpTestFunction.AddParameter("function_exp", clsROperatorParameter:=clsGreaterJumpOperator, iPosition:=1) clsJumpTestFunction.AddParameter("result_name", Chr(34) & strTestName & Chr(34), iPosition:=4) @@ -131,7 +131,7 @@ Public Class clsQCSameRCode clsDollarOperator.AddParameter("right", strParameterValue:=strLengths, bIncludeArgumentName:=False, iPosition:=1) strCalcName = strlargestSame - clsSameCalcFunction.SetRCommand("instat_calculation$new") + clsSameCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsSameCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSameCalcFunction.AddParameter("function_exp", clsRFunctionParameter:=clsRepFunc, iPosition:=1) clsSameCalcFunction.AddParameter("result_name", Chr(34) & strCalcName & Chr(34), 
iPosition:=4) @@ -142,7 +142,7 @@ Public Class clsQCSameRCode clsSameListFunc.SetRCommand("list") clsSameListFunc.AddParameter("sub1", clsRFunctionParameter:=clsSameCalcFunction, bIncludeArgumentName:=False) - clsSameTestFunction.SetRCommand("instat_calculation$new") + clsSameTestFunction.SetRCommand("instatCalculations::instat_calculation$new") clsSameTestFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSameTestFunction.AddParameter("function_exp", clsROperatorParameter:=clsSameGreaterOperator, iPosition:=1) clsSameTestFunction.AddParameter("result_name", Chr(34) & strTestName & Chr(34), iPosition:=4) @@ -179,7 +179,7 @@ Public Class clsQCDifferenceRCode clsDiffOperator.SetOperation("-") clsDiffOperator.bToScriptAsRString = True - clsDiffCalcFunction.SetRCommand("instat_calculation$new") + clsDiffCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsDiffCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsDiffCalcFunction.AddParameter("function_exp", clsROperatorParameter:=clsDiffOperator, iPosition:=1) clsDiffCalcFunction.AddParameter("result_name", Chr(34) & strDiffCalc & Chr(34), iPosition:=4) @@ -188,7 +188,7 @@ Public Class clsQCDifferenceRCode strTestName = strDiffTest clsListFunc.SetRCommand("list") clsListFunc.AddParameter("sub1", bIncludeArgumentName:=False, clsRFunctionParameter:=clsDiffCalcFunction, iPosition:=0) - clsDiffTestFunction.SetRCommand("instat_calculation$new") + clsDiffTestFunction.SetRCommand("instatCalculations::instat_calculation$new") clsDiffTestFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsDiffTestFunction.AddParameter("function_exp", clsROperatorParameter:=clsLessDiffOperator, iPosition:=1) clsDiffTestFunction.AddParameter("result_name", Chr(34) & strTestName & Chr(34), iPosition:=4) @@ -223,7 +223,7 @@ Public Class clsQCAcceptableRange clsRangeOrOperator.AddParameter("left", 
clsROperatorParameter:=clsLessEqualToOperator, iPosition:=0, bIncludeArgumentName:=False) clsRangeOrOperator.AddParameter("right", clsROperatorParameter:=clsGreaterEqualToOperator, iPosition:=1, bIncludeArgumentName:=False) - clsAcceptableRangeTestFunc.SetRCommand("instat_calculation$new") + clsAcceptableRangeTestFunc.SetRCommand("instatCalculations::instat_calculation$new") clsAcceptableRangeTestFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsAcceptableRangeTestFunc.AddParameter("function_exp", clsROperatorParameter:=clsRangeOrOperator, iPosition:=1) clsAcceptableRangeTestFunc.AddParameter("result_name", Chr(34) & strRangeName & Chr(34), iPosition:=4) @@ -265,7 +265,7 @@ Public Class clsQcOutliers strUpperTestName = strOutlierUpperTestCalcName strLowerTestName = strOutlierLowerTestCalcName - clsOutlierUpperLimitCalc.SetRCommand("instat_calculation$new") + clsOutlierUpperLimitCalc.SetRCommand("instatCalculations::instat_calculation$new") clsOutlierUpperLimitCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsOutlierUpperLimitCalc.AddParameter("function_exp", clsRFunctionParameter:=clsOutlierUpperLimitFunc, iPosition:=1) clsOutlierUpperLimitCalc.AddParameter("result_name", Chr(34) & strUpperCalcName & Chr(34), iPosition:=4) @@ -275,7 +275,7 @@ Public Class clsQcOutliers clsOutlierUpperLimitFunc.AddParameter("bupperlimit", "TRUE") clsOutlierUpperLimitFunc.bToScriptAsRString = True - clsOutlierLowerLimitCalc.SetRCommand("instat_calculation$new") + clsOutlierLowerLimitCalc.SetRCommand("instatCalculations::instat_calculation$new") clsOutlierLowerLimitCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsOutlierLowerLimitCalc.AddParameter("function_exp", clsRFunctionParameter:=clsOutlierLowerLimitFunc, iPosition:=1) clsOutlierLowerLimitCalc.AddParameter("result_name", Chr(34) & strLowerCalcName & Chr(34), iPosition:=4) @@ -285,7 +285,7 @@ Public Class clsQcOutliers 
clsOutlierLowerLimitFunc.AddParameter("bupperlimit", "FALSE") clsOutlierLowerLimitFunc.bToScriptAsRString = True - clsOutlierLowerLimitTestCalc.SetRCommand("instat_calculation$new") + clsOutlierLowerLimitTestCalc.SetRCommand("instatCalculations::instat_calculation$new") clsOutlierLowerLimitTestCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsOutlierLowerLimitTestCalc.AddParameter("function_exp", clsROperatorParameter:=clsOutlierLowerOperator, iPosition:=1) clsOutlierLowerLimitTestCalc.AddParameter("sub_calculations", clsRFunctionParameter:=clsOutlierLowerList, iPosition:=2) @@ -299,7 +299,7 @@ Public Class clsQcOutliers clsOutlierLowerOperator.bToScriptAsRString = True clsOutlierLowerOperator.AddParameter("right", strLowerCalcName, iPosition:=1) - clsOutlierUpperLimitTestCalc.SetRCommand("instat_calculation$new") + clsOutlierUpperLimitTestCalc.SetRCommand("instatCalculations::instat_calculation$new") clsOutlierUpperLimitTestCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsOutlierUpperLimitTestCalc.AddParameter("function_exp", clsROperatorParameter:=clsOutlierUpperOperator, iPosition:=1) clsOutlierUpperLimitTestCalc.AddParameter("sub_calculations", clsRFunctionParameter:=clsOutlierUpperList, iPosition:=2) diff --git a/instat/clsRCodeStructure.vb b/instat/clsRCodeStructure.vb index 1044c69d182..050f501edbc 100644 --- a/instat/clsRCodeStructure.vb +++ b/instat/clsRCodeStructure.vb @@ -530,6 +530,7 @@ Public Class RCodeStructure If _strAssignToObjectTypeLabel = RObjectTypeLabel.Graph Then Dim clsCheckGraphRFunction As New RFunction + clsCheckGraphRFunction.SetPackageName("instatExtras") clsCheckGraphRFunction.SetRCommand("check_graph") clsCheckGraphRFunction.AddParameter("graph_object", _strAssignToObject) clsAddRObject.AddParameter("object", clsRFunctionParameter:=clsCheckGraphRFunction) diff --git a/instat/clsRLink.vb b/instat/clsRLink.vb index ba19e66b60b..4c7f339635a 100644 --- a/instat/clsRLink.vb +++ 
b/instat/clsRLink.vb @@ -625,6 +625,7 @@ Public Class RLink Dim strExistingNames As String Dim expPrefix As SymbolicExpression + clsGetDefault.SetPackageName("instatExtras") clsGetDefault.SetRCommand("next_default_item") clsGetDefault.AddParameter("prefix", Chr(34) & strPrefix & Chr(34)) strExistingNames = GetListAsRString(lstItems) @@ -740,7 +741,7 @@ Public Class RLink bSeparateThread:=False, bShowWaitDialogOverride:=Nothing) ElseIf Not clsRStatement.IsAssignment _ AndAlso Not String.IsNullOrWhiteSpace(clsRStatement.TextNoFormatting) Then - strOutput = GetFileOutput("view_object_data(object = " _ + strOutput = GetFileOutput("instatExtras::view_object_data(object = " _ & clsRStatement.TextNoFormatting _ & " , object_format = 'text' )", bSilent:=False, bSeparateThread:=False, bShowWaitDialogOverride:=Nothing) @@ -751,7 +752,7 @@ Public Class RLink ' Add output to logger clsOutputLogger.AddOutput(clsRStatement.Text, strOutput, bAsFile:=True, - bDisplayOutputInExternalViewer:=clsRStatement.TextNoFormatting.StartsWith("view_object_data")) + bDisplayOutputInExternalViewer:=clsRStatement.TextNoFormatting.StartsWith("instatExtras::view_object_data")) ' Log the script LogScript(clsRStatement.Text.TrimEnd(vbCr, vbLf)) @@ -850,7 +851,7 @@ Public Class RLink Dim strRStatementAsSingleLine As String = strRStatement.Replace(vbCr, String.Empty) strRStatementAsSingleLine = strRStatementAsSingleLine.Replace(vbLf, String.Empty) 'wrap final command inside view_object_data just in case there is an output object - strOutput = GetFileOutput("view_object_data(object = " & strRStatementAsSingleLine & " , object_format = 'text' )", False, False, Nothing) + strOutput = GetFileOutput("instatExtras::view_object_data(object = " & strRStatementAsSingleLine & " , object_format = 'text' )", False, False, Nothing) Else Evaluate(strRStatement, bSilent:=False, bSeparateThread:=False, bShowWaitDialogOverride:=Nothing) End If @@ -993,7 +994,7 @@ Public Class RLink End If If bSuccess Then - strOutput 
= GetFileOutput("view_object_data(object = " & arrExecutableRScriptLines.Last() & " , object_format = 'text' )", bSilent, bSeparateThread, bShowWaitDialogOverride) + strOutput = GetFileOutput("instatExtras::view_object_data(object = " & arrExecutableRScriptLines.Last() & " , object_format = 'text' )", bSilent, bSeparateThread, bShowWaitDialogOverride) End If End If @@ -2113,6 +2114,7 @@ Public Class RLink clsGetColumn.SetRCommand(strInstatDataObject & "$get_columns_from_data") clsGetColumn.AddParameter("data_name", Chr(34) & strDataName & Chr(34)) clsGetColumn.AddParameter("col_names", Chr(34) & strColumn & Chr(34)) + clsIsBinary.SetPackageName("instatExtras") clsIsBinary.SetRCommand("is.binary") clsIsBinary.AddParameter("x", clsRFunctionParameter:=clsGetColumn) expBinary = RunInternalScriptGetValue(clsIsBinary.ToScript()) @@ -2216,7 +2218,7 @@ Public Class RLink Dim strRStatementTrimmed As String = TrimStartRStatement(strRStatement) Return strRStatementTrimmed.StartsWith(strInstatDataObject & "$get_object_data") _ OrElse strRStatementTrimmed.StartsWith(strInstatDataObject & "$get_last_object_data") _ - OrElse strRStatementTrimmed.StartsWith("view_object_data") + OrElse strRStatementTrimmed.StartsWith("instatExtras::view_object_data") End Function Private Function TrimStartRStatement(strRStatement As String) As String diff --git a/instat/dlgAnonymiseIDColumn.vb b/instat/dlgAnonymiseIDColumn.vb index c387518d3ab..821597ee6de 100644 --- a/instat/dlgAnonymiseIDColumn.vb +++ b/instat/dlgAnonymiseIDColumn.vb @@ -72,6 +72,7 @@ Public Class dlgAnonymiseIDColumn ucrSelectorAnonymiseIDColumn.Reset() ucrSaveAnonymisedColumn.Reset() + clsHashFunction.SetPackageName("instatExtras") clsHashFunction.SetRCommand("hashed_id") clsHashFunction.AddParameter("algo", Chr(34) & "crc32" & Chr(34), iPosition:=2) diff --git a/instat/dlgCalculationsSummary.vb b/instat/dlgCalculationsSummary.vb index 7ed156dd6ed..d5be6731d5f 100644 --- a/instat/dlgCalculationsSummary.vb +++ 
b/instat/dlgCalculationsSummary.vb @@ -61,7 +61,7 @@ Public Class dlgCalculationsSummary clsApplyCalculation.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$run_instat_calculation") clsApplyCalculation.AddParameter("calc", clsRFunctionParameter:=clsNewCalculationFunction) - clsNewCalculationFunction.SetRCommand("instat_calculation$new") + clsNewCalculationFunction.SetRCommand("instatCalculations::instat_calculation$new") clsNewCalculationFunction.AddParameter("name", Chr(34) & strCalcName & Chr(34)) clsNewCalculationFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34)) clsNewCalculationFunction.AddParameter("save", "2") diff --git a/instat/dlgCanonicalCorrelationAnalysis.vb b/instat/dlgCanonicalCorrelationAnalysis.vb index f7dab3ff36a..8a6b9b2132f 100644 --- a/instat/dlgCanonicalCorrelationAnalysis.vb +++ b/instat/dlgCanonicalCorrelationAnalysis.vb @@ -93,6 +93,7 @@ Public Class dlgCanonicalCorrelationAnalysis strRDataFrameNameToAddObjectTo:=ucrSelectorCCA.strCurrentDataFrame, strObjectName:="last_model") + clsRFunctionCoef.SetPackageName("instatExtras") clsRFunctionCoef.SetRCommand("cancor_coef") clsRFunctionCoef.bExcludeAssignedFunctionOutput = False clsRFunctionCoef.SetAssignToOutputObject(strRObjectToAssignTo:="last_summary", diff --git a/instat/dlgCircularDensityPlot.vb b/instat/dlgCircularDensityPlot.vb index 33561bf1c09..8fb5b185c3d 100644 --- a/instat/dlgCircularDensityPlot.vb +++ b/instat/dlgCircularDensityPlot.vb @@ -150,6 +150,7 @@ Public Class dlgCircularDensityPlot clsScatterPlotFunction.AddParameter("sep", 0.0003, iPosition:=5) clsScatterPlotFunction.AddParameter("shrink", 1, iPosition:=9) + clsRecordGraphFunction.SetPackageName("instatExtras") clsRecordGraphFunction.SetRCommand("record_graph") clsRecordGraphFunction.AddParameter("x", clsRFunctionParameter:=clsDensityPlotFunction, iPosition:=0) diff --git a/instat/dlgClimaticBoxPlot.vb b/instat/dlgClimaticBoxPlot.vb index b4ab4385bf2..c2468506c9f 100644 --- 
a/instat/dlgClimaticBoxPlot.vb +++ b/instat/dlgClimaticBoxPlot.vb @@ -269,6 +269,7 @@ Public Class dlgClimaticBoxPlot clsRaesFunction.SetRCommand("aes") clsRaesFunction.AddParameter("x", Chr(34) & Chr(34)) + clsAsFactorFunction.SetPackageName("instatExtras") clsAsFactorFunction.SetRCommand("make_factor") clsAsFactorFunction.AddParameter("x", Chr(34) & Chr(34), bIncludeArgumentName:=False) diff --git a/instat/dlgClimaticCheckDataRain.vb b/instat/dlgClimaticCheckDataRain.vb index 97e2fb3b035..402129a5b5b 100644 --- a/instat/dlgClimaticCheckDataRain.vb +++ b/instat/dlgClimaticCheckDataRain.vb @@ -292,7 +292,7 @@ Public Class dlgClimaticCheckDataRain bResetSubdialog = True 'Group Function - clsGroupByFunc.SetRCommand("instat_calculation$new") + clsGroupByFunc.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByFunc.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByFunc.SetAssignTo("grouping") @@ -300,7 +300,7 @@ Public Class dlgClimaticCheckDataRain clsListFunc.AddParameter("sub1", clsRFunctionParameter:=clsGroupByFunc, bIncludeArgumentName:=False, iPosition:=1) 'Main Filter - clsRainFilterFunc.SetRCommand("instat_calculation$new") + clsRainFilterFunc.SetRCommand("instatCalculations::instat_calculation$new") clsRainFilterFunc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsRainFilterFunc.AddParameter("function_exp", clsROperatorParameter:=clsOrOperator, iPosition:=1) clsRainFilterFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clsListSubCalc, iPosition:=2) @@ -310,7 +310,7 @@ Public Class dlgClimaticCheckDataRain clsManuplationDayListFunction.SetRCommand("list") clsManuplationDayListFunction.AddParameter("list", clsRFunctionParameter:=clsRainFilterFunc, iPosition:=0, bIncludeArgumentName:=False) - clsDayFilterFunc.SetRCommand("instat_calculation$new") + clsDayFilterFunc.SetRCommand("instatCalculations::instat_calculation$new") clsDayFilterFunc.AddParameter("type", Chr(34) & "filter" & Chr(34), 
iPosition:=0) clsDayFilterFunc.AddParameter("function_exp", Chr(34) & "day" & Chr(34), iPosition:=1) clsDayFilterFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clsListDayFunction, iPosition:=2) @@ -337,7 +337,7 @@ Public Class dlgClimaticCheckDataRain clsLargeLessOperator.SetOperation("<") clsLargeLessOperator.AddParameter("right", bIncludeArgumentName:=False, strParameterValue:="-1E-8", iPosition:=1) - clsLargeTestCalcFunc.SetRCommand("instat_calculation$new") + clsLargeTestCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") clsLargeTestCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsLargeTestCalcFunc.AddParameter("function_exp", clsROperatorParameter:=clsOrLargeOperator, iPosition:=1) clsLargeTestCalcFunc.AddParameter("result_name", Chr(34) & strLargeTest & Chr(34), iPosition:=3) @@ -370,13 +370,13 @@ Public Class dlgClimaticCheckDataRain clsRepFunc.AddParameter("second", bIncludeArgumentName:=False, clsROperatorParameter:=clsDollarOperator, iPosition:=1) clsRepFunc.bToScriptAsRString = True - clsSameCalcFunc.SetRCommand("instat_calculation$new") + clsSameCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") clsSameCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSameCalcFunc.AddParameter("function_exp", clsRFunctionParameter:=clsRepFunc, iPosition:=1) clsSameCalcFunc.AddParameter("result_name", Chr(34) & strSameCalc & Chr(34), iPosition:=3) clsSameCalcFunc.SetAssignTo("same_calculation") - clsSameTestFunc.SetRCommand("instat_calculation$new") + clsSameTestFunc.SetRCommand("instatCalculations::instat_calculation$new") clsSameTestFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSameTestFunc.AddParameter("function_exp", clsROperatorParameter:=clsAndOperator, iPosition:=1) clsSameTestFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clsSameList, iPosition:=2) @@ -415,13 +415,13 @@ Public Class dlgClimaticCheckDataRain 
clsMultipleOperator.AddParameter("left", clsROperatorParameter:=clsEqualOperator, iPosition:=0) clsMultipleOperator.AddParameter("right", clsRFunctionParameter:=clsCumSumFuc, iPosition:=1) - clsCumulativeCalcFunc.SetRCommand("instat_calculation$new") + clsCumulativeCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") clsCumulativeCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCumulativeCalcFunc.AddParameter("function_exp", clsROperatorParameter:=clsMinusOperator, iPosition:=1) clsCumulativeCalcFunc.AddParameter("result_name", Chr(34) & strCumulativeCalc & Chr(34), iPosition:=3) clsCumulativeCalcFunc.SetAssignTo("cumulative_calculation") - clsCumulativeTestFunc.SetRCommand("instat_calculation$new") + clsCumulativeTestFunc.SetRCommand("instatCalculations::instat_calculation$new") clsCumulativeTestFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCumulativeTestFunc.AddParameter("function_exp", clsROperatorParameter:=clsSecondGreaterOperator, iPosition:=1) clsCumulativeTestFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clsCumulativeList, iPosition:=2) @@ -438,11 +438,11 @@ Public Class dlgClimaticCheckDataRain clsIsNaFunction.SetRCommand("is.na") 'Group By Month - clsGroupByMonth.SetRCommand("instat_calculation$new") + clsGroupByMonth.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByMonth.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByMonth.SetAssignTo("grouping_month") - clsGroupByMonthYearFunction.SetRCommand("instat_calculation$new") + clsGroupByMonthYearFunction.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByMonthYearFunction.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByMonthYearFunction.SetAssignTo("grouping_month_year") @@ -459,13 +459,13 @@ Public Class dlgClimaticCheckDataRain clsRainyDaysOperator.bBrackets = True clsRainyDaysOperator.bToScriptAsRString = True - 
clsRainyDaysFunc.SetRCommand("instat_calculation$new") + clsRainyDaysFunc.SetRCommand("instatCalculations::instat_calculation$new") clsRainyDaysFunc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsRainyDaysFunc.AddParameter("function_exp", clsROperatorParameter:=clsRainyDaysOperator, iPosition:=0) clsRainyDaysFunc.SetAssignTo("rainydays_filter") 'upper Outlier Limit function - clsUpperOutlierLimitValueCalcFunc.SetRCommand("instat_calculation$new") + clsUpperOutlierLimitValueCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") clsUpperOutlierLimitValueCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsUpperOutlierLimitValueCalcFunc.AddParameter("function_exp", clsRFunctionParameter:=clsUpperOutlierLimitFunc, iPosition:=1) clsUpperOutlierLimitValueCalcFunc.AddParameter("result_name", Chr(34) & strUpperOutlierLimit & Chr(34), iPosition:=4) @@ -481,7 +481,7 @@ Public Class dlgClimaticCheckDataRain clsUpperOutlierOperator.bBrackets = False clsUpperOutlierOperator.bToScriptAsRString = True - clsUpperOutlierlimitTestFunc.SetRCommand("instat_calculation$new") + clsUpperOutlierlimitTestFunc.SetRCommand("instatCalculations::instat_calculation$new") clsUpperOutlierlimitTestFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsUpperOutlierlimitTestFunc.AddParameter("function_exp", clsROperatorParameter:=clsUpperOutlierOperator, iPosition:=1) clsUpperOutlierlimitTestFunc.AddParameter("result_name", Chr(34) & strUpperOutlierTest & Chr(34), iPosition:=3) @@ -492,7 +492,7 @@ Public Class dlgClimaticCheckDataRain clsUpperListOutlier.AddParameter("sub1", clsRFunctionParameter:=clsUpperOutlierLimitValueCalcFunc, bIncludeArgumentName:=False, iPosition:=0) 'lower Outlier Limit function - clsLowerOutlierLimitValueCalcFunc.SetRCommand("instat_calculation$new") + clsLowerOutlierLimitValueCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") 
clsLowerOutlierLimitValueCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsLowerOutlierLimitValueCalcFunc.AddParameter("function_exp", clsRFunctionParameter:=clsLowerOutlierLimitFunc, iPosition:=1) clsLowerOutlierLimitValueCalcFunc.AddParameter("result_name", Chr(34) & strLowerOutlierLimit & Chr(34), iPosition:=4) @@ -508,7 +508,7 @@ Public Class dlgClimaticCheckDataRain clsLowerOutlierOperator.bBrackets = False clsLowerOutlierOperator.bToScriptAsRString = True - clsLowerOutlierlimitTestFunc.SetRCommand("instat_calculation$new") + clsLowerOutlierlimitTestFunc.SetRCommand("instatCalculations::instat_calculation$new") clsLowerOutlierlimitTestFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsLowerOutlierlimitTestFunc.AddParameter("function_exp", clsROperatorParameter:=clsLowerOutlierOperator, iPosition:=1) clsLowerOutlierlimitTestFunc.AddParameter("result_name", Chr(34) & strLowerOutlierTest & Chr(34), iPosition:=3) @@ -530,7 +530,7 @@ Public Class dlgClimaticCheckDataRain clsOrOperator.bToScriptAsRString = True 'Dry Month Calculations - clsFilterMonthFunction.SetRCommand("instat_calculation$new") + clsFilterMonthFunction.SetRCommand("instatCalculations::instat_calculation$new") clsFilterMonthFunction.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsFilterMonthFunction.AddParameter("function_exp", clsROperatorParameter:=clsInOperator, iPosition:=1) clsFilterMonthFunction.SetAssignTo("filter_months") @@ -544,14 +544,14 @@ Public Class dlgClimaticCheckDataRain clsNotOperator.bSpaceAroundOperation = False clsNotOperator.AddParameter("space", "", iPosition:=0) - clsDryMonthCalculationFunc.SetRCommand("instat_calculation$new") + clsDryMonthCalculationFunc.SetRCommand("instatCalculations::instat_calculation$new") clsDryMonthCalculationFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsDryMonthCalculationFunc.AddParameter("function_exp", 
clsROperatorParameter:=clsDryMonthAndOperator, iPosition:=1) clsDryMonthCalculationFunc.AddParameter("manipulations", clsRFunctionParameter:=clsListCalcFunction, iPosition:=3) clsDryMonthCalculationFunc.AddParameter("result_name", Chr(34) & strDryMonthCalc & Chr(34), iPosition:=4) clsDryMonthCalculationFunc.SetAssignTo("dry_month_calculation") - clsDayFilterCalcFunction.SetRCommand("instat_calculation$new") + clsDayFilterCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsDayFilterCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsDayFilterCalcFunction.AddParameter("function_exp", clsROperatorParameter:=clsDayEqualOperator, iPosition:=1) clsDayFilterCalcFunction.AddParameter("manipulations", clsRFunctionParameter:=clsManuplationDayListFunction, iPosition:=3) @@ -590,7 +590,7 @@ Public Class dlgClimaticCheckDataRain clsNotIsNaFunction.SetRCommand("!is.na") - clsDryMonthTestCalculationFunc.SetRCommand("instat_calculation$new") + clsDryMonthTestCalculationFunc.SetRCommand("instatCalculations::instat_calculation$new") clsDryMonthTestCalculationFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsDryMonthTestCalculationFunc.AddParameter("function_exp", clsROperatorParameter:=clsDryTestAndOperator, iPosition:=1) clsDryMonthTestCalculationFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clsListTestFunction, iPosition:=3) diff --git a/instat/dlgClimaticCheckDataTemperature.vb b/instat/dlgClimaticCheckDataTemperature.vb index 4e3803c7264..1f595902f2c 100644 --- a/instat/dlgClimaticCheckDataTemperature.vb +++ b/instat/dlgClimaticCheckDataTemperature.vb @@ -213,7 +213,7 @@ Public Class dlgClimaticCheckDataTemperature ucrReceiverElement1.SetMeAsReceiver() 'GroupBy - clsGroupByFunc.SetRCommand("instat_calculation$new") + clsGroupByFunc.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByFunc.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) 
clsGroupByFunc.SetAssignTo("grouping") clsGroupingListFunc.SetRCommand("list") @@ -244,7 +244,7 @@ Public Class dlgClimaticCheckDataTemperature clsDiffOp.SetOperation("|") 'Group By Month for Outliers - clsGroupByMonth.SetRCommand("instat_calculation$new") + clsGroupByMonth.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByMonth.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByMonth.SetAssignTo("grouping_month") @@ -267,7 +267,7 @@ Public Class dlgClimaticCheckDataTemperature clsDiffListSubCalc.SetOperation(",") 'Main calculation filter - clsCalcFilterFunc.SetRCommand("instat_calculation$new") + clsCalcFilterFunc.SetRCommand("instatCalculations::instat_calculation$new") clsCalcFilterFunc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsCalcFilterFunc.AddParameter("function_exp", clsROperatorParameter:=clsOrOperator, iPosition:=1) clsCalcFilterFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clsFilterListFunc, iPosition:=2) diff --git a/instat/dlgClimaticLengthOfSeason.vb b/instat/dlgClimaticLengthOfSeason.vb index 1f6fba4f174..53925e4b267 100644 --- a/instat/dlgClimaticLengthOfSeason.vb +++ b/instat/dlgClimaticLengthOfSeason.vb @@ -183,6 +183,7 @@ Public Class dlgClimaticLengthOfSeason ucrReceiverStartofRains.SetMeAsReceiver() 'length of season calculation + clsLengthOfSeasonFunction.SetPackageName("instatCalculations") clsLengthOfSeasonFunction.SetRCommand("instat_calculation$new") clsLengthOfSeasonFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsLengthOfSeasonFunction.AddParameter("function_exp", clsROperatorParameter:=clsMinusOpertor, iPosition:=1) @@ -194,6 +195,7 @@ Public Class dlgClimaticLengthOfSeason clsMinusOpertor.bToScriptAsRString = True 'start status calculation + clsStartEndStatusFunction.SetPackageName("instatCalculations") clsStartEndStatusFunction.SetRCommand("instat_calculation$new") clsStartEndStatusFunction.AddParameter("type", Chr(34) & 
"calculation" & Chr(34), iPosition:=0) clsStartEndStatusFunction.AddParameter("function_exp", clsRFunctionParameter:=clsCaseWhenFunction, iPosition:=1) @@ -272,6 +274,7 @@ Public Class dlgClimaticLengthOfSeason clsOROperator.bBrackets = False 'combination calculation + clsCombinationCalcFunction.SetPackageName("instatCalculations") clsCombinationCalcFunction.SetRCommand("instat_calculation$new") clsCombinationCalcFunction.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsCombinationCalcFunction.AddParameter("sub_calculation", clsRFunctionParameter:=clsCombinationListFunction, iPosition:=2) @@ -288,6 +291,7 @@ Public Class dlgClimaticLengthOfSeason clsConvertColumnTypeFunction.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$convert_column_to_type") clsConvertColumnTypeFunction.AddParameter("to_type", Chr(34) & "factor" & Chr(34), iPosition:=2) + clsLengthmoreFunction.SetPackageName("instatCalculations") clsLengthmoreFunction.SetRCommand("instat_calculation$new") clsLengthmoreFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsLengthmoreFunction.AddParameter("function_exp", clsRFunctionParameter:=clsElseIfMoreFunction, iPosition:=1) diff --git a/instat/dlgClimaticSummary.vb b/instat/dlgClimaticSummary.vb index 9afa0c59809..f6cdb23e55c 100644 --- a/instat/dlgClimaticSummary.vb +++ b/instat/dlgClimaticSummary.vb @@ -165,6 +165,7 @@ Public Class dlgClimaticSummary clsToConditionOperator = New ROperator clsDayFilterCalcFromConvert = New RFunction + clsDayFilterCalcFromConvert.SetPackageName("databook") clsDayFilterCalcFromConvert.SetRCommand("calc_from_convert") clsDayFilterCalcFromList = New RFunction clsDayFilterCalcFromList.SetRCommand("list") @@ -178,7 +179,7 @@ Public Class dlgClimaticSummary 'TODO: this changes to from >= receiver and to <= receiver if annual-variable is checekd. 
clsFromAndToConditionOperator.bToScriptAsRString = True - clsDayFilterCalc.SetRCommand("instat_calculation$new") + clsDayFilterCalc.SetRCommand("instatCalculations::instat_calculation$new") clsDayFilterCalc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsDayFilterCalc.AddParameter("function_exp", clsROperatorParameter:=clsFromAndToConditionOperator, iPosition:=1) clsDayFilterCalc.AddParameter("calculated_from", clsRFunctionParameter:=clsDayFilterCalcFromConvert, iPosition:=2) diff --git a/instat/dlgClimdexIndices.vb b/instat/dlgClimdexIndices.vb index 613ef424cc8..244eb5996c8 100644 --- a/instat/dlgClimdexIndices.vb +++ b/instat/dlgClimdexIndices.vb @@ -117,6 +117,7 @@ Public Class dlgClimdexIndices ucrReceiverStation.SetMeAsReceiver() ucrChkSave.Checked = True + clsClimdex.SetPackageName("instatClimatic") clsClimdex.SetRCommand("climdex") clsClimdex.AddParameter("indices", clsRFunctionParameter:=clsIndices, iPosition:=8) diff --git a/instat/dlgClimograph.vb b/instat/dlgClimograph.vb index c03070d7e39..fb91003afb2 100644 --- a/instat/dlgClimograph.vb +++ b/instat/dlgClimograph.vb @@ -578,6 +578,7 @@ Public Class dlgClimograph clsPipeOperator.SetOperation("%>%") SetPipeAssignTo() + clsGgwalterliethFunction.SetPackageName("instatClimatic") clsGgwalterliethFunction.SetRCommand("ggwalter_lieth") clsFacetFunction.SetPackageName("ggplot2") diff --git a/instat/dlgCompare.vb b/instat/dlgCompare.vb index 0b60a5fe4ac..abab11826da 100644 --- a/instat/dlgCompare.vb +++ b/instat/dlgCompare.vb @@ -139,11 +139,11 @@ Public Class dlgCompare ucrSaveSecondCol.Reset() 'group_by_station_day_of_year - clsGroupByStationWithinYear.SetRCommand("instat_calculation$new") + clsGroupByStationWithinYear.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByStationWithinYear.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStationWithinYear.SetAssignTo("grouping") - clsBiasCalculation.SetRCommand("instat_calculation$new") + 
clsBiasCalculation.SetRCommand("instatCalculations::instat_calculation$new") clsBiasCalculation.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsBiasCalculation.AddParameter("function_exp", clsROperatorParameter:=clsDiffOperator, iPosition:=1) clsBiasCalculation.AddParameter("result_name", Chr(34) & "bias" & Chr(34), iPosition:=2) @@ -153,7 +153,7 @@ Public Class dlgCompare clsDiffOperator.SetOperation("-") clsDiffOperator.bToScriptAsRString = True - clsAbsDevCalculation.SetRCommand("instat_calculation$new") + clsAbsDevCalculation.SetRCommand("instatCalculations::instat_calculation$new") clsAbsDevCalculation.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsAbsDevCalculation.AddParameter("function_exp", clsRFunctionParameter:=clsAbsDevFunction, iPosition:=1) clsAbsDevCalculation.AddParameter("result_name", Chr(34) & "absdev" & Chr(34), iPosition:=2) @@ -166,7 +166,7 @@ Public Class dlgCompare clsAbsDevFunction.bToScriptAsRString = True clsAbsDevFunction.AddParameter("diff", clsROperatorParameter:=clsMinusOperator, bIncludeArgumentName:=False) - clsCombinedCalculation.SetRCommand("instat_calculation$new") + clsCombinedCalculation.SetRCommand("instatCalculations::instat_calculation$new") clsCombinedCalculation.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsCombinedCalculation.AddParameter("sub_calculation", clsRFunctionParameter:=clsListFunction, iPosition:=2) clsCombinedCalculation.SetAssignTo("combined_calculation") @@ -178,7 +178,7 @@ Public Class dlgCompare clsListManipulation.SetRCommand("list") 'Anomalies calculations - clsSateliteAnomalies.SetRCommand("instat_calculation$new") + clsSateliteAnomalies.SetRCommand("instatCalculations::instat_calculation$new") clsSateliteAnomalies.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSateliteAnomalies.AddParameter("function_exp", clsROperatorParameter:=clsSateliteMinusOperator, iPosition:=1) 
clsSateliteAnomalies.AddParameter("result_name", Chr(34) & "satellite_anom" & Chr(34), iPosition:=2) @@ -192,7 +192,7 @@ Public Class dlgCompare clsSateliteMeanFunction.SetRCommand("mean") clsSateliteMeanFunction.AddParameter("na.rm", "TRUE", iPosition:=1) - clsStationAnomalies.SetRCommand("instat_calculation$new") + clsStationAnomalies.SetRCommand("instatCalculations::instat_calculation$new") clsStationAnomalies.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsStationAnomalies.AddParameter("function_exp", clsROperatorParameter:=clsStationMinusOperator, iPosition:=1) clsStationAnomalies.AddParameter("result_name", Chr(34) & "station_anom" & Chr(34), iPosition:=2) diff --git a/instat/dlgCompareColumns.vb b/instat/dlgCompareColumns.vb index 38838c9e50f..0dcc3e3dbe3 100644 --- a/instat/dlgCompareColumns.vb +++ b/instat/dlgCompareColumns.vb @@ -139,6 +139,7 @@ Public Class dlgCompareColumns ucrReceiverFirst.SetMeAsReceiver() ucrSaveLogical.Reset() + clsCompareColumnsFunction.SetPackageName("instatExtras") clsCompareColumnsFunction.SetRCommand("compare_columns") clsYinXOperator.SetOperation("%in%") diff --git a/instat/dlgConversions.vb b/instat/dlgConversions.vb index 8de14f90963..3655914e436 100644 --- a/instat/dlgConversions.vb +++ b/instat/dlgConversions.vb @@ -248,6 +248,7 @@ Public Class dlgConversions clsDayLengthFunction.SetPackageName("geosphere") clsDayLengthFunction.SetRCommand("daylength") + clsConvertToDegreeFunction.SetPackageName("instatClimatic") clsConvertToDegreeFunction.SetRCommand("convert_to_dec_deg") clsConvertToDegreeFunction.AddParameter("dir", Chr(34) & "N" & Chr(34), iPosition:=3) @@ -255,6 +256,7 @@ Public Class dlgConversions clsRoundFunction.AddParameter("x", clsRFunctionParameter:=clsConvertToDegreeFunction, iPosition:=0) clsRoundFunction.AddParameter("digits", 3, iPosition:=1) + clsYearConversionFunction.SetPackageName("instatClimatic") clsYearConversionFunction.SetRCommand("convert_yy_to_yyyy") 
clsYearConversionFunction.AddParameter("base", "2030", iPosition:=1) diff --git a/instat/dlgDefineCRI.vb b/instat/dlgDefineCRI.vb index cf19f9a5b60..1887a4acc3b 100644 --- a/instat/dlgDefineCRI.vb +++ b/instat/dlgDefineCRI.vb @@ -113,7 +113,7 @@ Public Class dlgCorruptionDefineCRI ucrBase.clsRsyntax.AddParameter("calc", clsRFunctionParameter:=clsDefineFunction) ucrBase.clsRsyntax.AddParameter("display", "FALSE") - clsDefineFunction.SetRCommand("instat_calculation$new") + clsDefineFunction.SetRCommand("instatCalculations::instat_calculation$new") clsDefineFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34)) clsDefineFunction.AddParameter("save", 2) clsDefineFunction.AddParameter("result_name", Chr(34) & ucrSaveCRI.GetText() & Chr(34)) diff --git a/instat/dlgDisplayTopN.vb b/instat/dlgDisplayTopN.vb index 59f93c6e4b1..c03ca978463 100644 --- a/instat/dlgDisplayTopN.vb +++ b/instat/dlgDisplayTopN.vb @@ -219,6 +219,7 @@ Public Class dlgDisplayTopN clsFilterFunction.SetRCommand("filter") clsFilterFunction.AddParameter("0", clsRFunctionParameter:=clsInTopNFunction, iPosition:=0, bIncludeArgumentName:=False) + clsInTopNFunction.SetPackageName("instatExtras") clsInTopNFunction.SetRCommand("in_top_n") clsInTopNFunction.AddParameter("n", "10", iPosition:=1) diff --git a/instat/dlgDuplicates.vb b/instat/dlgDuplicates.vb index 770beea9dad..f9bf87d70ac 100644 --- a/instat/dlgDuplicates.vb +++ b/instat/dlgDuplicates.vb @@ -157,9 +157,11 @@ Public Class dlgDuplicateRows clsDuplicated2.SetRCommand("duplicated2") ' For the third rdo we run clsStreakFunction + clsStreakFunction.SetPackageName("instatExtras") clsStreakFunction.SetRCommand("duplicated_cases") clsStreakFunction.AddParameter("ignore", "NULL") + clsDupCountIndex.SetPackageName("instatExtras") clsDupCountIndex.SetRCommand("duplicated_count_index") clsDupCountIndex.AddParameter("type", Chr(34) & "index" & Chr(34)) diff --git a/instat/dlgEndOfRainsSeason.vb b/instat/dlgEndOfRainsSeason.vb index 
7b8056768be..9eba61bc32e 100644 --- a/instat/dlgEndOfRainsSeason.vb +++ b/instat/dlgEndOfRainsSeason.vb @@ -575,23 +575,30 @@ Public Class dlgEndOfRainsSeason clsDummyFunction.AddParameter("sub3", "True", iPosition:=1) ' Group by + clsGroupByStationYearCalc.SetPackageName("instatCalculations") clsGroupByStationYearCalc.SetRCommand("instat_calculation$new") clsGroupByStationYearCalc.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStationYearCalc.SetAssignTo("grouping_by_station_year") + clsGroupByStationCalc.SetPackageName("instatCalculations") clsGroupByStationCalc.SetRCommand("instat_calculation$new") clsGroupByStationCalc.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStationCalc.SetAssignTo("grouping_by_station") ' Doy Filter + clsDoyFilterCalcFromConvert.SetPackageName("databook") + clsDoyFilterCalcFromConvert.SetRCommand("calc_from_convert") + clsDoyFilterCalcFromConvert.AddParameter("x", clsRFunctionParameter:=clsDoyFilterCalcFromList, iPosition:=0) + + clsDoyFilterCalcFromList.SetRCommand("list") + + clsDoyFilterCalc.SetPackageName("instatCalculations") clsDoyFilterCalc.SetRCommand("instat_calculation$new") clsDoyFilterCalc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsDoyFilterCalc.AddParameter("function_exp", clsROperatorParameter:=clsDoyFilterOperator, iPosition:=1) clsDoyFilterCalc.AddParameter("calculated_from", clsRFunctionParameter:=clsDoyFilterCalcFromConvert, iPosition:=2) clsDoyFilterCalc.SetAssignTo(strDoyFilter) - clsDoyFilterCalcFromConvert.SetRCommand("calc_from_convert") - clsDoyFilterCalcFromConvert.AddParameter("x", clsRFunctionParameter:=clsDoyFilterCalcFromList, iPosition:=0) clsDoyFilterCalcFromList.SetRCommand("list") @@ -610,6 +617,7 @@ Public Class dlgEndOfRainsSeason #Region "end_of_rains" ' Rolling sum calculation + clsEndRainsRollingSumCalc.SetPackageName("instatCalculations") clsEndRainsRollingSumCalc.SetRCommand("instat_calculation$new") 
clsEndRainsRollingSumCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndRainsRollingSumCalc.AddParameter("function_exp", clsRFunctionParameter:=clsRollSumRainFunction, iPosition:=1) @@ -624,6 +632,7 @@ Public Class dlgEndOfRainsSeason clsRollSumRainFunction.bToScriptAsRString = True ' Conditions filter + clsEndRainsConditionsFilterCalc.SetPackageName("instatCalculations") clsEndRainsConditionsFilterCalc.SetRCommand("instat_calculation$new") clsEndRainsConditionsFilterCalc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsEndRainsConditionsFilterCalc.AddParameter("function_exp", clsROperatorParameter:=clsEndRainsConditionsOperator, iPosition:=1) @@ -646,6 +655,7 @@ Public Class dlgEndOfRainsSeason clsIsNaRollSumRain.AddParameter("x", strRollSumRain, iPosition:=0) ' Doy summary + clsEndRainsLastDoySummaryCalc.SetPackageName("instatCalculations") clsEndRainsLastDoySummaryCalc.SetRCommand("instat_calculation$new") clsEndRainsLastDoySummaryCalc.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsEndRainsLastDoySummaryCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseLastDoyFunction, iPosition:=1) @@ -670,6 +680,7 @@ Public Class dlgEndOfRainsSeason clsLastDoyFunction.SetRCommand("last") ' Date summary + clsEndRainsLastDateSummaryCalc.SetPackageName("instatCalculations") clsEndRainsLastDateSummaryCalc.SetRCommand("instat_calculation$new") clsEndRainsLastDateSummaryCalc.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsEndRainsLastDateSummaryCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseLastDateFunction, iPosition:=1) @@ -688,6 +699,7 @@ Public Class dlgEndOfRainsSeason clsLastDateFunction.SetRCommand("last") ' Status summary + clsEndRainsStatusSummaryCalc.SetPackageName("instatCalculations") clsEndRainsStatusSummaryCalc.SetRCommand("instat_calculation$new") clsEndRainsStatusSummaryCalc.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) 
clsEndRainsStatusSummaryCalc.AddParameter("function_exp", clsRFunctionParameter:=clsElseIfENdRainStatusFunction, iPosition:=1) @@ -714,6 +726,7 @@ Public Class dlgEndOfRainsSeason clsLastFunction.AddParameter("x", strRollSumRain, iPosition:=0) ' Combined + clsEndRainsCombinationCalc.SetPackageName("instatCalculations") clsEndRainsCombinationCalc.SetRCommand("instat_calculation$new") clsEndRainsCombinationCalc.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsEndRainsCombinationCalc.AddParameter("manipulations", clsRFunctionParameter:=clsEndRainsCombinationManipulationList, iPosition:=1) @@ -731,6 +744,7 @@ Public Class dlgEndOfRainsSeason clsWBOperator1.bSpaceAroundOperation = True clsWBOperator1.bBrackets = False + clsWBEvaporationMinFunction.SetPackageName("instatClimatic") clsWBEvaporationMinFunction.SetRCommand("WB_evaporation") clsWBEvaporationMinFunction.AddParameter("water_balance", "..1", iPosition:=0, bIncludeArgumentName:=False) clsWBEvaporationMinFunction.AddParameter("y", "..2", iPosition:=4, bIncludeArgumentName:=False) @@ -754,6 +768,7 @@ Public Class dlgEndOfRainsSeason clsEndSeasonIsNaRain.SetRCommand("is.na") 'Rain min + clsEndSeasonRainMinCalc.SetPackageName("instatCalculations") clsEndSeasonRainMinCalc.SetRCommand("instat_calculation$new") clsEndSeasonRainMinCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonRainMinCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseRainMinFunction, iPosition:=1) @@ -766,6 +781,7 @@ Public Class dlgEndOfRainsSeason clsIfElseRainMinFunction.AddParameter("yes", "0", iPosition:=1) 'Rain max + clsEndSeasonRainMaxCalc.SetPackageName("instatCalculations") clsEndSeasonRainMaxCalc.SetRCommand("instat_calculation$new") clsEndSeasonRainMaxCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonRainMaxCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseRainMaxFunction, iPosition:=1) @@ -783,6 +799,7 @@ 
Public Class dlgEndOfRainsSeason clsPMaxFunction.AddParameter("1", "0", bIncludeArgumentName:=False) ' Water Balance min + clsEndSeasonWBMinCalc.SetPackageName("instatCalculations") clsEndSeasonWBMinCalc.SetRCommand("instat_calculation$new") clsEndSeasonWBMinCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonWBMinCalc.AddParameter("function_exp", clsRFunctionParameter:=clsReduceWBMinFunction, iPosition:=1) @@ -841,6 +858,7 @@ Public Class dlgEndOfRainsSeason clsWBMinEvapOperator.AddParameter("0", strRainMin, iPosition:=0) ' Water Balance max + clsEndSeasonWBMaxCalc.SetPackageName("instatCalculations") clsEndSeasonWBMaxCalc.SetRCommand("instat_calculation$new") clsEndSeasonWBMaxCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonWBMaxCalc.AddParameter("function_exp", clsRFunctionParameter:=clsReduceWBMaxFunction, iPosition:=1) @@ -896,6 +914,7 @@ Public Class dlgEndOfRainsSeason clsWBMaxEvapOperator.AddParameter("value", "5", iPosition:=1) ' Water Balance + clsEndSeasonWBCalc.SetPackageName("instatCalculations") clsEndSeasonWBCalc.SetRCommand("instat_calculation$new") clsEndSeasonWBCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonWBCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseWBFunction, iPosition:=1) @@ -929,6 +948,7 @@ Public Class dlgEndOfRainsSeason clsIsNaEvaporation.SetRCommand("is.na") ' Conditions filter + clsEndSeasonConditionsFilterCalc.SetPackageName("instatCalculations") clsEndSeasonConditionsFilterCalc.SetRCommand("instat_calculation$new") clsEndSeasonConditionsFilterCalc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsEndSeasonConditionsFilterCalc.AddParameter("function_exp", Chr(34) & "conditions_check == 1" & Chr(34), iPosition:=1) @@ -948,6 +968,7 @@ Public Class dlgEndOfRainsSeason clsEndSeasonWBConditionOperator.AddParameter("1", "0.5", iPosition:=1) ' Doy summary + 
clsEndSeasonFirstDoySummaryCalc.SetPackageName("instatCalculations") clsEndSeasonFirstDoySummaryCalc.SetRCommand("instat_calculation$new") clsEndSeasonFirstDoySummaryCalc.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsEndSeasonFirstDoySummaryCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseFirstDoyFunction, iPosition:=1) @@ -955,6 +976,7 @@ Public Class dlgEndOfRainsSeason clsEndSeasonFirstDoySummaryCalc.AddParameter("save", "2", iPosition:=6) clsEndSeasonFirstDoySummaryCalc.SetAssignTo(strEndSeason) + clsEndSeasonFirstDoySummaryCalcFilledFunction.SetPackageName("instatCalculations") clsEndSeasonFirstDoySummaryCalcFilledFunction.SetRCommand("instat_calculation$new") clsEndSeasonFirstDoySummaryCalcFilledFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonFirstDoySummaryCalcFilledFunction.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseFirstDoyFilledFunction, iPosition:=1) @@ -995,6 +1017,7 @@ Public Class dlgEndOfRainsSeason clsFirstDoyFunction.SetRCommand("first") ' Date summary + clsEndSeasonFirstDateSummaryCalc.SetPackageName("instatCalculations") clsEndSeasonFirstDateSummaryCalc.SetRCommand("instat_calculation$new") clsEndSeasonFirstDateSummaryCalc.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsEndSeasonFirstDateSummaryCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseFirstDateFunction, iPosition:=1) @@ -1013,6 +1036,7 @@ Public Class dlgEndOfRainsSeason clsFirstDateFunction.SetRCommand("first") ' Status summary + clsEndSeasonStatusSummaryCalc.SetPackageName("instatCalculations") clsEndSeasonStatusSummaryCalc.SetRCommand("instat_calculation$new") clsEndSeasonStatusSummaryCalc.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsEndSeasonStatusSummaryCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfelseStatusFunction, iPosition:=1) @@ -1040,6 +1064,7 @@ Public Class dlgEndOfRainsSeason 
clsIsNAStatusFunction.AddParameter("x", strWB, iPosition:=0, bIncludeArgumentName:=False) ' Combined + clsEndSeasonCombinationCalc.SetPackageName("instatCalculations") clsEndSeasonCombinationCalc.SetRCommand("instat_calculation$new") clsEndSeasonCombinationCalc.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsEndSeasonCombinationCalc.AddParameter("manipulations", clsRFunctionParameter:=clsEndSeasonCombinationManipulationList, iPosition:=1) @@ -1070,6 +1095,7 @@ Public Class dlgEndOfRainsSeason clsConvertlinkedvariable1Function.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$convert_linked_variable") + clsConditionCheckFunction.SetPackageName("instatCalculations") clsConditionCheckFunction.SetRCommand("instat_calculation$new") clsConditionCheckFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsConditionCheckFunction.AddParameter("function_exp", clsRFunctionParameter:=clsElseifCondCheckFunction, iPosition:=1) @@ -1096,6 +1122,7 @@ Public Class dlgEndOfRainsSeason clsIsnaConditionCheckFunction.SetRCommand("is.na") + clsConditionsFilterCondCheckFunction.SetPackageName("instatCalculations") clsConditionsFilterCondCheckFunction.SetRCommand("instat_calculation$new") clsConditionsFilterCondCheckFunction.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsConditionsFilterCondCheckFunction.AddParameter("function_exp", clsROperatorParameter:=clsCheckConditionFilterOperator, iPosition:=1) @@ -1117,6 +1144,7 @@ Public Class dlgEndOfRainsSeason clsIsnaSecondConditionFilterFunction.SetRCommand("is.na") clsIsnaSecondConditionFilterFunction.AddParameter("x", "conditions_check", iPosition:=0, bIncludeArgumentName:=False) + clsEndSeasonCombinationCalcStatusFunction.SetPackageName("instatCalculations") clsEndSeasonCombinationCalcStatusFunction.SetRCommand("instat_calculation$new") clsEndSeasonCombinationCalcStatusFunction.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) 
clsEndSeasonCombinationCalcStatusFunction.AddParameter("manipulations", clsRFunctionParameter:=clsEndSeasonCombinationManipulationListStatusFunction, iPosition:=1) @@ -1152,6 +1180,7 @@ Public Class dlgEndOfRainsSeason clsSetNamesEndSeasonFunction.AddParameter("x", Chr(34) & strEndSeason & Chr(34), iPosition:=0, bIncludeArgumentName:=False) clsSetNamesEndSeasonFunction.AddParameter("y", strLinkeddata, iPosition:=1, bIncludeArgumentName:=False) + clsEndSeasonStatus2Function.SetPackageName("instatCalculations") clsEndSeasonStatus2Function.SetRCommand("instat_calculation$new") clsEndSeasonStatus2Function.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonStatus2Function.AddParameter("function_exp", clsRFunctionParameter:=clsIfelseStatus2Function, iPosition:=1) @@ -1169,6 +1198,7 @@ Public Class dlgEndOfRainsSeason clsIsnaStatus2Function.SetRCommand("!is.na") clsIsnaStatus2Function.AddParameter("x", strEndSeason, iPosition:=0, bIncludeArgumentName:=False) + clsEndSeasonCombiStatus2Function.SetPackageName("instatCalculations") clsEndSeasonCombiStatus2Function.SetRCommand("instat_calculation$new") clsEndSeasonCombiStatus2Function.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsEndSeasonCombiStatus2Function.AddParameter("sub_calculations", clsRFunctionParameter:=clsEndSeasonCombiStatus2ListFunction, iPosition:=2) diff --git a/instat/dlgExportForClimpact.vb b/instat/dlgExportForClimpact.vb index 91770b69bc6..2c810c07174 100644 --- a/instat/dlgExportForClimpact.vb +++ b/instat/dlgExportForClimpact.vb @@ -99,6 +99,7 @@ Public Class dlgExportForClimpact ucrSelectorImportToClimpact.Reset() + clsOutputClimpact.SetPackageName("instatClimatic") clsOutputClimpact.SetRCommand("write_weather_data") ucrBase.clsRsyntax.SetBaseRFunction(clsOutputClimpact) diff --git a/instat/dlgExportToCPT.vb b/instat/dlgExportToCPT.vb index d69aa8bfc6d..96b24612fb7 100644 --- a/instat/dlgExportToCPT.vb +++ b/instat/dlgExportToCPT.vb @@ -124,6 
+124,7 @@ Public Class dlgExportToCPT ucrInputFilePath.Reset() ucrInputFilePath.SetName("") + clsOutputCPT.SetPackageName("instatClimatic") clsOutputCPT.SetRCommand("output_CPT") clsExportCPT.SetPackageName("rio") diff --git a/instat/dlgExportToWWR.vb b/instat/dlgExportToWWR.vb index 2ab1df0f6c3..fb077ce4b3a 100644 --- a/instat/dlgExportToWWR.vb +++ b/instat/dlgExportToWWR.vb @@ -109,6 +109,7 @@ Public Class dlgExportToWWR ucrSelectorExportToWWR.Reset() ucrReceiverStationIdentifier.SetMeAsReceiver() + clsWWRExport.SetPackageName("instatClimatic") clsWWRExport.SetRCommand("wwr_export") clsWWRExport.AddParameter("folder", Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments).Replace("\", "/"), iPosition:=20) diff --git a/instat/dlgExtremesClimatic.vb b/instat/dlgExtremesClimatic.vb index 6fb03d26cb6..cca8e787934 100644 --- a/instat/dlgExtremesClimatic.vb +++ b/instat/dlgExtremesClimatic.vb @@ -346,6 +346,7 @@ Public Class dlgExtremesClimatic SetCalculationValues() clsDayFilterCalcFromConvert = New RFunction + clsDayFilterCalcFromConvert.SetPackageName("databook") clsDayFilterCalcFromConvert.SetRCommand("calc_from_convert") clsDayFilterCalcFromList = New RFunction clsDayFilterCalcFromList.SetRCommand("list") @@ -353,7 +354,7 @@ Public Class dlgExtremesClimatic ' Days clsDayFromAndToOperator.bToScriptAsRString = True - clsDayFromAndTo.SetRCommand("instat_calculation$new") + clsDayFromAndTo.SetRCommand("instatCalculations::instat_calculation$new") clsDayFromAndTo.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsDayFromAndTo.AddParameter("function_exp", clsROperatorParameter:=clsDayFromAndToOperator, iPosition:=1) clsDayFromAndToOperator.SetOperation("&") @@ -369,13 +370,13 @@ Public Class dlgExtremesClimatic UpdateDayFilterPreview() ' For the Min/Max option: - clsGroupByFunction.SetRCommand("instat_calculation$new") + clsGroupByFunction.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByFunction.AddParameter("type", Chr(34) 
& "by" & Chr(34), iPosition:=0) clsGroupByFunction.SetAssignTo("grouping") clsMinMaxManipulationsFunction.SetRCommand("list") clsMinMaxFuncExp.bToScriptAsRString = True - clsMinMaxSummariseFunction.SetRCommand("instat_calculation$new") + clsMinMaxSummariseFunction.SetRCommand("instatCalculations::instat_calculation$new") clsMinMaxSummariseFunction.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsMinMaxSummariseFunction.AddParameter("function_exp", clsRFunctionParameter:=clsMinMaxFuncExp, iPosition:=1) clsMinMaxFuncExp.SetRCommand("max") @@ -390,7 +391,7 @@ Public Class dlgExtremesClimatic ' For the Peaks option: clsDayManipulation.SetRCommand("list") clsPeaksFilterOperator.bToScriptAsRString = True - clsPeaksFilterFunction.SetRCommand("instat_calculation$new") + clsPeaksFilterFunction.SetRCommand("instatCalculations::instat_calculation$new") clsPeaksFilterFunction.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsPeaksFilterFunction.AddParameter("function_exp", clsROperatorParameter:=clsPeaksFilterOperator, iPosition:=1) clsPeaksFilterOperator.SetOperation(">=") ' this value changes depending what is selected in the combo box. Temp. fix in a sub, but needs a proper fix. 
@@ -402,7 +403,7 @@ Public Class dlgExtremesClimatic clsPeaksFilterFunction.SetAssignTo("peak_filter") 'Code for carry columns - clsCombinationCalc.SetRCommand("instat_calculation$new") + clsCombinationCalc.SetRCommand("instatCalculations::instat_calculation$new") clsCombinationCalc.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsCombinationCalc.AddParameter("sub_calculations", clsRFunctionParameter:=clsCombinationSubCalcs, iPosition:=1) clsCombinationCalc.AddParameter("manipulations", clsRFunctionParameter:=clsCombinationManipulations, iPosition:=2) @@ -419,7 +420,7 @@ Public Class dlgExtremesClimatic clsDateCarryCalcFromList.SetRCommand("list") - clsFirstDateSummary.SetRCommand("instat_calculation$new") + clsFirstDateSummary.SetRCommand("instatCalculations::instat_calculation$new") clsFirstDateSummary.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsFirstDateSummary.AddParameter("function_exp", clsRFunctionParameter:=clsFirstFunction, iPosition:=1) clsFirstDateSummary.AddParameter("calculated_from", clsRFunctionParameter:=clsDateCarryCalcFromList, iPosition:=2) @@ -430,7 +431,7 @@ Public Class dlgExtremesClimatic clsFirstFunction.SetRCommand("first") clsFirstFunction.bToScriptAsRString = True - clsLastDateSummary.SetRCommand("instat_calculation$new") + clsLastDateSummary.SetRCommand("instatCalculations::instat_calculation$new") clsLastDateSummary.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsLastDateSummary.AddParameter("function_exp", clsRFunctionParameter:=clsLastFunction, iPosition:=1) clsLastDateSummary.AddParameter("calculated_from", clsRFunctionParameter:=clsDateCarryCalcFromList, iPosition:=2) @@ -441,7 +442,7 @@ Public Class dlgExtremesClimatic clsLastFunction.SetRCommand("last") clsLastFunction.bToScriptAsRString = True - clsNSummary.SetRCommand("instat_calculation$new") + clsNSummary.SetRCommand("instatCalculations::instat_calculation$new") clsNSummary.AddParameter("type", Chr(34) & 
"summary" & Chr(34), iPosition:=0) clsNSummary.AddParameter("function_exp", clsRFunctionParameter:=clsNFunction, iPosition:=1) clsNSummary.AddParameter("calculated_from", clsRFunctionParameter:=clsDateCarryCalcFromList, iPosition:=2) @@ -451,7 +452,7 @@ Public Class dlgExtremesClimatic clsNFunction.SetRCommand("summary_count_all") clsNFunction.bToScriptAsRString = True - clsFilterExtremeCalc.SetRCommand("instat_calculation$new") + clsFilterExtremeCalc.SetRCommand("instatCalculations::instat_calculation$new") clsFilterExtremeCalc.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsFilterExtremeCalc.AddParameter("function_exp", clsROperatorParameter:=clsFilterExtremeExp, iPosition:=1) clsFilterExtremeCalc.AddParameter("sub_calculations", clsRFunctionParameter:=clsFilterExtremeSubCalcs, iPosition:=2) @@ -463,16 +464,19 @@ Public Class dlgExtremesClimatic clsFilterExtremeExp.SetOperation("==") clsFilterExtremeExp.bToScriptAsRString = True + clsPlotMrlFunction.SetPackageName("instatClimatic") clsPlotMrlFunction.SetRCommand("plot_mrl") clsPlotMrlFunction.AddParameter("ncol", "1", iPosition:=3) clsPlotMrlFunction.SetAssignTo("last_graph", strTempDataframe:=ucrSelectorClimaticExtremes.ucrAvailableDataFrames.cboAvailableDataFrames.Text, strTempGraph:="last_graph") + clsThresholdPlotFunction.SetPackageName("instatClimatic") clsThresholdPlotFunction.SetRCommand("plot_multiple_threshold") clsThresholdPlotFunction.AddParameter("nint", "10", iPosition:=5) clsThresholdPlotFunction.AddParameter("alpha", "0.05", iPosition:=6) clsThresholdPlotFunction.AddParameter("ncol", "1", iPosition:=7) clsThresholdPlotFunction.SetAssignTo("last_graph", strTempDataframe:=ucrSelectorClimaticExtremes.ucrAvailableDataFrames.cboAvailableDataFrames.Text, strTempGraph:="last_graph") + clsDeclusteringFunction.SetPackageName("instatClimatic") clsDeclusteringFunction.SetRCommand("plot_declustered") clsDeclusteringFunction.AddParameter("threshold", "40", iPosition:=3) 
clsDeclusteringFunction.SetAssignTo("last_graph", strTempDataframe:=ucrSelectorClimaticExtremes.ucrAvailableDataFrames.cboAvailableDataFrames.Text, strTempGraph:="last_graph") diff --git a/instat/dlgFindInVariableOrFilter.vb b/instat/dlgFindInVariableOrFilter.vb index 67bcb0922cc..f325c8a6be8 100644 --- a/instat/dlgFindInVariableOrFilter.vb +++ b/instat/dlgFindInVariableOrFilter.vb @@ -113,6 +113,7 @@ Public Class dlgFindInVariableOrFilter clsGetDataFrameFunction.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$get_data_frame") + clsGetRowHeadersFunction.SetPackageName("instatExtras") clsGetRowHeadersFunction.SetRCommand("getRowHeadersWithText") clsGetRowHeadersFunction.AddParameter("data", clsRFunctionParameter:=clsGetDataFrameFunction, iPosition:=0) clsGetRowHeadersFunction.AddParameter("ignore_case", "TRUE", iPosition:=3) diff --git a/instat/dlgFindNonnumericValues.vb b/instat/dlgFindNonnumericValues.vb index faf95fa7eaf..c46f7aff359 100644 --- a/instat/dlgFindNonnumericValues.vb +++ b/instat/dlgFindNonnumericValues.vb @@ -116,13 +116,13 @@ Public Class dlgFindNonnumericValues clsAsNumericFunction.SetRCommand("as.numeric") clsAsNumericFunction.AddParameter("x", clsRFunctionParameter:=clsAsCharacterFunction, bIncludeArgumentName:=False, iPosition:=1) - clsNonNumericCalcFunc.SetRCommand("instat_calculation$new") + clsNonNumericCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") clsNonNumericCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsNonNumericCalcFunc.AddParameter("function_exp", clsROperatorParameter:=clsNotEqualToOperator, iPosition:=1) clsNonNumericCalcFunc.AddParameter("result_name", Chr(34) & strLogicalColumn & Chr(34), iPosition:=3) clsNonNumericCalcFunc.AddParameter("save", 2, iPosition:=4) - clsNonNumericFilterFunc.SetRCommand("instat_calculation$new") + clsNonNumericFilterFunc.SetRCommand("instatCalculations::instat_calculation$new") clsNonNumericFilterFunc.AddParameter("type", Chr(34) & "filter" & 
Chr(34), iPosition:=0) clsNonNumericFilterFunc.AddParameter("sub_calculations", clsRFunctionParameter:=clslSubCalcListFunc, iPosition:=2) clsNonNumericFilterFunc.AddParameter("result_data_frame", Chr(34) & "Filter" & Chr(34), iPosition:=3) diff --git a/instat/dlgFromLibrary.vb b/instat/dlgFromLibrary.vb index 99813d400ab..fc136f97e46 100644 --- a/instat/dlgFromLibrary.vb +++ b/instat/dlgFromLibrary.vb @@ -65,6 +65,7 @@ Public Class dlgFromLibrary lstCollection.HideSelection = False + clsGetPackages.SetPackageName("instatExtras") clsGetPackages.SetRCommand("get_installed_packages_with_data") expPackageNames = frmMain.clsRLink.RunInternalScriptGetValue(clsGetPackages.ToScript(), bSilent:=True) If expPackageNames IsNot Nothing AndAlso expPackageNames.Type <> Internals.SymbolicExpressionType.Null Then diff --git a/instat/dlgHelpVignettes.vb b/instat/dlgHelpVignettes.vb index 0a53e7924d9..02b435842cf 100644 --- a/instat/dlgHelpVignettes.vb +++ b/instat/dlgHelpVignettes.vb @@ -54,6 +54,7 @@ Public Class dlgHelpVignettes ucrChkFunction.SetText("Function Name:") + clsGetPackages.SetPackageName("instatExtras") clsGetPackages.SetRCommand("get_installed_packages_with_data") clsGetPackages.AddParameter("with_data", "FALSE") expPackageNames = frmMain.clsRLink.RunInternalScriptGetValue(clsGetPackages.ToScript(),bSeparateThread:=False, bSilent:=True) diff --git a/instat/dlgImportDataset.vb b/instat/dlgImportDataset.vb index 43aea73de52..7f4db4c780d 100644 --- a/instat/dlgImportDataset.vb +++ b/instat/dlgImportDataset.vb @@ -804,17 +804,16 @@ Public Class dlgImportDataset ElseIf IsJSONFileFormat() Then strRowMaxParamName = "nrows" ElseIf IsExcelFileFormat() Then + bCanImport = True 'assume can still import the entire excel file If dctSelectedExcelSheets.Count = 0 Then lblNoPreview.Show() lblImportingSheets.Show() lblImportingSheets.Text = "No sheet selected." 
- bCanImport = True 'assume can still import the entire excel file Exit Sub ElseIf dctSelectedExcelSheets.Count > 1 Then lblNoPreview.Show() lblImportingSheets.Show() lblImportingSheets.Text = "Importing the following sheets:" & Environment.NewLine & String.Join(", ", dctSelectedExcelSheets.Values) - bCanImport = True 'assume can import all selected sheets Exit Sub End If strRowMaxParamName = "n_max" @@ -837,6 +836,7 @@ Public Class dlgImportDataset clsTempImport.RemoveAssignTo() + clsAsCharacterFunc.SetPackageName("instatExtras") clsAsCharacterFunc.SetRCommand("convert_to_character_matrix") clsAsCharacterFunc.AddParameter("data", clsRFunctionParameter:=clsTempImport) expTemp = frmMain.clsRLink.RunInternalScriptGetValue(clsAsCharacterFunc.ToScript(), bSilent:=True) diff --git a/instat/dlgInfillMissingValues.vb b/instat/dlgInfillMissingValues.vb index 92c2342f252..750b082e868 100644 --- a/instat/dlgInfillMissingValues.vb +++ b/instat/dlgInfillMissingValues.vb @@ -326,7 +326,7 @@ Public Class dlgInfillMissingValues clsAggregateFunction.SetPackageName("zoo") clsAggregateFunction.SetRCommand("na.aggregate") - clsAggregateFunction.AddParameter("FUN", "summary_mean", iPosition:=2) + clsAggregateFunction.AddParameter("FUN", "databook::summary_mean", iPosition:=2) clsAggregateFunction.AddParameter("maxgap", 10, iPosition:=5) clsSetSeedFunction.SetRCommand("set.seed") diff --git a/instat/dlgInventoryPlot.vb b/instat/dlgInventoryPlot.vb index a9505fc0dc9..88bf81432e7 100644 --- a/instat/dlgInventoryPlot.vb +++ b/instat/dlgInventoryPlot.vb @@ -210,7 +210,9 @@ Public Class dlgInventoryPlot clsInventoryPlot.AddParameter("facet_by", "NULL", iPosition:=8) clsInventoryPlot.SetAssignTo("last_graph", strTempDataframe:=ucrInventoryPlotSelector.ucrAvailableDataFrames.cboAvailableDataFrames.Text, strTempGraph:="last_graph") + clsClimaticMissing.SetPackageName("instatClimatic") clsClimaticMissing.SetRCommand("climatic_missing") + clsClimaticDetails.SetPackageName("instatClimatic") 
clsClimaticDetails.SetRCommand("climatic_details") clsClimaticDetails.SetAssignTo("detail") @@ -224,6 +226,7 @@ Public Class dlgInventoryPlot clsAddKeyFunction.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$add_key") clsAddKeyFunction.AddParameter("col_names", "key_cols", iPosition:=1) + clsCumulativeInventoryFunction.SetPackageName("instatClimatic") clsCumulativeInventoryFunction.SetRCommand("cumulative_inventory") clsCumulativeInventoryFunction.AddParameter("from", Chr(34) & "From" & Chr(34), iPosition:=1) clsCumulativeInventoryFunction.AddParameter("to", Chr(34) & "To" & Chr(34), iPosition:=2) diff --git a/instat/dlgLinePlot.vb b/instat/dlgLinePlot.vb index bbaa250744c..a255608c6db 100644 --- a/instat/dlgLinePlot.vb +++ b/instat/dlgLinePlot.vb @@ -550,9 +550,11 @@ Public Class dlgLinePlot clsGeomSmoothFunc.SetRCommand("geom_smooth") clsGeomSmoothFunc.AddParameter("method", Chr(34) & "lm" & Chr(34), iPosition:=0) + clsGgSlopeFunction.SetPackageName("instatExtras") clsGgSlopeFunction.SetRCommand("slopegraph") clsGgSlopeFunction.AddParameter("data", clsRFunctionParameter:=ucrLinePlotSelector.ucrAvailableDataFrames.clsCurrDataFrame, iPosition:=0) + clsSlopeThemeFunction.SetPackageName("instatExtras") clsSlopeThemeFunction.SetRCommand("slopegraph_theme") clsFacetFunction.SetPackageName("ggplot2") diff --git a/instat/dlgNewDataFrame.vb b/instat/dlgNewDataFrame.vb index 9670737661b..b7807061b42 100644 --- a/instat/dlgNewDataFrame.vb +++ b/instat/dlgNewDataFrame.vb @@ -141,6 +141,7 @@ Public Class dlgNewDataFrame clsCorporaFunction.SetPackageName("rcorpora") clsCorporaFunction.SetRCommand("corpora") + clsListDfFunction.SetPackageName("instatExtras") clsListDfFunction.SetRCommand("read_corpora") 'e.g of Function to be constructed . 
data.frame(data=matrix(data = NA,nrow = 10, ncol = 2)) diff --git a/instat/dlgNewMarkovChains.vb b/instat/dlgNewMarkovChains.vb index 123acf9e989..97305baaa6a 100644 --- a/instat/dlgNewMarkovChains.vb +++ b/instat/dlgNewMarkovChains.vb @@ -85,19 +85,19 @@ Public Class dlgNewMarkovChains clsIfelseFunction.SetRCommand("ifelse") clsRunCalcFunction.SetRCommand(frmMain.clsRLink.strInstatDataObject & "$run_instat_calculation") - clsWDCalcFunction.SetRCommand("instat_calculation$new") + clsWDCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsWDCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34)) clsLagFunction.SetRCommand("lag") - clsWDCalcFunction.SetRCommand("instat_calculation$new") + clsWDCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsWDCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34)) - clsLagCalcFunction.SetRCommand("instat_calculation$new") + clsLagCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsLagCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34)) clsSubCalcsList.SetRCommand("list") - clsOverallLagCalcFunction.SetRCommand("instat_calculation$new") + clsOverallLagCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsFormulaOp.SetOperation("~") diff --git a/instat/dlgOtherRosePlots.vb b/instat/dlgOtherRosePlots.vb index 527cb0a03bf..1568b782dda 100644 --- a/instat/dlgOtherRosePlots.vb +++ b/instat/dlgOtherRosePlots.vb @@ -182,6 +182,7 @@ Public Class dlgOtherRosePlots ucrInputStatistic.SetName("mean") + clsOtherRosePlots.SetPackageName("instatClimatic") clsOtherRosePlots.SetRCommand("other_rose_plots") clsOtherRosePlots.AddParameter("trans", "TRUE", iPosition:=9) diff --git a/instat/dlgPlotRegion.vb b/instat/dlgPlotRegion.vb index 580ca0ae4f5..e4097c231cf 100644 --- a/instat/dlgPlotRegion.vb +++ b/instat/dlgPlotRegion.vb @@ -68,6 +68,7 @@ Public Class dlgPlotRegion ucrReceiverLongitude.SetMeAsReceiver() 
clsPlotRegionFunction.AddParameter("time_point", clsRFunctionParameter:=clsAsDateTimeFunction, iPosition:=5) + clsPlotRegionFunction.SetPackageName("instatClimatic") clsPlotRegionFunction.SetRCommand("plot.region") clsAsDateTimeFunction.SetRCommand("as.Date") clsSequenceFunction.SetRCommand("seq") diff --git a/instat/dlgRPackages.vb b/instat/dlgRPackages.vb index 31635f28e95..e3fca1fc23f 100644 --- a/instat/dlgRPackages.vb +++ b/instat/dlgRPackages.vb @@ -118,9 +118,11 @@ Public Class dlgInstallRPackage Dim chrOutput As CharacterVector If rdoCRAN.Checked Then + clsPackageCheck.SetPackageName("instatExtras") clsPackageCheck.SetRCommand("package_check") clsPackageCheck.AddParameter("package", Chr(34) & ucrInputTextBoxRPackage.GetText() & Chr(34)) ElseIf rdoRPackage.Checked Then + clsPackageCheck.SetPackageName("instatExtras") clsPackageCheck.SetRCommand("check_github_repo") clsPackageCheck.AddParameter("repo", Chr(34) & ucrInputPackage.GetText() & Chr(34)) clsPackageCheck.AddParameter("owner", Chr(34) & ucrInputRepositoryName.GetText() & Chr(34)) diff --git a/instat/dlgSPI.vb b/instat/dlgSPI.vb index 2ab81117dfc..e6e8441c587 100644 --- a/instat/dlgSPI.vb +++ b/instat/dlgSPI.vb @@ -175,9 +175,11 @@ Public Class dlgSPI clsSpeiFunction.AddParameter("kernel", clsRFunctionParameter:=clsListFunction, iPosition:=2) clsSpeiFunction.SetAssignTo("last_model", strTempDataframe:=ucrSelectorVariable.ucrAvailableDataFrames.cboAvailableDataFrames.Text, strTempModel:="last_model", bAssignToIsPrefix:=True) + clsSpeiInputFunction.SetPackageName("instatClimatic") clsSpeiInputFunction.SetRCommand("spei_input") clsSpeiInputFunction.SetAssignTo("data_ts") + clsSpeiOutputFunction.SetPackageName("instatClimatic") clsSpeiOutputFunction.SetRCommand("spei_output") clsPlotFunction.SetRCommand("plot") diff --git a/instat/dlgScript.vb b/instat/dlgScript.vb index d45ab14efed..9ddb65776a8 100644 --- a/instat/dlgScript.vb +++ b/instat/dlgScript.vb @@ -169,6 +169,7 @@ Public Class dlgScript Private 
Function GetPackages() As String() Dim arrAvailablePackages() As String = {} Dim clsGetPackages As New RFunction + clsGetPackages.SetPackageName("instatExtras") clsGetPackages.SetRCommand("get_installed_packages_with_data") clsGetPackages.AddParameter("with_data", "FALSE") Dim expPackageNames As SymbolicExpression = frmMain.clsRLink.RunInternalScriptGetValue(clsGetPackages.ToScript(), bSeparateThread:=False, bSilent:=True) @@ -656,6 +657,7 @@ Public Class dlgScript Try Dim clsLibraryExpFunction As New RFunction + clsLibraryExpFunction.SetPackageName("instatExtras") clsLibraryExpFunction.SetRCommand("getExample") clsLibraryExpFunction.AddParameter("package", Chr(34) & ucrCboExamplePackages.GetText() & Chr(34), iPosition:=1) clsLibraryExpFunction.AddParameter("topic", Chr(34) & strTopic & Chr(34), iPosition:=0) diff --git a/instat/dlgSeasonalPlot.vb b/instat/dlgSeasonalPlot.vb index fa976b592a8..674c4ecbea8 100644 --- a/instat/dlgSeasonalPlot.vb +++ b/instat/dlgSeasonalPlot.vb @@ -435,6 +435,7 @@ Public Class dlgSeasonalPlot clsPasteFunction.AddParameter("y", Chr(34) & "y~" & Chr(34), bIncludeArgumentName:=False, iPosition:=0) clsPasteFunction.AddParameter("fourier", clsRFunctionParameter:=clsFourierSeriesFunction, bIncludeArgumentName:=False, iPosition:=1) + clsFourierSeriesFunction.SetPackageName("instatClimatic") clsFourierSeriesFunction.SetRCommand("fourier_series") clsFourierSeriesFunction.AddParameter("x", Chr(34) & "x" & Chr(34), bIncludeArgumentName:=False, iPosition:=0) clsFourierSeriesFunction.AddParameter("n", 3, iPosition:=1) diff --git a/instat/dlgSelectColumns.vb b/instat/dlgSelectColumns.vb index 20e1bb3abc9..128ec9d0f5f 100644 --- a/instat/dlgSelectColumns.vb +++ b/instat/dlgSelectColumns.vb @@ -213,15 +213,15 @@ Public Class dlgSelectColumns ElseIf strValue = "Logical" Then clsParametersList.AddParameter("fn", "is.logical", iPosition:=0) ElseIf strValue = "Variable label" Then - clsParametersList.AddParameter("fn", "is.containVariableLabel", 
iPosition:=0) + clsParametersList.AddParameter("fn", "instatExtras::is.containVariableLabel", iPosition:=0) ElseIf strValue = "Value label" Then - clsParametersList.AddParameter("fn", "is.containValueLabel", iPosition:=0) + clsParametersList.AddParameter("fn", "instatExtras::is.containValueLabel", iPosition:=0) ElseIf strValue = "Empty columns" Then - clsParametersList.AddParameter("fn", "is.emptyvariable", iPosition:=0) + clsParametersList.AddParameter("fn", "instatExtras::is.emptyvariable", iPosition:=0) ElseIf strValue = "NA columns" Then - clsParametersList.AddParameter("fn", "is.NAvariable", iPosition:=0) + clsParametersList.AddParameter("fn", "instatExtras::is.NAvariable", iPosition:=0) ElseIf strValue = "Partially labelled" Then - clsParametersList.AddParameter("fn", "is.containPartialValueLabel", iPosition:=0) + clsParametersList.AddParameter("fn", "instatExtras::is.containPartialValueLabel", iPosition:=0) End If Case "Last column" strValue = "Last column" diff --git a/instat/dlgSpells.vb b/instat/dlgSpells.vb index 81b979c7045..47817d1b784 100644 --- a/instat/dlgSpells.vb +++ b/instat/dlgSpells.vb @@ -176,6 +176,7 @@ Public Class dlgSpells ' key clsDayFilterCalcFromConvert = New RFunction + clsDayFilterCalcFromConvert.SetPackageName("databook") clsDayFilterCalcFromConvert.SetRCommand("calc_from_convert") clsDayFilterCalcFromList = New RFunction clsDayFilterCalcFromList.SetRCommand("list") @@ -183,7 +184,7 @@ Public Class dlgSpells 'DayFromandTo clsDayFromAndToOperator.bToScriptAsRString = True - clsDayFilter.SetRCommand("instat_calculation$new") + clsDayFilter.SetRCommand("instatCalculations::instat_calculation$new") clsDayFilter.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsDayFilter.AddParameter("function_exp", clsROperatorParameter:=clsDayFromAndToOperator, iPosition:=1) clsDayFromAndToOperator.SetOperation("&") @@ -198,16 +199,16 @@ Public Class dlgSpells clsDayFilter.AddParameter("calculated_from", 
clsRFunctionParameter:=clsDayFilterCalcFromConvert, iPosition:=2) ' group - clsGroupBy.SetRCommand("instat_calculation$new") + clsGroupBy.SetRCommand("instatCalculations::instat_calculation$new") clsGroupBy.AddParameter("type", Chr(34) & "by" & Chr(34)) clsGroupBy.SetAssignTo("grouping") - clsGroupByStation.SetRCommand("instat_calculation$new") + clsGroupByStation.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByStation.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStation.SetAssignTo("group_by_station") ' rain_day - clsSpellLogicalCalc.SetRCommand("instat_calculation$new") + clsSpellLogicalCalc.SetRCommand("instatCalculations::instat_calculation$new") clsSpellLogicalCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSpellLogicalCalc.AddParameter("function_exp", clsROperatorParameter:=clsSpellLogicalAndOperator, iPosition:=1) clsSpellLogicalCalc.AddParameter("result_name", Chr(34) & strSpellLogical & Chr(34), iPosition:=2) @@ -225,7 +226,7 @@ Public Class dlgSpells clsLessThanOperator.SetOperation("<") ' Spell Length - clsSpellLength.SetRCommand("instat_calculation$new") + clsSpellLength.SetRCommand("instatCalculations::instat_calculation$new") clsSpellLength.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSpellLength.AddParameter("result_name", Chr(34) & strSpellName & Chr(34), iPosition:=2) clsSpellLength.AddParameter("function_exp", clsRFunctionParameter:=clsSpellsFunction) @@ -237,7 +238,7 @@ Public Class dlgSpells clsSpellsManipulationsFunc.SetRCommand("list") clsSpellsManipulationsFunc.AddParameter("group_by_year", clsRFunctionParameter:=clsGroupBy, bIncludeArgumentName:=False, iPosition:=0) - clsSpellsLogicalCalc.SetRCommand("instat_calculation$new") + clsSpellsLogicalCalc.SetRCommand("instatCalculations::instat_calculation$new") clsSpellsLogicalCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) 
clsSpellsLogicalCalc.AddParameter("function_exp", clsROperatorParameter:=clsSpellLogicalAndOperator, iPosition:=1) clsSpellsLogicalCalc.AddParameter("result_name", Chr(34) & strSpellDay & Chr(34), iPosition:=2) @@ -246,7 +247,7 @@ Public Class dlgSpells clsSpellManipulationsFunc.SetRCommand("list") clsSpellManipulationsFunc.AddParameter("group_by_station", clsRFunctionParameter:=clsGroupByStation, bIncludeArgumentName:=False, iPosition:=0) - 'clsSpellsLogCalcFunc.SetRCommand("instat_calculation$new") + 'clsSpellsLogCalcFunc.SetRCommand("instatCalculations::instat_calculation$new") 'clsSpellsLogCalcFunc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) 'clsSpellsLogCalcFunc.AddParameter("function_exp", clsROperatorParameter:=clsSpellLogicalAndOperator, iPosition:=1) 'clsSpellsLogCalcFunc.AddParameter("result_name", Chr(34) & strSpellDay & Chr(34), iPosition:=2) @@ -258,10 +259,11 @@ Public Class dlgSpells clsDotSpellsFunction.bToScriptAsRString = True - clsDotSpellsFunction.SetRCommand(".spells") + clsDotSpellsFunction.SetPackageName("instatClimatic") + clsDotSpellsFunction.SetRCommand("spells") clsDotSpellsFunction.AddParameter("x", clsROperatorParameter:=clsSpellLogicalAndOperator, iPosition:=0) - clsSpellFunction.SetRCommand("instat_calculation$new") + clsSpellFunction.SetRCommand("instatCalculations::instat_calculation$new") clsSpellFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSpellFunction.AddParameter("function_exp", clsRFunctionParameter:=clsDotSpellsFunction, iPosition:=1) ' changes depending on the rdo clsSpellFunction.AddParameter("result_name", Chr(34) & "spell" & Chr(34), iPosition:=2) @@ -272,7 +274,7 @@ Public Class dlgSpells clsRSpellFilterSubFunct.SetRCommand("list") clsRSpellFilterSubFunct.AddParameter("sub1", clsRFunctionParameter:=clsSpellFunction, bIncludeArgumentName:=False, iPosition:=0) - clsSpellFilterFunction.SetRCommand("instat_calculation$new") + 
clsSpellFilterFunction.SetRCommand("instatCalculations::instat_calculation$new") clsSpellFilterFunction.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsSpellFilterFunction.AddParameter("function_exp", Chr(34) & "dplyr::lead(c(NA,diff(spell)))<0" & Chr(34), iPosition:=1) ' changes depending on the rdo clsSpellFilterFunction.AddParameter("sub_calculations", clsRFunctionParameter:=clsRSpellFilterSubFunct, iPosition:=2) @@ -281,7 +283,7 @@ Public Class dlgSpells ' Additional Checkbox 'clsAdditionalConditionReplaceFunction.bToScriptAsRString = True - 'clsAdditionalCondition.SetRCommand("instat_calculation$new") + 'clsAdditionalCondition.SetRCommand("instatCalculations::instat_calculation$new") 'clsAdditionalCondition.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) 'clsAdditionalCondition.AddParameter("function_exp", clsRFunctionParameter:=clsAdditionalConditionReplaceFunction, iPosition:=1) 'clsAdditionalCondition.AddParameter("result_name", Chr(34) & strSpellDay & Chr(34), iPosition:=2) @@ -295,7 +297,7 @@ Public Class dlgSpells 'clsAdditionalCondition.AddParameter("sub_calculation", clsRFunctionParameter:=clsAdditionalConditionList) 'Max Summary - clsMaxSpellSummary.SetRCommand("instat_calculation$new") + clsMaxSpellSummary.SetRCommand("instatCalculations::instat_calculation$new") clsMaxSpellSummary.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsMaxSpellSummary.AddParameter("function_exp", clsRFunctionParameter:=clsMaxFunction, iPosition:=1) clsMaxSpellSummary.AddParameter("save", 2, iPosition:=6) @@ -316,7 +318,8 @@ Public Class dlgSpells clsMaxSpellManipulation.AddParameter("manip3", clsRFunctionParameter:=clsDayFilter, bIncludeArgumentName:=False, iPosition:=2) clsSpellsFunction.bToScriptAsRString = True - clsSpellsFunction.SetRCommand(".spells") + clsSpellsFunction.SetPackageName("instatClimatic") + clsSpellsFunction.SetRCommand("spells") clsSpellsFunction.AddParameter("x", strSpellLogical) diff --git 
a/instat/dlgStack.vb b/instat/dlgStack.vb index e52428347c6..d9daffaac1a 100644 --- a/instat/dlgStack.vb +++ b/instat/dlgStack.vb @@ -254,6 +254,7 @@ Public Class dlgStack clsReshapeFunction.AddParameter("direction", Chr(34) & "long" & Chr(34), iPosition:=4) clsReshapeFunction.AddParameter("idvar", Chr(34) & "id" & Chr(34), iPosition:=5) + clsSplitColumnsFunction.SetPackageName("instatExtras") clsSplitColumnsFunction.SetRCommand("split_items_in_groups") clsSplitColumnsFunction.AddParameter("num", 2) diff --git a/instat/dlgStartofRains.vb b/instat/dlgStartofRains.vb index a64ddd626b0..83dc6ee3e72 100644 --- a/instat/dlgStartofRains.vb +++ b/instat/dlgStartofRains.vb @@ -362,6 +362,7 @@ Public Class dlgStartofRains bResetSubdialog = True ucrSelectorForStartofRains.Reset() + clsDayFilterCalcFromConvert.SetPackageName("databook") clsDayFilterCalcFromConvert.SetRCommand("calc_from_convert") clsDayFilterCalcFromList.SetRCommand("list") @@ -372,6 +373,7 @@ Public Class dlgStartofRains clsDummyFunction.AddParameter("additional", "False", iPosition:=2) 'Day From and To + clsDayFromAndTo.SetPackageName("instatCalculations") clsDayFromAndTo.SetRCommand("instat_calculation$new") clsDayFromAndTo.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsDayFromAndTo.AddParameter("function_exp", clsROperatorParameter:=clsDayFromAndToOperator, iPosition:=1) @@ -391,11 +393,13 @@ Public Class dlgStartofRains clsDayToOperator.AddParameter("to", 366, iPosition:=1) ' group_by_station + clsGroupByStation.SetPackageName("instatCalculations") clsGroupByStation.SetRCommand("instat_calculation$new") clsGroupByStation.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStation.SetAssignTo("grouping_by_station") ' group_by_year + clsGroupByYear.SetPackageName("instatCalculations") clsGroupByYear.SetRCommand("instat_calculation$new") clsGroupByYear.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByYear.SetAssignTo("grouping_by_year") @@ -413,6 
+417,7 @@ Public Class dlgStartofRains clsConvertColumnType1Function.AddParameter("to_type", strYearType, iPosition:=2) 'TOTAL RAIN: associated with ucrChkTotalRainfall + clsCalcRainRollingSum.SetPackageName("instatCalculations") clsCalcRainRollingSum.SetRCommand("instat_calculation$new") clsCalcRainRollingSum.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcRainRollingSum.AddParameter("function_exp", clsRFunctionParameter:=clsRainRollingSumFunction, iPosition:=1) @@ -434,6 +439,7 @@ Public Class dlgStartofRains clsIsNaFirstRollSumRain.AddParameter("x", clsRFunctionParameter:=clsFirstRollSumRain, iPosition:=0) ' RAINY DAY associated with ucrChkNumberOfRainyDays + clsCalcRainDay.SetPackageName("instatCalculations") clsCalcRainDay.SetRCommand("instat_calculation$new") clsCalcRainDay.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcRainDay.AddParameter("function_exp", clsROperatorParameter:=clsRainDayOperator, iPosition:=1) @@ -444,6 +450,7 @@ Public Class dlgStartofRains clsRainDayOperator.bToScriptAsRString = True clsRainDayOperator.AddParameter("threshold", 0.85, iPosition:=1) + clsCalcRainDayRollingSum.SetPackageName("instatCalculations") clsCalcRainDayRollingSum.SetRCommand("instat_calculation$new") clsCalcRainDayRollingSum.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcRainDayRollingSum.AddParameter("function_exp", clsRFunctionParameter:=clsRainDayRollingSumFunction, iPosition:=1) @@ -476,6 +483,7 @@ Public Class dlgStartofRains clsListToTalRain.SetRCommand("list") 'SOR_Filter + clsSORFilter.SetPackageName("instatCalculations") clsSORFilter.SetRCommand("instat_calculation$new") clsSORFilter.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsSORFilter.AddParameter("function_exp", clsROperatorParameter:=clsLessFilterOperator, iPosition:=1) @@ -502,6 +510,7 @@ Public Class dlgStartofRains clsTRWetSpellFunction.bToScriptAsRString = True 
clsTRWetSpellList.SetRCommand("list") clsTRWetSpellList.AddParameter("sub1", clsRFunctionParameter:=clsCalcRainRollingSum, bIncludeArgumentName:=False) + clsTRWetSpell.SetPackageName("instatCalculations") clsTRWetSpell.SetRCommand("instat_calculation$new") clsTRWetSpell.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsTRWetSpell.AddParameter("function_exp", clsRFunctionParameter:=clsTRWetSpellFunction, iPosition:=1) @@ -514,6 +523,7 @@ Public Class dlgStartofRains clsTRWetSpell.SetAssignTo("total_rainfall_wet_spell") 'DRY SPELL associated with ucrChkDrySpell + clsCalcDrySpell.SetPackageName("instatCalculations") clsCalcDrySpell.SetRCommand("instat_calculation$new") clsCalcDrySpell.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcDrySpell.AddParameter("function_exp", clsRFunctionParameter:=clsSpellsFunction, iPosition:=1) @@ -524,7 +534,9 @@ Public Class dlgStartofRains clsDrySpellSubCalcList.SetRCommand("list") clsDrySpellSubCalcList.AddParameter("sub1", clsRFunctionParameter:=clsCalcRainDay, bIncludeArgumentName:=False) - clsSpellsFunction.SetRCommand(".spells") + + clsSpellsFunction.SetPackageName("instatClimatic") + clsSpellsFunction.SetRCommand("spells") clsSpellsFunction.AddParameter("x", clsROperatorParameter:=clsRainDaySpellsOperator, iPosition:=0) clsSpellsFunction.bToScriptAsRString = True @@ -532,6 +544,7 @@ Public Class dlgStartofRains clsRainDaySpellsOperator.AddParameter("0", strRainDay, iPosition:=0) clsRainDaySpellsOperator.AddParameter("1", "0", iPosition:=1) + clsCalcDrySpellRollMax.SetPackageName("instatCalculations") clsCalcDrySpellRollMax.SetRCommand("instat_calculation$new") clsCalcDrySpellRollMax.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcDrySpellRollMax.AddParameter("function_exp", clsRFunctionParameter:=clsDrySpellPeriodLeadFunction, iPosition:=1) @@ -560,6 +573,7 @@ Public Class dlgStartofRains clsIsNaFirstDrySpell.SetRCommand("is.na") 
clsIsNaFirstDrySpell.AddParameter("x", clsRFunctionParameter:=clsFirstDrySpell, iPosition:=0) + clsFractionEvapFunction.SetPackageName("instatCalculations") clsFractionEvapFunction.SetRCommand("instat_calculation$new") clsFractionEvapFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsFractionEvapFunction.AddParameter("function_exp", clsROperatorParameter:=clsEvapOperator, iPosition:=1) @@ -569,6 +583,7 @@ Public Class dlgStartofRains clsEvapOperator.SetOperation("*") clsEvapOperator.bToScriptAsRString = True + clsSumEvapFunction.SetPackageName("instatCalculations") clsSumEvapFunction.SetRCommand("instat_calculation$new") clsSumEvapFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsSumEvapFunction.AddParameter("function_exp", clsRFunctionParameter:=clsRollEvaporationFunction, iPosition:=1) @@ -587,6 +602,7 @@ Public Class dlgStartofRains clsRollEvaporationFunction.bToScriptAsRString = True 'DRY PERIOD + clsCalcRainRollingSumDryPeriod.SetPackageName("instatCalculations") clsCalcRainRollingSumDryPeriod.SetRCommand("instat_calculation$new") clsCalcRainRollingSumDryPeriod.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcRainRollingSumDryPeriod.AddParameter("function_exp", clsRFunctionParameter:=clsLeadRollingSumRainDryPeriodFunction, iPosition:=1) @@ -602,6 +618,7 @@ Public Class dlgStartofRains clsRollingSumRainDryPeriodFunction.AddParameter("n", 30, iPosition:=1) clsRollingSumRainDryPeriodFunction.AddParameter("fill", "NA", iPosition:=2) + clsCalcRollSumNumberDryPeriod.SetPackageName("instatCalculations") clsCalcRollSumNumberDryPeriod.SetRCommand("instat_calculation$new") clsCalcRollSumNumberDryPeriod.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsCalcRollSumNumberDryPeriod.AddParameter("function_exp", clsRFunctionParameter:=clsRollSumNumberDryPeriodFunction, iPosition:=1) @@ -640,6 +657,7 @@ Public Class dlgStartofRains 
clsIsNaFirstDryPeriod.AddParameter("x", clsRFunctionParameter:=clsFirstDryPeriod, iPosition:=0) ' Combined filter + clsConditionsFilter.SetPackageName("instatCalculations") clsConditionsFilter.SetRCommand("instat_calculation$new") clsConditionsFilter.AddParameter("type", Chr(34) & "filter" & Chr(34), iPosition:=0) clsConditionsFilter.AddParameter("function_exp", clsROperatorParameter:=clsConditionsOrOverallOperator, iPosition:=1) @@ -689,6 +707,7 @@ Public Class dlgStartofRains clsDPCombineOperator.AddParameter("dp_max", 0, iPosition:=1) ' First DOY + clsCalcStartDOY.SetPackageName("instatCalculations") clsCalcStartDOY.SetRCommand("instat_calculation$new") clsCalcStartDOY.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsCalcStartDOY.AddParameter("function_exp", clsRFunctionParameter:=clsIfelseStartDOY, iPosition:=1) @@ -716,6 +735,7 @@ Public Class dlgStartofRains clsFirstRain.SetRCommand("first") 'First Date + clsCalcStartDate.SetPackageName("instatCalculations") clsCalcStartDate.SetRCommand("instat_calculation$new") clsCalcStartDate.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsCalcStartDate.AddParameter("function_exp", clsRFunctionParameter:=clsIfelseStartDate, iPosition:=1) @@ -735,6 +755,7 @@ Public Class dlgStartofRains clsFirstDate.AddParameter("default", "NA", iPosition:=1) ' Status + clsCalcStatus.SetPackageName("instatCalculations") clsCalcStatus.SetRCommand("instat_calculation$new") clsCalcStatus.AddParameter("type", Chr(34) & "summary" & Chr(34), iPosition:=0) clsCalcStatus.AddParameter("function_exp", clsRFunctionParameter:=clsIfelseStatusFunction, iPosition:=1) @@ -761,6 +782,7 @@ Public Class dlgStartofRains clsIsNAStatusFunction.AddParameter("x", strRollSumRain, iPosition:=0, bIncludeArgumentName:=False) 'Combination + clsCombinationCalc.SetPackageName("instatCalculations") clsCombinationCalc.SetRCommand("instat_calculation$new") clsCombinationCalc.AddParameter("type", Chr(34) & "combination" & Chr(34), 
iPosition:=0) clsCombinationCalc.AddParameter("manipulations", clsRFunctionParameter:=clsCombinationManipList, iPosition:=1) @@ -816,7 +838,7 @@ Public Class dlgStartofRains clsCalculatedListformFunction.SetRCommand("c") clsCalculatedListformFunction.AddParameter("x", clsRFunctionParameter:=clsSetnameRainStatusFunction, iPosition:=0, bIncludeArgumentName:=False) - clsCalculatedListformFunction.AddParameter("y", clsRFunctionParameter:=clsSetNameRainFunction, iPosition:=1, bIncludeArgumentName:=False) + clsCalculatedListformFunction.AddParameter("y", clsRFunctionParameter:=clsSetnameRainFunction, iPosition:=1, bIncludeArgumentName:=False) clsCalculatedListformFunction.SetAssignTo(strcalculatedfromlist) clsSetnameRainStatusFunction.SetRCommand("setNames") @@ -827,6 +849,7 @@ Public Class dlgStartofRains clsSetnameRainFunction.AddParameter("x", Chr(34) & strStartDoy & Chr(34), iPosition:=0, bIncludeArgumentName:=False) clsSetnameRainFunction.AddParameter("y", "linked_data_name", iPosition:=1, bIncludeArgumentName:=False) + clsStatRainStatus2Function.SetPackageName("instatCalculations") clsStatRainStatus2Function.SetRCommand("instat_calculation$new") clsStatRainStatus2Function.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsStatRainStatus2Function.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseStatus2Function, iPosition:=1) @@ -843,6 +866,7 @@ Public Class dlgStartofRains clsNastatus2Function.SetRCommand("!is.na") clsNastatus2Function.AddParameter("x", strStartDoy, iPosition:=0, bIncludeArgumentName:=False) + clsStartRainCombine2Function.SetPackageName("instatCalculations") clsStartRainCombine2Function.SetRCommand("instat_calculation$new") clsStartRainCombine2Function.AddParameter("type", Chr(34) & "combination" & Chr(34), iPosition:=0) clsStartRainCombine2Function.AddParameter("sub_calculations", clsRFunctionParameter:=clsListFunction, iPosition:=1) diff --git a/instat/dlgTimeSeriesPlot.vb b/instat/dlgTimeSeriesPlot.vb index 
aa55864e5f6..4e6e06fb1f2 100644 --- a/instat/dlgTimeSeriesPlot.vb +++ b/instat/dlgTimeSeriesPlot.vb @@ -433,6 +433,7 @@ Public Class dlgTimeSeriesPlot clsSd.AddParameter("x", strValue, iPosition:=0) clsSd.AddParameter("na.rm", "TRUE", iPosition:=1) + clsSlope.SetPackageName("instatExtras") clsSlope.SetRCommand("slope") clsSlope.AddParameter("y", strValue, iPosition:=0) diff --git a/instat/dlgTransformClimatic.vb b/instat/dlgTransformClimatic.vb index 1ce2dca1340..6bfcd9f1c0b 100644 --- a/instat/dlgTransformClimatic.vb +++ b/instat/dlgTransformClimatic.vb @@ -176,10 +176,10 @@ Public Class dlgTransformClimatic ttRdoGdd.SetToolTip(rdoGrowingDegreeDays, "Growing (or Cooling) Degree Days. If the baseline = 15 degrees, then GDD = (tmean - 15), or 0 if tmean is less than 15") ttRdoMgdd.SetToolTip(rdoModifiedGDD, "Modified GDD is just GDD if tmean is less than the upper limit. If baseline = 15 degrees and limit = 30 degrees, then Modified GDD = 30 - 15 if tmean is more than 30 degrees.") - 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoMoving, "sub1", "instat_calculation$new", False) ' clsRRainday - 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoCount, "sub1", "instat_calculation$new") - 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoSpell, "sub1", "instat_calculation$new") - 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoWaterBalance, "sub1", "instat_calculation$new", False) + 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoMoving, "sub1", "instatCalculations::instat_calculation$new", False) ' clsRRainday + 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoCount, "sub1", "instatCalculations::instat_calculation$new") + 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoSpell, "sub1", "instatCalculations::instat_calculation$new") + 'ucrPnlTransform.AddParameterValueFunctionNamesCondition(rdoWaterBalance, "sub1", "instatCalculations::instat_calculation$new", False) ' Setting receivers 
ucrReceiverStation.Selector = ucrSelectorTransform @@ -502,7 +502,7 @@ Public Class dlgTransformClimatic clsDoyFilterCalcFromList.Clear() ' Count and Spells: Rainday - clsRRainday.SetRCommand("instat_calculation$new") + clsRRainday.SetRCommand("instatCalculations::instat_calculation$new") clsRRainday.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsRRainday.AddParameter("function_exp", clsROperatorParameter:=clsRRaindayAndOperator, iPosition:=1) clsRRainday.AddParameter("result_name", Chr(34) & strRainDay & Chr(34), iPosition:=2) @@ -557,7 +557,7 @@ Public Class dlgTransformClimatic clsRollConsecutiveSumFunction.SetRCommand("rollapply") clsRollConsecutiveSumFunction.AddParameter("align", Chr(39) & "left" & Chr(39)) clsRollConsecutiveSumFunction.AddParameter("width", 21, iPosition:=1) - clsRollConsecutiveSumFunction.AddParameter("FUN", "max_consecutive_sum") + clsRollConsecutiveSumFunction.AddParameter("FUN", "instatExtras::max_consecutive_sum") clsRollConsecutiveSumFunction.AddParameter("fill", "NA") clsRollConsecutiveSumFunction.AddParameter("data", strRainDay) @@ -578,11 +578,11 @@ Public Class dlgTransformClimatic clsRasterFuction.AddParameter("na.rm", "TRUE", iPosition:=5) clsRasterFuction.bToScriptAsRString = True - clsGroupByStationCalc.SetRCommand("instat_calculation$new") + clsGroupByStationCalc.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByStationCalc.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStationCalc.SetAssignTo("grouping_by_station") - clsGroupByStationYearCalc.SetRCommand("instat_calculation$new") + clsGroupByStationYearCalc.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByStationYearCalc.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStationYearCalc.SetAssignTo("grouping_by_station_year") @@ -601,6 +601,7 @@ Public Class dlgTransformClimatic clsDayToOperator.SetOperation("<=") clsDayToOperator.AddParameter("to", 366, iPosition:=1) + 
clsDoyFilterCalcFromConvert.SetPackageName("databook") clsDoyFilterCalcFromConvert.SetRCommand("calc_from_convert") clsDoyFilterCalcFromConvert.AddParameter("x", clsRFunctionParameter:=clsDoyFilterCalcFromList, iPosition:=0) @@ -620,7 +621,7 @@ Public Class dlgTransformClimatic clsEndSeasonWBConditionOperator.AddParameter("0", strWB, iPosition:=0) clsEndSeasonWBConditionOperator.AddParameter("1", "0.5", iPosition:=1) - clsEndSeasonWBCalc.SetRCommand("instat_calculation$new") + clsEndSeasonWBCalc.SetRCommand("instatCalculations::instat_calculation$new") clsEndSeasonWBCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonWBCalc.AddParameter("function_exp", clsRFunctionParameter:=clsRoundFunction, iPosition:=1) clsEndSeasonWBCalc.AddParameter("save", "2", iPosition:=2) @@ -651,6 +652,7 @@ Public Class dlgTransformClimatic clsWBOperator1.bSpaceAroundOperation = True clsWBOperator1.bBrackets = False + clsWBEvaporationMinFunction.SetPackageName("instatClimatic") clsWBEvaporationMinFunction.SetRCommand("WB_evaporation") clsWBEvaporationMinFunction.AddParameter("water_balance", "..1", iPosition:=0, bIncludeArgumentName:=False) clsWBEvaporationMinFunction.AddParameter("y", "..2", iPosition:=4, bIncludeArgumentName:=False) @@ -665,14 +667,14 @@ Public Class dlgTransformClimatic clsIfElseRainMaxFunction.AddParameter("yes", 100, iPosition:=0) ' Water Balance min - clsEndSeasonWBMinCalc.SetRCommand("instat_calculation$new") + clsEndSeasonWBMinCalc.SetRCommand("instatCalculations::instat_calculation$new") clsEndSeasonWBMinCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonWBMinCalc.AddParameter("function_exp", clsRFunctionParameter:=clsReduceWBMinFunction, iPosition:=1) clsEndSeasonWBMinCalc.AddParameter("result_name", Chr(34) & strWBMin & Chr(34), iPosition:=2) clsEndSeasonWBMinCalc.AddParameter("sub_calculations", clsRFunctionParameter:=clsEndSeasonWBMinCalcSubCalcsList, iPosition:=3) 
clsEndSeasonWBMinCalc.SetAssignTo(strWBMin) - clsEndSeasonRainMinCalc.SetRCommand("instat_calculation$new") + clsEndSeasonRainMinCalc.SetRCommand("instatCalculations::instat_calculation$new") clsEndSeasonRainMinCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonRainMinCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseRainMinFunction, iPosition:=1) clsEndSeasonRainMinCalc.AddParameter("result_name", Chr(34) & strRainMin & Chr(34), iPosition:=2) @@ -739,14 +741,14 @@ Public Class dlgTransformClimatic clsWBMinEvapOperator.AddParameter("value", "5", iPosition:=1) ' Water Balance max - clsEndSeasonWBMaxCalc.SetRCommand("instat_calculation$new") + clsEndSeasonWBMaxCalc.SetRCommand("instatCalculations::instat_calculation$new") clsEndSeasonWBMaxCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonWBMaxCalc.AddParameter("function_exp", clsRFunctionParameter:=clsReduceWBMaxFunction, iPosition:=1) clsEndSeasonWBMaxCalc.AddParameter("result_name", Chr(34) & strWBMax & Chr(34), iPosition:=2) clsEndSeasonWBMaxCalc.AddParameter("sub_calculations", clsRFunctionParameter:=clsEndSeasonWBMaxCalcSubCalcsList, iPosition:=3) clsEndSeasonWBMaxCalc.SetAssignTo(strWBMax) - clsEndSeasonRainMaxCalc.SetRCommand("instat_calculation$new") + clsEndSeasonRainMaxCalc.SetRCommand("instatCalculations::instat_calculation$new") clsEndSeasonRainMaxCalc.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsEndSeasonRainMaxCalc.AddParameter("function_exp", clsRFunctionParameter:=clsIfElseRainMaxFunction, iPosition:=1) clsEndSeasonRainMaxCalc.AddParameter("result_name", Chr(34) & strRainMax & Chr(34), iPosition:=2) @@ -879,12 +881,12 @@ Public Class dlgTransformClimatic clsDummyFunction.AddParameter("checked", "rollapply", iPosition:=2) ' Group options - clsGroupByYear.SetRCommand("instat_calculation$new") + clsGroupByYear.SetRCommand("instatCalculations::instat_calculation$new") 
clsGroupByYear.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByYear.AddParameter("result_name", Chr(34) & "group_by_year" & Chr(34), iPosition:=2) clsGroupByYear.SetAssignTo("group_by_year") - clsGroupByStation.SetRCommand("instat_calculation$new") + clsGroupByStation.SetRCommand("instatCalculations::instat_calculation$new") clsGroupByStation.AddParameter("type", Chr(34) & "by" & Chr(34), iPosition:=0) clsGroupByStation.SetAssignTo("group_by_station") @@ -894,7 +896,7 @@ Public Class dlgTransformClimatic clsTransformManipulationsFunc.SetRCommand("list") - clsRTransform.SetRCommand("instat_calculation$new") + clsRTransform.SetRCommand("instatCalculations::instat_calculation$new") clsRTransform.AddParameter("type", Chr(34) & "calculation" & Chr(34), iPosition:=0) clsRTransform.AddParameter("result_name", Chr(34) & "count" & Chr(34), iPosition:=2) clsRTransform.AddParameter("sub_calculations", clsRFunctionParameter:=clsRTransformCountSpellSub, iPosition:=4) @@ -1067,7 +1069,7 @@ Public Class dlgTransformClimatic clsRTransform.RemoveParameterByName("sub_calculations") clsTransformCheck = clsRTransform ElseIf rdoSpell.Checked Then - clsRTransform.AddParameter("function_exp", Chr(34) & ".spells(x = " & strRainDay & ")" & Chr(34), iPosition:=1) + clsRTransform.AddParameter("function_exp", Chr(34) & "instatClimatic::spells(x = " & strRainDay & ")" & Chr(34), iPosition:=1) clsRTransform.AddParameter("sub_calculations", clsRFunctionParameter:=clsRTransformCountSpellSub, iPosition:=4) clsRTransform.RemoveParameterByName("calculated_from") clsTransformCheck = clsRTransform diff --git a/instat/dlgViewGraph.vb b/instat/dlgViewGraph.vb index 151b125fda4..5ade756e472 100644 --- a/instat/dlgViewGraph.vb +++ b/instat/dlgViewGraph.vb @@ -84,6 +84,7 @@ Public Class dlgViewGraph clsPrintRFunction.SetRCommand("print") clsPrintRFunction.AddParameter("x", clsRFunctionParameter:=clsGetObjectRFunction, iPosition:=0) + 
clsViewObjectRFunction.SetPackageName("instatExtras") clsViewObjectRFunction.SetRCommand("view_object_data") clsViewObjectRFunction.AddParameter("object", clsRFunctionParameter:=clsGetObjectRFunction) clsViewObjectRFunction.AddParameter("object_format", strParameterValue:=Chr(34) & RObjectFormat.Image & Chr(34)) diff --git a/instat/dlgWindPollutionRose.vb b/instat/dlgWindPollutionRose.vb index 3b94c16bd42..a96c9a7c350 100644 --- a/instat/dlgWindPollutionRose.vb +++ b/instat/dlgWindPollutionRose.vb @@ -118,7 +118,8 @@ Public Class dlgWindPollutionRose ucrSelectorWindPollutionRose.Reset() ucrReceiverDate.SetMeAsReceiver() - + + clsPollutionRoseFunction.SetPackageName("instatClimatic") clsPollutionRoseFunction.SetRCommand("wind_pollution_rose") clsPollutionRoseFunction.AddParameter("paddle", "FALSE", iPosition:=7) clsPollutionRoseFunction.AddParameter("statistic", Chr(34) & "prop.count" & Chr(34), iPosition:=8) diff --git a/instat/frmMain.vb b/instat/frmMain.vb index 05aa150b26e..d3047c71e2c 100644 --- a/instat/frmMain.vb +++ b/instat/frmMain.vb @@ -446,6 +446,7 @@ Public Class frmMain 'To ensure this part of the code only runs when the application Is Not in the Debug mode (i.e., in Release mode) #If Not DEBUG Then Dim clsSetLibPathsFunction As New RFunction + clsSetLibPathsFunction.SetPackageName("instatExtras") clsSetLibPathsFunction.SetRCommand("set_library_paths") clsSetLibPathsFunction.AddParameter("library_path", Chr(34) & strLibraryPath.Replace("\", "/") & Chr(34)) @@ -2338,6 +2339,7 @@ Public Class frmMain clsLastObjectRFunction.AddParameter("as_file", strParameterValue:="FALSE", iPosition:=1) Dim clsViewObjectRFunction As New RFunction + clsViewObjectRFunction.SetPackageName("instatExtras") clsViewObjectRFunction.SetRCommand("view_object_data") clsViewObjectRFunction.AddParameter("object", clsRFunctionParameter:=clsLastObjectRFunction) clsViewObjectRFunction.AddParameter("object_format", strParameterValue:=Chr(34) & RObjectFormat.Image & Chr(34)) @@ -2359,6 
+2361,7 @@ Public Class frmMain clsPlotlyRFunction.SetRCommand("ggplotly") clsPlotlyRFunction.AddParameter("p", clsRFunctionParameter:=clsLastObjectRFunction, iPosition:=0) + clsViewObjectRFunction.SetPackageName("instatExtras") clsViewObjectRFunction.SetRCommand("view_object_data") clsViewObjectRFunction.AddParameter("object", clsRFunctionParameter:=clsPlotlyRFunction) clsViewObjectRFunction.AddParameter("object_format", strParameterValue:=Chr(34) & RObjectFormat.Html & Chr(34)) diff --git a/instat/sdgCalculationsSummmary.vb b/instat/sdgCalculationsSummmary.vb index 5e0aea31c49..798501c271e 100644 --- a/instat/sdgCalculationsSummmary.vb +++ b/instat/sdgCalculationsSummmary.vb @@ -291,7 +291,7 @@ Public Class sdgCalculationsSummmary iSubCalcCount = iSubCalcCount + 1 strCalcName = "sub_cal" & iSubCalcCount - clsSubCalcFunction.SetRCommand("instat_calculation$new") + clsSubCalcFunction.SetRCommand("instatCalculations::instat_calculation$new") clsSubCalcFunction.AddParameter("name", Chr(34) & strCalcName & Chr(34)) clsSubCalcFunction.AddParameter("type", Chr(34) & "calculation" & Chr(34)) clsSubCalcFunction.AddParameter("save", "0") @@ -318,7 +318,7 @@ Public Class sdgCalculationsSummmary iManipCount = iManipCount + 1 strCalcName = "manip" & iManipCount - clsManipFunction.SetRCommand("instat_calculation$new") + clsManipFunction.SetRCommand("instatCalculations::instat_calculation$new") clsManipFunction.AddParameter("name", Chr(34) & strCalcName & Chr(34)) clsManipFunction.AddParameter("type", Chr(34) & "by" & Chr(34)) clsManipFunction.AddParameter("save", "0") diff --git a/instat/sdgDoyRange.vb b/instat/sdgDoyRange.vb index cab5c87e4e7..56b01d78ae7 100644 --- a/instat/sdgDoyRange.vb +++ b/instat/sdgDoyRange.vb @@ -62,6 +62,7 @@ Public Class sdgDoyRange ucrSelectorDoy.SetPrimaryDataFrameOptions(strMainDataFrame, True, True) + clsFindDfFunc.SetPackageName("databook") clsFindDfFunc.SetRCommand("find_df_from_calc_from") clsFindDfFunc.AddParameter("x", 
clsRFunctionParameter:=clsCalcFromList, iPosition:=0) diff --git a/instat/sdgOpenNetCDF.vb b/instat/sdgOpenNetCDF.vb index 374a910d8c0..7ad258659d1 100644 --- a/instat/sdgOpenNetCDF.vb +++ b/instat/sdgOpenNetCDF.vb @@ -216,6 +216,7 @@ Public Class sdgOpenNetCDF lstDimAxes = Nothing End If If lstDimAxes IsNot Nothing Then + clsGetBoundsFunction.SetPackageName("instatExtras") clsGetBoundsFunction.SetRCommand("nc_get_dim_min_max") clsGetBoundsFunction.AddParameter("nc", clsRFunctionParameter:=clsNcOpenFunction) For i As Integer = 0 To lstDims.Count - 1 diff --git a/instat/static/.old/data_object.R b/instat/static/.old/data_object.R index 25ba588dc58..42552ecdb7d 100644 --- a/instat/static/.old/data_object.R +++ b/instat/static/.old/data_object.R @@ -1,695 +1 @@ -data_obj <- setRefClass("data_obj", - fields = list(data = "data.frame", metadata = "list", - variables_metadata = "data.frame", changes = "list", - data_changed = "logical", variables_metadata_changed = "logical", - metadata_changed = "logical")) - - - -# INITIALIZE method -############################################################################################## -# Functions of reference classes are called methods. -# This is how you define a method for a specific reference class. -# Every reference class has an initialize method which tells R how to create new -# data_obj objects. -# By the end of this method, all fields of data_obj should be defined. - -data_obj$methods(initialize = function(data = data.frame(), data_name = "", - variables_metadata = data.frame(), metadata = list(), - imported_from = "", - messages = TRUE, convert=TRUE, create = TRUE, - start_point=1) -{ - - # Set up the data object - - .self$set_changes(list()) - .self$set_data(data, messages) - .self$set_variables_metadata(variables_metadata) - .self$update_variables_metadata() - .self$set_meta(metadata) - - # If no name for the data.frame has been given in the list we create a default one. 
- # Decide how to choose default name index - if (!.self$is_metadata(data_name_label)) { - if ( ( is.null(data_name) || data_name == "" || missing(data_name))) { - metadata[[data_name_label]] <<- paste0("data_set_",sprintf("%03d", start_point)) - if (messages) { - message(paste0("No name specified in data_tables list for data frame ", start_point, ". - Data frame will have default name: ", "data_set_",sprintf("%03d", start_point))) - } - } - else metadata[[data_name_label]] <<- data_name - } - - .self$add_defaults_meta() -} -) - -data_obj$methods(set_data = function(new_data, messages=TRUE) { - if( ! is.data.frame(new_data) ) { - stop("Data set must be of type: data.frame") - } - else { - if ( length(new_data) == 0 && messages) { - message("data of object:is empty. data will be an empty data frame.") - } - data <<- new_data - .self$append_to_changes(list(Set_property, "data")) - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) - # is_data_split<<-FALSE - } -} -) - -data_obj$methods(set_meta = function(new_meta) { - if( ! is.list(new_meta) ) { - stop("new_meta must be of type: list") - } - metadata <<- new_meta - .self$set_metadata_changed(TRUE) - .self$append_to_changes(list(Set_property, "meta data")) -} -) - -data_obj$methods(set_variables_metadata = function(new_meta) { - if( ! is.data.frame(new_meta) ) { - stop("variable metadata must be of type: data.frame") - } - variables_metadata <<- new_meta - .self$append_to_changes(list(Set_property, "variable metadata")) -} -) - -data_obj$methods(set_changes = function(new_changes) { - if( ! 
is.list(new_changes) ) { - stop("Changes must be of type: list") - } - - else { - changes <<- new_changes - .self$append_to_changes(list(Set_property, "changes")) } -} -) - -data_obj$methods(set_data_changed = function(new_val) { - if( new_val != TRUE && new_val != FALSE ) { - stop("new_val must be TRUE or FALSE") - } - data_changed <<- new_val - .self$append_to_changes(list(Set_property, "data_changed")) -} -) - -data_obj$methods(set_variables_metadata_changed = function(new_val) { - if( new_val != TRUE && new_val != FALSE ) { - stop("new_val must be TRUE or FALSE") - } - variables_metadata_changed <<- new_val - .self$append_to_changes(list(Set_property, "variable_data_changed")) -} -) - -data_obj$methods(set_metadata_changed = function(new_val) { - if( new_val != TRUE && new_val != FALSE ) { - stop("new_val must be TRUE or FALSE") - } - metadata_changed <<- new_val - .self$append_to_changes(list(Set_property, "metadata_changed")) -} -) - -data_obj$methods(update_variables_metadata = function() { - - if(length(colnames(data)) != length(rownames(variables_metadata)) || !all(colnames(data)==rownames(variables_metadata))) { - if(all(colnames(data) %in% rownames(variables_metadata))) { - variables_metadata <<- variables_metadata[colnames(data),] - } - else { - for(col in colnames(data)[!colnames(data) %in% rownames(variables_metadata)]) { - variables_metadata[col, name_label] <<- col - variables_metadata[col, display_decimal_label] <<- get_default_decimal_places(data[[col]]) - } - } - } - .self$append_to_changes(list(Set_property, "variables_metadata")) -} -) - -data_obj$methods(get_data_frame = function(convert_to_character = FALSE) { - if(convert_to_character) { - decimal_places = .self$variables_metadata[[display_decimal_label]] - return(convert_to_character_matrix(data, TRUE, decimal_places)) - } - else return(data) -} -) - -data_obj$methods(get_variables_metadata = function(include_all = TRUE, data_type = "all", convert_to_character = FALSE) { - 
.self$update_variables_metadata() - if(!include_all) out = variables_metadata - else { - out = variables_metadata - out[[data_type_label]] = sapply(data, class) - if(data_type != "all") { - if(data_type == "numeric") { - out = out[out[[data_type_label]] %in% c("numeric", "integer") , ] - } - else { - out = out[out[[data_type_label]]==data_type, ] - } - } - if(convert_to_character) return(convert_to_character_matrix(out, FALSE)) - else return(out) - } -} -) - -data_obj$methods(get_metadata = function(label) { - if(missing(label)) return(metadata) - else { - if(label %in% names(metadata)) return(metadata[[label]]) - else return("") - } -} -) - - -data_obj$methods(add_columns_to_data = function(col_name = "", col_data, use_col_name_as_prefix) { - - # Column name must be character - if( ! is.character(col_name) ) stop("Column name must be of type: character") - if(is.matrix(col_data) || is.data.frame(col_data)) { - num_cols = ncol(col_data) - if( (length(col_name) != 1) && (length(col_name) != num_cols) ) stop("col_name must be a character or character vector with the same length as the number of new columns") - } - else { - use_col_name_as_prefix = FALSE - num_cols = 1 - col_data = data.frame(col_data) - } - - if(missing(use_col_name_as_prefix)) { - if(num_cols > 1 && length(col_name) == num_cols) use_col_name_as_prefix = FALSE - else use_col_name_as_prefix = TRUE - } - - for(i in 1:num_cols) { - curr_col = unlist(col_data[,i]) - if(use_col_name_as_prefix) curr_col_name = .self$get_next_default_column_name(col_name) - else curr_col_name = col_name[[i]] - - if(curr_col_name %in% names(data)) { - message(paste("A column named", curr_col_name, "already exists. 
The column will be replaced in the data")) - .self$append_to_changes(list(Replaced_col, curr_col_name)) - } - - else .self$append_to_changes(list(Added_col, curr_col_name)) - - data[[curr_col_name]] <<- curr_col - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) - } -} -) - -data_obj$methods(get_columns_from_data = function(col_names, force_as_data_frame = FALSE) { - if(missing(col_names)) stop("no col_names to return") - if(!all(sapply(col_names, function(x) x %in% names(data)))) stop("Not all column names were found in data") - - if(length(col_names)==1) { - if(force_as_data_frame) return(data[col_names]) - else (data[[col_names]]) - } - else return(data[col_names]) -} -) - -data_obj$methods(rename_column_in_data = function(curr_col_name = "", new_col_name="") { - - # Column name must be character - if (new_col_name %in% names(data)){ - stop(paste0(new_col_name," exist in the data.")) - } - if( ! is.character(curr_col_name) ) { - stop("Current column name must be of type: character") - } - - else if (!(curr_col_name %in% names(data))) { - stop(paste0("Cannot rename column: ",curr_col_name,". Column was not found in the data.")) - } - - else if (! is.character(new_col_name)) { - stop("New column name must be of type: character") - } - - else { - if(sum(names(data) == curr_col_name) > 1) { - warning(paste0("Multiple columns have name: '", curr_col_name,"'. 
All such columns will be - renamed.")) - } - names(data)[names(data) == curr_col_name] <<- new_col_name - rownames(variables_metadata)[rownames(variables_metadata) == curr_col_name] <<- new_col_name - variables_metadata[rownames(variables_metadata) == new_col_name, 1] <<- new_col_name - .self$append_to_changes(list(Renamed_col, curr_col_name, new_col_name)) - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) - } -} -) - -data_obj$methods(remove_columns_in_data_from_start_position = function(start_pos, col_numbers = 1 ) { - if (start_pos <= 0) stop("You cannot remove a column into the position less or equal to zero.") - if (start_pos %% 1 != 0) stop("start_pos value should be an integer.") - if (length(names(data)) < start_pos) stop("The start_pos argument exceeds the number of columns in the data.") - col_names = names(data)[start_pos:(start_pos+col_numbers-1)] - .self$remove_columns_in_data(col_names) -} -) - -data_obj$methods(remove_columns_in_data = function(cols=c()) { - for(col_name in cols){ - # Column name must be character - if( ! is.character(col_name) ) { - stop("Column name must be of type: character") - } - - else if (!(col_name %in% names(data))) { - stop(paste0("Column :'", col_name, " was not found in the data.")) - } - - else { - data[[ col_name ]] <<- NULL - # .self$append_to_changes(list(Removed_col, col_name)) - } - } - .self$append_to_changes(list(Removed_col, cols)) - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) -} - -) - -data_obj$methods(replace_value_in_data = function(col_name = "", index, new_value = "") { - - # Column name must be character - if( ! is.character(col_name) ) { - stop("Column name must be of type: character") - } - - else if (!(col_name %in% names(data))) { - stop(paste("Cannot find column:",col_name,"in the data.")) - } - - # Column data length must match number of rows of data. 
- else if ( missing(index) || !(is.numeric(index)) ) { - stop(paste("Specify the index of the value to be replaced as an integer.")) - } - - else if ( index != as.integer(index) || index < 1 || index > nrow(data) ) { - stop( paste("index must be an integer between 1 and", nrow(data), ".") ) - } - - if ( class(data[[col_name]][[index]]) != class(new_value)) { - warning("Class of new value does not match the class of the replaced value.") - } - - old_value = data[[col_name]][[index]] - data[[col_name]][[index]] <<- new_value - .self$append_to_changes(list(Replaced_value, col_name, index, old_value, new_value)) - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) -} -) - - -data_obj$methods(append_to_metadata = function(name, value) { - - if( missing(name) || missing(value) ) { - stop("name and value arguements must be specified.") - } - - - else if ( ! is.character(name) ) { - stop("name must be of type: character") - } - - # Remember double brackets must be used when dealing with variable names. 
- else { - metadata[[name]] <<- value - .self$append_to_changes(list(Added_metadata, name)) - .self$set_metadata_changed(TRUE) - } -} -) - -data_obj$methods(append_to_variables_metadata = function(col_name, property, new_val) { - - if( missing(col_name) || missing(property) || missing(new_val) ) { - stop("col_name, property and new_val arguements must be specified.") - } - if(!col_name %in% names(data)) stop("col_name not found in data") - - row = which(variables_metadata[,1]==col_name) - col = which(colnames(variables_metadata)==property) - variables_metadata[row, col] <<- new_val - - .self$append_to_changes(list(Added_variables_metadata, col_name, property)) - .self$set_variables_metadata_changed(TRUE) - .self$set_data_changed(TRUE) -} -) - -data_obj$methods(append_to_changes = function(value) { - - if( missing(value) ) { - stop(" value arguements must be specified.") - } - else { - changes[[length(changes)+1]] <<- value - } -} -) - -data_obj$methods(is_metadata = function(str) { - out = FALSE - - if(str %in% names(metadata) ) { - out = TRUE - } - return(out) -} -) - -data_obj$methods(add_defaults_meta = function(user) { - append_to_metadata(is_calculated_label,FALSE) - } -) - -data_obj$methods(remove_rows_in_data = function(start_pos, num_rows = 1) { - if ( start_pos != as.integer(start_pos) || start_pos < 1 || start_pos > nrow(data) ) { - stop( paste("index must be an integer between 1 and", nrow(data), ".") ) - } - else if (start_pos > nrow(data)) { - stop(paste0(" Row: '", start_pos, " does not exist in the data.")) - } - else { - end_pos <- start_pos + num_rows - 1 - data <<- data[-(start_pos:end_pos),] - .self$append_to_changes(list(Removed_row, start_pos)) - } - .self$set_data_changed(TRUE) -} -) - -data_obj$methods(get_next_default_column_name = function(prefix) { - next_default_item(prefix = prefix, existing_names = names(data)) -} -) - -data_obj$methods(insert_column_in_data = function(col_data =c(), start_pos = (length(names(data))+1), number_cols = 1) 
{ - if (start_pos <= 0) stop("You cannot put a column into the position less or equal to zero.") - if (start_pos %% 1 != 0) stop("start_pos value should be an integer.") - if ((length(names(data))+1) < start_pos) stop("The start_pos argument exceeds the number of columns in the data plus 0ne.") - - if(length(col_data)==0){ - col_data <- rep(NA, nrow(data)) - warning(paste("You are inserting empty column(s) to", get_metadata(data_name_label))) - } - for(j in 1:number_cols){ - col_name <- .self$get_next_default_column_name("X") - assign(col_name, col_data) - data[, col_name] <<- col_data - } - if(start_pos==1){ - data <<- cbind(data[(ncol(data)-number_cols+1): ncol(data)], data[(start_pos):(ncol(data)-number_cols)]) - } - else if(start_pos==(length(names(data))+1 - number_cols)){ - data <<- data - } - else{ - data <<- cbind(data[1:(start_pos -1)], data[(ncol(data)-number_cols+1): ncol(data)], data[start_pos:(ncol(data)-number_cols)]) - - } - - .self$append_to_changes(list(Inserted_col, start_pos)) - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) -} -) - -# data_obj$methods(move_columns_in_data = function(col_names = "", col_number) { -# if (col_number <= 0) stop("You cannot move a column into the position less or equal to zero.") -# if (col_number %% 1 != 0) stop("col_number value should be an integer.") -# if (length(names(data)) < col_number) stop("The col_number argument exceeds the number of columns in the data.") -# -# for(col_name in col_names){ -# if(!(col_name %in% names(data))){ -# stop(col_name, " is not a column in ", get_metadata(data_name_label)) -# } -# } -# -# old_names = names(data) -# dat1 <- data[(col_names)] -# names(dat1) <- col_names -# -# for(name in col_names){ -# names(data)[names(data) == name] <<- .self$get_next_default_column_name(prefix = "to_delete") -# } -# -# if(col_number==1){ -# data <<- cbind(dat1, data) -# } -# else if(col_number == ncol(data)){ -# data <<- cbind(data,dat1) -# } -# else{ -# data <<- 
cbind(data[1:(col_number)], dat1, data[(col_number+1):ncol(data)]) -# } -# new_names = names(data) -# -# for(name in new_names){ -# if(!(name %in% old_names)){ -# data[,name]<<- NULL -# } -# } -# .self$append_to_changes(list(Move_col, col_names)) -# } -# ) - - -data_obj$methods(order_columns_in_data = function(col_order) { - if (length(names(data)) != length(col_order)) stop("Columns to order should be same as columns in the data.") - - if(is.numeric(col_order)) { - if(! (identical(sort(col_order), sort(as.numeric(1:ncol(data)))))) { - stop("Invalid column order") - } - }else if(is.character(col_order)) { - if(! (identical(sort(col_order), sort(as.character(names(data)))))){ - stop("Invalid column order") - } - }else{ - stop("column order must be a numeric or character vector") - } - set_data(data[ ,col_order]) - .self$append_to_changes(list(Col_order, col_order)) -} -) - -data_obj$methods(insert_row_in_data = function(start_pos = (nrow(data)+1), row_data = c(), number_rows = 1) { - - if (start_pos != as.integer(start_pos) || start_pos < 1 || start_pos > nrow(data) + 1 ) { - stop( paste("index must be an integer between 1 and", nrow(data)+1, ".") ) - } - if (length(row_data) == 0){ - row_data <- rep(NA,ncol(data)) - warning("You are inserting an empty row to data") - } - if(length(row_data)>0 && length(row_data)!=ncol(data)){ - stop("The dimension of Row data is different from that of the data") - } - for(j in 1:number_rows){ - if(start_pos==1){ - data <<- rbind(row_data, data) - } - - else if (start_pos == (nrow(data)+1)){ - data <<- rbind(data,row_data) - } - else { - data <<- rbind(data[1:(start_pos-1),],row_data,data[(start_pos):nrow(data),]) - - } - } - .self$append_to_changes(list(Inserted_row, start_pos)) - .self$set_data_changed(TRUE) -} -) - -data_obj$methods(length_of_data = function() { - return(nrow(data)) -} -) - -data_obj$methods(get_column_factor_levels = function(col_name = "") { - if(!(col_name %in% names(data))){ - stop(col_name, " is not a column 
in", get_metadata(data_name_label)) - } - if(!(is.factor(data[,c(col_name)]))){ - stop(col_name, " is not a factor column") - } - - counts <- as.data.frame(table(data[,c(col_name)])) - counts <- rename(counts, replace = c("Var1" = "Levels", "Freq" = "Counts")) - return(counts) -} -) - -data_obj$methods(sort_dataframe = function(col_names = c(), decreasing = TRUE, na.last = TRUE) { - string = list() - for(col_name in col_names){ - if(!(col_name %in% names(data))){ - stop(col_name, " is not a column in ", get_metadata(data_name_label)) - } - } - if(length(col_names)==1){ - data <<- data[with(data, order(eval(parse(text = col_names)), decreasing = decreasing, na.last = na.last)), ] - }else{ - data <<-data[ do.call(order, c(as.list(data[,col_names]), decreasing = decreasing, na.last = na.last)), ] - } - .self$set_data_changed(TRUE) -} -) - -data_obj$methods(convert_column_to_type = function(col_names = c(), to_type = "factor", factor_numeric = "by_levels") { - for(col_name in col_names){ - if(!(col_name %in% names(data))){ - stop(col_name, " is not a column in ", get_metadata(data_name_label)) - } - } - - if(length(to_type)>1){ - warning("Column(s) will be converted to type ", to_type[1]) - to_type = to_type[1] - } - - - if(!(to_type %in% c("integer", "factor", "numeric", "character"))){ - stop(to_type, " is not a valid type to convert to") - } - - if(!(factor_numeric %in% c("by_levels", "by_ordinals"))){ - stop(factor_numeric, " can either be by_levels or by_ordinals.") - } - - for(col_name in col_names){ - if(to_type=="factor"){ - data[,col_name] <<- make_factor(x = data[,col_name]) - } - - if(to_type=="integer"){ - data[,col_name] <<- as.integer(data[,col_name]) - } - - if(to_type=="numeric"){ - if(is.factor(data[,col_name]) & (factor_numeric == "by_levels")){ - data[,col_name] <<- as.numeric(levels(data[,col_name]))[data[,col_name]] - }else{ - data[,col_name] <<- as.numeric(data[,col_name]) - } - } - - if(to_type=="character"){ - data[,col_name] <<- 
as.character(data[,col_name]) - } - } - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) -} -) - -data_obj$methods(copy_columns = function(col_names = "") { - for(col_name in col_names){ - if(!(col_name %in% names(data))){ - stop(col_name, " is not a column in ", get_metadata(data_name_label)) - } - } - dat1 <- data[(col_names)] - - for(name in col_names){ - names(dat1)[names(dat1) == name] <- .self$get_next_default_column_name(prefix = paste(name, "copy", sep = "_" ) ) - } - - set_data(cbind(data, dat1)) - .self$append_to_changes(list(Copy_cols, col_names)) -} -) - -data_obj$methods(drop_unused_factor_levels = function(col_name) { - if(!col_name %in% names(data)) stop(paste(col_name,"not found in data.")) - if(!is.factor(data[[col_name]])) stop(paste(col_name,"is not a factor.")) - - .self$add_columns_to_data(col_name, droplevels(data[[col_name]])) -} -) - -data_obj$methods(set_factor_levels = function(col_name, new_levels) { - if(!col_name %in% names(data)) stop(paste(col_name,"not found in data.")) - if(!is.factor(data[[col_name]])) stop(paste(col_name,"is not a factor.")) - if(!length(new_levels)==length(levels(data[[col_name]]))) stop("Incorrect number of new levels given.") - - levels(data[[col_name]]) <<- new_levels - .self$set_data_changed(TRUE) - .self$set_variables_metadata_changed(TRUE) -} -) - -data_obj$methods(set_factor_reference_level = function(col_name, new_ref_level) { - if(!col_name %in% names(data)) stop(paste(col_name,"not found in data.")) - if(!is.factor(data[[col_name]])) stop(paste(col_name,"is not a factor.")) - if(!new_ref_level %in% levels(data[[col_name]])) stop(paste(new_ref_level, "is not a level of the factor")) - - .self$add_columns_to_data(col_name, relevel(data[[col_name]], new_ref_level)) -} -) - -data_obj$methods(reorder_factor_levels = function(col_name, new_level_names) { - if(!col_name %in% names(data)) stop(paste(col_name,"not found in data.")) - if(!is.factor(data[[col_name]])) 
stop(paste(col_name,"is not a factor.")) - if(length(new_level_names)!=length(levels(data[[col_name]]))) stop("Incorrect number of new level names given.") - if(!all(new_level_names %in% levels(data[[col_name]]))) stop(paste("new_level_names must be a reordering of the current levels:",paste(levels(data[[col_name]]), collapse = " "))) - .self$add_columns_to_data(col_name = col_name, col_data = factor(data[[col_name]], levels = new_level_names)) - .self$set_variables_metadata_changed(TRUE) -} -) - -data_obj$methods(get_data_type = function(col_name = "") { - if(!(col_name %in% names(data))){ - stop(col_name, " is not a column in ", get_metadata(data_name_label)) - } - type = "" - if(is.numeric(data[[col_name]])){ - type = "numeric" - } - - if(is.integer(data[[col_name]])){ - if(all(data[[col_name]]>0)){ - type = "positive integer" - }else{ - type = "integer" - } - } - - if(is.factor(data[[col_name]]) & (length(levels(data[[col_name]]))==2)){ - type = "two level factor" - } - - if(is.factor(data[[col_name]]) & (length(levels(data[[col_name]]))>2)){ - type = "multilevel factor" - } - return(type) - #TODO: This needs to be completed for all possible types e.g. "character", "Date" -} -) \ No newline at end of file +# remove \ No newline at end of file diff --git a/instat/static/.old/instat_object.R b/instat/static/.old/instat_object.R index fa6763da34f..50aa506deb1 100644 --- a/instat/static/.old/instat_object.R +++ b/instat/static/.old/instat_object.R @@ -1,646 +1 @@ -# Defining the reference class "instat_obj" -# This reference class can contain multiple data_objs - -instat_obj <- setRefClass("instat_obj", - fields = list(data_objects = "list", - metadata = "list", models = "list", - data_objects_changed = "logical") -) - -# INITIALIZE method -############################################################################################## -# Functions of reference classes are called methods. -# This is how you define a method for a specific reference class. 
-# Every reference class has an initialize method which tells R how to create new -# instat objects. - -instat_obj$methods(initialize = function(data_tables = list(), instat_obj_metadata = list(), - data_tables_variables_metadata = rep(list(data.frame()),length(data_tables)), - data_tables_metadata = rep(list(list()),length(data_tables)), - imported_from = as.list(rep("",length(data_tables))), - messages=TRUE, convert=TRUE, create=TRUE) -{ - - .self$set_meta(instat_obj_metadata) - .self$set_models(list()) - - if (missing(data_tables) || length(data_tables) == 0) { - data_objects <<- list() - } - - else { - .self$import_data(data_tables=data_tables, data_tables_variables_metadata=data_tables_variables_metadata, - data_tables_metadata=data_tables_metadata, - imported_from=imported_from, messages=messages, convert=convert, create=create) - } - - .self$data_objects_changed <<- FALSE - -} -) - -# IMPORT DATA FUNCTION -############################################################################################## - -instat_obj$methods(import_data = function(data_tables = list(), data_tables_variables_metadata = rep(list(data.frame()),length(data_tables)), - data_tables_metadata = rep(list(list()),length(data_tables)), - imported_from = as.list(rep("",length(data_tables))), - messages=TRUE, convert=TRUE, create=TRUE) -{ - - if (missing(data_tables) || length(data_tables) == 0) { - stop("No data found. No data objects can be created.") - } - - else { - - if ( ! 
(class(data_tables) == "list") ) { - stop("data_tables must be a list of data frames") - } - - if (length(unique(names(data_tables))) != length(names(data_tables)) ) { - stop("There are duplicate names in the data tables list.") - } - - if ( !(length(data_tables_variables_metadata) == length(data_tables)) ) { - stop("If data_tables_variables_metadata is specified, it must be a list of metadata lists with the same - length as data_tables.") - } - - if ( !(length(data_tables_metadata) == length(data_tables)) ) { - stop("If data_tables_metadata is specified, it must be a list of metadata lists with the same - length as data_tables.") - } - - if ( length(imported_from) != length(data_tables) ) { - stop("imported_from must be a list of the same length as data_tables") - } - - # loop through the data_tables list and create a data object for each - # data.frame given - - new_data_objects = list() - - for ( i in (1:length(data_tables)) ) { - new_data = data_obj$new(data=data_tables[[i]], data_name = names(data_tables)[[i]], - variables_metadata = data_tables_variables_metadata[[i]], - metadata = data_tables_metadata[[i]], - imported_from = imported_from[[i]], - start_point = i, - messages = messages, convert = convert, create = create) - - # Add this new data object to our list of data objects - .self$append_data_objects(new_data$metadata[[data_name_label]],new_data) - } - } - } -) - -#' Title -#' -#' @param data_RDS -#' @param keep_existing -#' @param overwrite_existing -#' @param include_models -#' @param include_graphics -#' @param include_metadata -#' @param include_logs -#' @param messages -#' -#' @return -#' @export -#' -#' @examples -instat_obj$methods(import_RDS = function(data_RDS, keep_existing =TRUE, overwrite_existing=FALSE, include_models=TRUE, - include_graphics=TRUE, include_metadata=TRUE, include_logs=TRUE,messages=TRUE) -{ - if(class(data_RDS) == "instat_obj"){ - if (!keep_existing & include_models & include_graphics & include_metadata & include_logs){ - 
.self$replace_instat_object(new_instatObj = data_RDS) - } else { - if (!keep_existing) { - .self$clear_data() - .self$set_meta(list()) - .self$set_models(list()) - } - for ( i in (1:length(data_RDS$data_objects)) ) { - if (!(data_RDS$data_objects[[i]]$metadata[[data_name_label]] %in% names(data_objects)) | overwrite_existing){ - #TODO in data_object if (!include_models) data_RDS$data_objects[i]$clear_models - #TODO in data_object if (!include_graphics) data_RDS$data_objects[i]$clear_graphics - curr_data_name = data_RDS$data_objects[[i]]$metadata[[data_name_label]] - if (!include_metadata) { - data_RDS$data_objects[[i]]$set_meta(list()) - data_RDS$data_objects[[i]]$add_defaults_meta() - data_RDS$data_objects[[i]]$set_variables_metadata(data.frame()) - data_RDS$data_objects[[i]]$update_variables_metadata() - - } - if (!include_logs) data_RDS$data_objects[i]$set_changes(list()) - # Add this new data object to our list of data objects - .self$append_data_objects(curr_data_name,data_RDS$data_objects[[i]]) - } - } - if (include_models & length(data_RDS$models) > 0){ - for ( i in (1:length(data_RDS$models)) ) { - if (!(names(data_RDS$models)[i] %in% names(models)) | overwrite_existing){ - .self$add_model(data_RDS$models[i],names(data_RDS$models)[i]) - } - } - } - if (include_metadata & length(data_RDS$metadata) > 0){ - for ( i in (1:length(data_RDS$metadata)) ) { - if (!(names(data_RDS$metadata)[i] %in% names(metadata)) | overwrite_existing){ - .self$metadata[names(data_RDS$models)[i]]<<-data_RDS$metadata[i] #todo this should be in an addmetadata method - } - } - } - } - data_objects_changed <<- TRUE - } - else if (is.data.frame(data_RDS)) { - .self$import_data(data_tables = list(data_RDS = data_RDS)) - } - else{ - if (messages){ - stop(paste("RDS_data: ", data_RDS, " Unidentified Object"))#TODO work on messages and error handling - } - } -} -) - -instat_obj$methods(replace_instat_object = function(new_instatObj) { - data_objects<<-new_instatObj$data_objects - 
.self$set_meta(new_instatObj$metadata) - .self$set_models(new_instatObj$models) - data_objects_changed <<- TRUE - lapply(data_objects, function(x) x$set_data_changed(TRUE)) - } - ) - - -instat_obj$methods(set_meta = function(new_meta) { - if( ! is.list(new_meta) ) { - stop("new_meta must be of type: list") - } - metadata <<- new_meta -} -) - -instat_obj$methods(set_models = function(new_models) { - if( ! is.list(new_models) ) { - stop("new_models must be of type: list") - } - models <<- new_models -} -) - -instat_obj$methods(append_data_objects = function(name, obj) { - if( !class(name) == "character") { - stop("name must be a character") - } - - if ( !class(obj) == "data_obj") { - stop("obj must be a data object") - } - - data_objects[[name]] <<- obj - data_objects[[name]]$data_changed <<- TRUE -} -) - -instat_obj$methods(clear_data = function() { - - data_objects <<- list() -} -) - -instat_obj$methods(get_data_frame = function(data_name, convert_to_character = FALSE, - stack_data = FALSE,...) 
{ - if(!stack_data) { - if(missing(data_name)) { - retlist <- list() - for ( i in (1:length(data_objects)) ) { - retlist[[names(data_objects)[[i]]]] = data_objects[[i]]$get_data_frame(convert_to_character = convert_to_character) - } - return(retlist) - } - else return(data_objects[[data_name]]$get_data_frame(convert_to_character = convert_to_character)) - } - else { - if(missing(data_name)) stop("data to be stacked is missing") - if(!data_name %in% names(data_objects)) stop(paste(data_name, "not found.")) - return(melt(data_objects[[data_name]]$get_data_frame(), ...)) - } -} -) - -instat_obj$methods(get_variables_metadata = function(data_name, data_type = "all", convert_to_character = FALSE) { - if(missing(data_name)) { - retlist <- list() - for ( i in (1:length(data_objects)) ) { - retlist[[names(data_objects)[[i]]]] = data_objects[[i]]$get_variables_metadata(data_type = data_type, convert_to_character = convert_to_character) - } - return(retlist) - } - else return(data_objects[[data_name]]$get_variables_metadata(data_type = data_type, convert_to_character = convert_to_character)) -} -) - -instat_obj$methods(get_combined_metadata = function(convert_to_character = FALSE) { - retlist <- data.frame() - for ( i in (1:length(data_objects)) ) { - templist=data_objects[[i]]$get_metadata() - for ( j in (1:length(templist)) ) { - retlist[names(data_objects)[[i]],names(templist[j])] =templist[[j]] - } - } - if(convert_to_character) return(convert_to_character_matrix(retlist, FALSE)) - else return(retlist) -} -) - -instat_obj$methods(get_data_names = function() { - return(names(.self$data_objects)) -} -) - -instat_obj$methods(get_data_changed = function(data_name) { - if(missing(data_name)) { - if(data_objects_changed) return (TRUE) - for(curr_obj in data_objects) { - if(curr_obj$data_changed) return(TRUE) - } - return(FALSE) - } - else { - return(data_objects[[data_name]]$data_changed) - } -} -) - -instat_obj$methods(get_variables_metadata_changed = function(data_obj) { - 
if(missing(data_obj)) { - if(data_objects_changed) return (TRUE) - for(curr_obj in data_objects) { - if(curr_obj$variables_metadata_changed) return(TRUE) - } - return(FALSE) - } - else { - return(data_objects[[data_obj]]$variables_metadata_changed) - } -} -) - -instat_obj$methods(get_metadata_changed = function(data_obj) { - if(missing(data_obj)) { - if(data_objects_changed) return (TRUE) - for(curr_obj in data_objects) { - if(curr_obj$metadata_changed) return(TRUE) - } - return(FALSE) - } - else { - return(data_objects[[data_obj]]$metadata_changed) - } -} -) - -instat_obj$methods(set_data_frames_changed = function(data_name = "", new_val) { - if(data_name == "") { - for(curr_obj in data_objects) { - curr_obj$set_data_changed(new_val) - } - } - - else data_objects[[data_name]]$set_data_changed(new_val) - -} -) - -instat_obj$methods(set_variables_metadata_changed = function(data_name = "", new_val) { - if(data_name == "") { - for(curr_obj in data_objects) { - curr_obj$set_variables_metadata_changed(new_val) - } - } - - else data_objects[[data_name]]$set_variables_metadata_changed(new_val) -} -) - -instat_obj$methods(set_metadata_changed = function(data_name = "", new_val) { - if(data_name == "") { - for(curr_obj in data_objects) { - curr_obj$set_metadata_changed(new_val) - } - } - - else data_objects[[data_name]]$set_metadata_changed(new_val) -} -) - -instat_obj$methods(add_columns_to_data = function(data_name, col_name, col_data, use_col_name_as_prefix) { - if(missing(data_name)) stop("data_name is required") - if(!data_name %in% names(data_objects)) stop(paste(data_name, "not found")) - - if(missing(use_col_name_as_prefix)) data_objects[[data_name]]$add_columns_to_data(col_name, col_data) - else data_objects[[data_name]]$add_columns_to_data(col_name, col_data, use_col_name_as_prefix = use_col_name_as_prefix) -} -) - -instat_obj$methods(get_columns_from_data = function(data_name, col_names, from_stacked_data = FALSE, - force_as_data_frame = FALSE) { - 
if(missing(data_name)) stop("data_name is required") - if(!from_stacked_data) { - if(!data_name %in% names(data_objects)) stop(paste(data_name, "not found")) - data_objects[[data_name]]$get_columns_from_data(col_names, force_as_data_frame) - } - else { - if(!exists(data_name)) stop(paste(data_name, "not found.")) - if(!all(sapply(col_names, function(x) x %in% names(data_name)))) stop("Not all column names were found in data") - if(length(col_names)==1) return (data_name[[col_names]]) - else return(data_name[col_names]) - } -} -) - -instat_obj$methods(add_model = function(model, model_name = paste("model",length(models)+1)) { - if(missing(model)) stop("model is required") - if(model_name %in% names(models)) message(paste("A model called", model_name, "already exists. It will be replaced.")) - - models[[model_name]] <<- model -} -) - -instat_obj$methods(get_model = function(model_name) { - if(missing(model_name)) return(models) - if(!is.character(model_name)) stop("name must be a character") - if(!model_name %in% names(models)) stop(model_name, "not found in models") - models[[model_name]] -} -) - -instat_obj$methods(get_model_names = function() { - return(names(models)) -} -) - -instat_obj$methods(get_from_model = function(model_name, value1, value2, value3) { - if(missing(model_name)) stop("model_name must be specified.") - if(!is.character(model_name)) stop("name must be a character") - if(!model_name %in% names(models)) stop(model_name, "not found in models") - if(missing(value1)) stop("value1 must be specified.") - if(!value1 %in% names(get_model(model_name))) stop(paste(value1, "not found in", model_name)) - if(missing(value2)) { - if(!missing(value3)) warning(paste("value2 is missing so value3 =",value3, "will be ignored.")) - return(get_model(model_name)[[value1]]) - } - else { - if(!value2 %in% names(get_model(model_name)[[value1]])) stop(paste0(value2, " not found in ", model_name,"[[\"",value1,"\"]]")) - if(missing(value3)) 
return(get_model(model_name)[[value1]][[value2]]) - else { - if(!value3 %in% names(get_model(model_name)[[value1]][[value2]])) stop(paste0(value3, " not found in ", model_name,"[[\"",value1,"\"]]","[[\"",value2,"\"]]")) - return(get_model(model_name)[[value1]][[value2]][[value3]]) - } - } -} -) - -instat_obj$methods(replace_value_in_data = function(data_name, col_name, index, new_value) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$replace_value_in_data(col_name, index, new_value) -} -) - -instat_obj$methods(rename_column_in_data = function(data_name, column_name, new_val) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$rename_column_in_data(column_name, new_val) -} -) - -instat_obj$methods(remove_columns_in_data_from_start_position = function(data_name, start_pos, col_numbers) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$remove_columns_in_data_from_start_position(start_pos = start_pos, col_numbers = col_numbers) -} -) - -instat_obj$methods(remove_columns_in_data = function(data_name, cols) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$remove_columns_in_data(cols = cols) -} -) - -instat_obj$methods(remove_rows_in_data = function(data_name, start_pos, num_rows) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - 
data_objects[[data_name]]$remove_rows_in_data(start_pos = start_pos, num_rows = num_rows) -} -) - -instat_obj$methods(get_next_default_column_name = function(data_name, prefix) { - if(missing(data_name)) { - out = list() - for(curr_obj in data_objects) { - out[[curr_obj$get_metadata(data_name_label)]] = curr_obj$get_next_default_column_name(prefix) - } - return(out) - } - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - return(data_objects[[data_name]]$get_next_default_column_name(prefix)) -} -) - -instat_obj$methods(get_column_names = function(data_name) { - if(missing(data_name)) stop("data_name must be given") - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - return(names(data_objects[[data_name]]$data)) -} -) - -instat_obj$methods(insert_column_in_data = function(data_name, col_data =c(), start_pos, number_cols) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$insert_column_in_data(col_data = col_data, start_pos = start_pos, number_cols = number_cols ) -} -) - -# instat_obj$methods(move_columns_in_data = function(data_name, col_names = "", col_number){ -# if(!is.character(data_name)) stop("data_name must be of type character") -# if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) -# -# data_objects[[data_name]]$move_columns_in_data(col_names = col_names, col_number = col_number) -# } -# ) - -instat_obj$methods(order_columns_in_data = function(data_name, col_order){ - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - 
- data_objects[[data_name]]$order_columns_in_data(col_order = col_order) -} -) - -instat_obj$methods(insert_row_in_data = function(data_name, start_pos, row_data = c(), number_rows) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$insert_row_in_data(start_pos = start_pos, row_data = row_data, number_rows = number_rows) -} -) - -instat_obj$methods(length_of_data = function(data_name) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$length_of_data() -} -) - -instat_obj$methods(get_next_default_dataframe_name = function(prefix, include_index = TRUE, start_index = 1) { - next_default_item(prefix = prefix, existing_names = names(data_objects), include_index = include_index, start_index = start_index) -} -) - -instat_obj$methods(delete_dataframe = function(data_name = data_name) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$get_data_frame()<<-NULL - data_objects_changed <<- TRUE -} -) - -instat_obj$methods(get_column_factor_levels = function(data_name,col_name = "") { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$get_column_factor_levels(col_name) -} -) - -instat_obj$methods(sort_dataframe = function(data_name, col_names = c(), decreasing = FALSE, na.last = TRUE) { - #data_name = is.character(data_name) - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - 
- data_objects[[data_name]]$sort_dataframe(col_names = col_names, decreasing = decreasing, na.last = na.last) -} -) - -instat_obj$methods(rename_dataframe = function(data_name, new_value = "") { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - for (i in 1:length(names(data_objects))){ - if(names(data_objects)[i] == data_name){ - names(data_objects)[i] <<- new_value - } - } - - data_objects[[new_value]]$append_to_metadata(data_name_label, new_value) -} -) - -instat_obj$methods(convert_column_to_type = function(data_name, col_names = c(), to_type ="factor", factor_numeric = "by_levels") { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$convert_column_to_type(col_names = col_names, to_type = to_type, factor_numeric = factor_numeric) -} -) - -instat_obj$methods(append_to_variables_metadata = function(data_name, col_name, property, new_val) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$append_to_variables_metadata(col_name, property, new_val) -} -) - -instat_obj$methods(append_to_dataframe_metadata = function(data_name, property, new_val) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$append_to_metadata(property, new_val) -} -) - - -instat_obj$methods(order_dataframes = function(data_frames_order) { - if(length(data_frames_order) != length(names(data_objects))) stop("number data frames to order should be equal to number of dataframes in the object") - for(name in names(data_objects)){ - if(!(name 
%in% data_frames_order)){ - stop(name, "is missing in data frames to order") - } - } - new_data_objects = list() - for(i in 1:length(names(data_objects))){ - new_data_objects[[i]] = data_objects[[data_frames_order[i]]] - } - names(new_data_objects) <- data_frames_order - data_objects <<- new_data_objects - data_objects_changed <<- TRUE -} -) - -instat_obj$methods(copy_columns = function(data_name, col_names = "") { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$copy_columns(col_names = col_names) -} -) - -instat_obj$methods(drop_unused_factor_levels = function(data_name, col_name) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$drop_unused_factor_levels(col_name = col_name) -} -) - -instat_obj$methods(set_factor_levels = function(data_name, col_name, new_levels) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$set_factor_levels(col_name = col_name, new_levels = new_levels) -} -) - -instat_obj$methods(set_factor_reference_level = function(data_name, col_name, new_ref_level) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$set_factor_reference_level(col_name = col_name, new_ref_level = new_ref_level) -} -) - -instat_obj$methods(get_column_count = function(data_name) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - 
return(ncol(data_objects[[data_name]]$data)) -} -) - -instat_obj$methods(reorder_factor_levels = function(data_name, col_name, new_level_names) { - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(data_objects)) stop(paste("dataframe: ", data_name, " not found")) - - data_objects[[data_name]]$reorder_factor_levels(col_name = col_name, new_level_names = new_level_names) -} -) \ No newline at end of file +# removed \ No newline at end of file diff --git a/instat/static/ClimateObject/R/BackendComponents/summary_functions.R b/instat/static/ClimateObject/R/BackendComponents/summary_functions.R index 892a6f0f6e1..50aa506deb1 100644 --- a/instat/static/ClimateObject/R/BackendComponents/summary_functions.R +++ b/instat/static/ClimateObject/R/BackendComponents/summary_functions.R @@ -1,191 +1 @@ -# summary function labels -sum_label="summary_sum" -mode_label="summary_mode" -count_label="summary_count_all" -count_over_threshold_label="summary_count_over_threshold" -sd_label = "summary_sd" -median_label = "summary_median" -range_label = "summary_range" -count_label = "summary_count" -min_label="summary_min" -max_label="summary_max" -mean_label="summary_mean" -running_summary_label="summary_running_summary" -start_of_rain_summary_label="summary_start_of_rain" -end_of_rain_summary_label="summary_end_of_rain" - -# list of summary functions -summaries_list=c(sum_label, mode_label, count_label, sd_label, median_label, range_label, count_label, min_label, max_label, mean_label, running_summary_label, start_of_rain_summary_label, end_of_rain_summary_label, count_over_threshold_label) - -summary_mode <- function(x,...) { - ux <- unique(x) - ux[which.max(tabulate(match(x, ux)))] -} - -summary_mean <- function (x, na.rm = FALSE,...) { - if( length(x)==0 || (na.rm && length(x[!is.na(x)])==0) ) return(NA) - else mean(x, na.rm=na.rm,...) -} - -summary_sum <- function (x, na.rm = FALSE,...) 
{ - sum(x, na.rm = FALSE) -} - -summary_count <- function(x,...) { - length(x) -} - -summary_sd <- function(x, na.rm = FALSE,...) { - return(sd(x,...)) -} - -summary_max <- function (x, na.rm = FALSE,...) { - if( length(x)==0 || (na.rm && length(x[!is.na(x)])==0) ) return(NA) - else max(x, na.rm = na.rm) -} - -summary_min <- function (x, na.rm = FALSE,...) { - if( length(x)==0 || (na.rm && length(x[!is.na(x)])==0) ) return(NA) - else min(x, na.rm = na.rm) -} - -#get the range of the data -summary_range <- function(x, na.rm = TRUE, ...){ - max(x, na.rm = na.rm) - min(x, na.rm = na.rm) -} - -# median function -summary_median <- function(x, ...) { - odd.even <- length(x)%%2 - if (odd.even == 0)(sort(x)[length(x)/2] + sort(x)[1 + length(x)/2])/2 - else sort(x)[ceiling(length(x)/2)] -} - -#in progress with return and print -# summary_count <- function(x, proportions = c(120,140,160,180,200), na.rm = TRUE, ...){ -# count = c() -# for (i in 1:length(proportions)){ -# count[i] = sum(x <= proportions[i], na.rm = na.rm) -# return(paste("count <=", proportions[i], "is", count[i])) -# } -# } - -# results as percent of data (in progress) -summary_percents = function(x,data, proportions = c(120,140,160,180,200), na.rm = FALSE, ...){ - count = c() - percent = c() - for (i in 1:length(proportions)){ - count[i] = sum(x <= proportions[i], na.rm = na.rm) - percent[i] = (count[i]/nrow(data))*100 - print(paste("% of data <=", proportions[i], "is", percent[i])) - } -} - -# proportion of data (in progress) -summary_proportions <- function(x, data, proportions = c(120,140,160,180,200), na.rm = FALSE, ...){ - count = c() - proportion = c() - for (i in 1:length(proportions)){ - count[i] = sum(x <= proportions[i], na.rm = na.rm) - proportion[i] = (count[i]/nrow(data)) - print(paste("proportion <=", proportions[i], "is", proportion[i])) - } -} - -summary_running_summary <- function(data, total_days = 1, func = max_label, na.rm = FALSE,...) 
{ - h=c() - for (i in 1:(length(data)-total_days+1)){ - h[i] <- match.fun(sum_label)(data[i:(i+total_days-1)], na.rm = na.rm) - } - - # print(h) - if(missing(func)) return(h) - else { - func = match.fun(func) - return(func(h, na.rm = na.rm)) - } -} - -summary_start_of_rain <- function(data, earliest_day = 92, total_days = 2, rain_total = 20, dry_spell_condition = FALSE, threshold = 0.85, dry_length = 30, dry_days = 10,...) { - # initialize to 0 incase conditions are never met - start_of_rain = 0 - - # initialize current earliest day - curr_earliest_day = earliest_day - - # if dry spell required use the simple sum_check to get start of the rain - if(!dry_spell_condition) start_of_rain = sum_check(data, curr_earliest_day, total_days, rain_total)[1] - - else { - # If sum and dry spell conditions are required - - # indicates whether both conditions have been met and - # start of rain has been found - # initialize to FALSE - found = FALSE - - num_rows = nrow(data) - - # while start of the rain has not been found and our earliest day to check is not too - # close to the end of year we continue looking for the start of the rain - # if the dry_length is greater than the remaining number of rows - # we will not be able to check for dry spells so we cannot get a start of the rain - - while( !found && sum(data[[1]]==curr_earliest_day)>0 && dry_length <= num_rows - which(data[[1]]==curr_earliest_day) ) { - # get the first day after earliest_day which is over rain_total - day = sum_check(data, curr_earliest_day, total_days, rain_total)[1] - - # if the dry_length is greater than the remaining number of rows - # we can no longer check for dry spells so end the loop - # also if day is missing, end the loop. 
- # NA will be returned - if( is.na(day) || dry_length > num_rows - which(data[[1]]==day) ) break - - # start day to check for a dry spell is the day after the day found by sum_check - start_row = which(data[[1]]==day+1) - - # if there is no dry spell we have found the start of the rain - # found = TRUE will mean the loop does not run again - if( !dry_spell_check(data[start_row:num_rows, 2], dry_length, dry_days, threshold) ) { - start_of_rain = day - found = TRUE - } - else { - # in the worst case there was a dry spell of length dry_days start after day. - # The next check should begin after this potential dry spell. - # if this day is beyond the end of the year, exit the loop to return NA. - if(is.na(which(data[[1]]==day + dry_length))) break - else curr_earliest_day = day + dry_length - } - } - } - start_of_rain -} - -summary_end_of_rain <- function(data, earliest_day = 228,...) { - data = data[data[2] >= earliest_day, ] - - water_balance = data[[1]] - dos = data[[2]] - - # default value if end of season not found - end_rain = NA - - # subsetting above may give an empty data frame - if(nrow(data)==0) return(end_rain) - - for(i in 1:nrow(data)) { - if( !is.na(water_balance[i]) - && water_balance[i] == 0 ) { - end_rain = dos[i] - break - } - } - end_rain -} - -summary_count_over_threshold <- function(x, na.rm = FALSE, threshold = 0.85, strict_ineq = TRUE,...) 
{ - if(strict_ineq) return(sum(x>threshold, na.rm=na.rm)) - else return(sum(x>=threshold, na.rm=na.rm)) - -} \ No newline at end of file +# removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/Backend_Components/DisplayDaily.R b/instat/static/InstatObject/R/Backend_Components/DisplayDaily.R index 7d35d107093..50aa506deb1 100644 --- a/instat/static/InstatObject/R/Backend_Components/DisplayDaily.R +++ b/instat/static/InstatObject/R/Backend_Components/DisplayDaily.R @@ -1,326 +1 @@ -DataBook$set("public", "display_daily_table", function(data_name, climatic_element, date_col, year_col, station_col, Misscode, Tracecode, Zerocode, monstats = c("min", "mean", "median", "max", "IQR", "sum")) { - self$get_data_objects(data_name)$display_daily_table(data_name = data_name, climatic_element = climatic_element, date_col = date_col, year_col =year_col, station_col = station_col, Misscode = Misscode, Tracecode = Tracecode, Zerocode = Zerocode, monstats = monstats) -} -) - -DataSheet$set("public", "display_daily_table", function(data_name, climatic_element, date_col = date_col, year_col = year_col, station_col = station_col, Misscode, Tracecode, Zerocode, monstats = c("min", "mean", "median", "max", "IQR", "sum")) { - curr_data <- self$get_data_frame() - if(missing(station_col)) curr_data[["Station"]] <- self$get_metadata(data_name_label) - else names(curr_data)[names(curr_data) == station_col] <- "Station" - names(curr_data)[names(curr_data) == date_col] <- "Date" - names(curr_data)[names(curr_data) == year_col] <- "Year" - return(DisplayDaily(Datain = curr_data, Variables = climatic_element, option = 1, Misscode = Misscode, Tracecode = Tracecode, Zerocode = Zerocode, monstats = monstats)) -} -) - -DisplayDaily <- function(Datain,Stations,Variables,option=1,Years,Misscode,Tracecode,Zerocode,Fileout=NA,monstats=c("min","mean","median","max","IQR","sum")){ - #-----------------------------------------------------------------------# - # Helen Greatrex, SSC, 
SSC-RCLIM package # - # # - # DisplayDaily: This function displays daily meteorological data in an # - # easy to view format # - # # - # INPUTS # - # Datain :This is the name of the data.frame containing your data # - # The format of this data.frame is discussed in the details # - # section of this help file # - # # - # Stations: This is the name of the weather station(s) you wish to view # - # e.g. "KUND0002", or c("Paris","London") # - # It is case sensitive and the stations should be values # - # in the Station column of your data.frame. This is set by # - # default to all stations in the data.frame # - # # - # Variables:This is name of the column of data you wish to view # - # e.g.Rain, TMin, RH. It is also case sensitive. This is set # - # default to all variables in the data.frame # - # # - # Option: Set to 1 if you have a column entitled Date, which contains # - # dates in the format yyyy-mm-dd. Set to 2 if you have a # - # column entitled Year, one entitled Month and one entitled # - # Day # - # # - # Years: A number or vector of the year(s) you would like to view # - # This is set by default to all years # - # # - # Months: A number or vector of the month(s) you would like to view # - # This is set by default to all months. This option has been # - # removed as it does not overlap to new years # - # # - # Misscode: This is the how you would like to display missing values # - # It is set by default to "-" # - # # - # Tracecode:This is the how you would like to display trace rainfall # - # values. It is set by default to "tr" # - # # - # Zerocode: This is the how you would like to display zero rainfall # - # values. It is set by default to "--" # - # # - # File: Would you like to view the results on the screen or print # - # them to a file. 
By default this prints to the screen, # - # If you want to print to a file, set this to a character # - # string with the filename (and path if needed) # - # # - # DETAILS # - # This function prints out daily data in an easy to view format, # - # similar to INSTAT's DisplayDaily button. # - # # - # Your input data.frame should have one column for each variable, one # - # row for each date and include the following columns (case sensitive) # - # # - # Station - containing your station names or identifier # - # # - # EITHER (set option = 1) # - # Date - containing a list of dates in the format yyyy-mm-dd # - # OR (set option = 2) # - # Year - containing a list of years in the format yyyy # - # Month - containing a list of months # - # Day - containing a list of days of month (from 1-31) # - # # - # A column for the variable you wish to study e.g. Rain, TMax, RH, ... # - # # - # Note, an experienced R user might wonder why I have not used the # - # options in print as much as I might have. 
This is because often # - # trace rainfall values need to be displayed as text, which messes up # - # the print function's missing/zero value options # - #-----------------------------------------------------------------------# - - #-----------------------------------------------------------------------# - # First, check that appropriate inputs exist # - #-----------------------------------------------------------------------# - if(missing(Datain)==TRUE){stop("Please enter an input data.frame")} - if(missing(Stations)==TRUE){Stations <- unique(Datain$Station)} - if(missing(Years)==TRUE){Years <- unique(Datain$Year)} - if(missing(Variables)==TRUE){ - Variables <- names(Datain) - Variables <- Variables[!(Variables %in% c("Key","Station","Date","Year","Month","Day","UserDOY"))] - if(length(Variables) <= 0){ - stop("Please indicate which column name you would like to display") - } - } - flag=0 - Months <- 1:12 - #-----------------------------------------------------------------------# - # Loop through the stations # - #-----------------------------------------------------------------------# - for (s in 1:length(Stations)){ - #-------------------------------------------------------------------# - # Does the selected station exist? 
# - #-------------------------------------------------------------------# - tmp <- Datain[(as.character(Datain$Station) == Stations[s]),] - - #-------------------------------------------------------------------# - # Stop the function if not # - #-------------------------------------------------------------------# - if(length(tmp[,1]) <= 0){ - print(paste("The function could not find your chosen station: '",Stations[s],"'",sep=""),quote=FALSE) - print("Possible choices in the Station column of selected data.frame are",quote=FALSE) - print(unique(Datain$Station)) - stop("Choose a station that exists in your data.frame") - } - - #-------------------------------------------------------------------# - # Loop through the variables # - #-------------------------------------------------------------------# - for(v in 1:length(Variables)){ - #--------------------------------------------------------------# - # Does the selected variable exist? # - #--------------------------------------------------------------# - if(!(Variables[v] %in% names(tmp))){ - print(paste("There is no column in your data.frame called '", Variables[v],"'",sep=""),quote=FALSE) - print("Please choose out of",quote=FALSE) - print(as.character(names(tmp))) - stop() - } - - #-------------------------------------------------------------# - # Now move on to years. 
First, fill in any missing dates # - #-------------------------------------------------------------# - if(option == 1){ - startday <- as.Date(paste(min(as.numeric(format.Date(tmp$Date,"%Y")),na.rm=TRUE),1,1,sep="-")) - endday <- as.Date(paste(max(as.numeric(format.Date(tmp$Date,"%Y")),na.rm=TRUE),12,31,sep="-")) - tmp$TEMPDate2 <- as.Date(tmp$Date) - tmp$Date <- as.Date(tmp$Date) - }else{ - startday <- as.Date(paste(min(tmp$Year,na.rm=TRUE),1,1,sep="-")) - endday <- as.Date(paste(max(tmp$Year,na.rm=TRUE),12,31,sep="-")) - tmp$TEMPDate2 <- as.Date(paste(tmp$Year,tmp$Month,tmp$Day,sep="-")) - tmp$Date <- as.Date(tmp$Date) - } - - newdatain <- data.frame(Date = seq(from=startday,to= endday,by="d")) - new <- merge(newdatain,tmp,by="Date",all.x=TRUE,all.y=TRUE) - # removes NA rows - new <- new[!is.na(new$TEMPDate2), ] - loc <- which(names(new)== Variables[v]) - - # newdates <- seq(startday,endday,1) - # new <- as.data.frame(matrix(nrow=length(newdates),ncol=dim(tmp)[2])) - # names(new) <- names(tmp) - # new$TEMPDate2 <- newdates - - # tmpmatch <- match(tmp$TEMPDate2,new$TEMPDate2) - # new[tmpmatch,(1:(dim(new)[2]-1))] <- tmp[,(1:(dim(new)[2]-1))] - - # new <- m - - #-------------------------------------------------------------# - # Rename things like the Station name # - #-------------------------------------------------------------# - new$Station <- Stations[s] - - #-------------------------------------------------------------# - # Now start to display the data # - # First, make an output template # - #-------------------------------------------------------------# - template <- as.data.frame(matrix(nrow=32,ncol=(1+length(Months)))) - names(template) <- c("MONTH",month.abb[Months]) - template[1,] <- "****" - template[,1] <- c("DAY",1:31) - - statstemplate <- as.data.frame(matrix(ncol=(1+length(Months)),nrow=length(monstats))) - names(statstemplate) <- names(template) - statstemplate[,1] <- monstats - - - for(y in 1:length(Years)){ - 
#----------------------------------------------------------# - # Print out header # - #----------------------------------------------------------# - print(" ",quote=FALSE) - print(paste("STATION : '",Stations[s],"'",sep=""),quote=FALSE) - print(paste("VARIABLE : '",Variables[v],"'",sep=""),quote=FALSE) - print(paste("YEAR :",Years[y]),quote=FALSE) - - #----------------------------------------------------------# - # And set up the template/subset the data for that year # - #----------------------------------------------------------# - out <- template - outstats <- statstemplate - tmp2 <- new[as.numeric(format.Date(new$TEMPDate2,"%Y")) == Years[y],] - - #----------------------------------------------------------# - # Now, put into the data.frame for each month.. # - #----------------------------------------------------------# - for(m in 1:length(Months)){ - dat <- tmp2[as.numeric(format.Date(tmp2$TEMPDate2,"%m")) == Months[m],] - - for(st in 1:length(monstats)){ - if(length(eval(parse(text=paste("suppressWarnings(",monstats[st],"(dat[,loc],na.rm=TRUE))"))))> 1){ - outstats[,(m+1)] <- "NA" - if(m == 1){ - print("",quote=FALSE) - print("*********",quote=FALSE) - print(paste("The '", monstats[st],"' statistic has more than one value in its output, e.g.",sep=""),quote=FALSE) - print(eval(parse(text=paste("suppressWarnings(",monstats[st],"(dat[,loc],na.rm=TRUE))"))),quote=FALSE) - print("therefore it has been ignored",quote=FALSE) - print("",quote=FALSE) - } - }else{ - outstats[st,(m+1)] <- as.character(sprintf("%12.1f",eval(parse(text=paste("suppressWarnings(",monstats[st],"(dat[,loc],na.rm=TRUE))"))))) - outstats[st,(m+1)] <- gsub("\\s+","",outstats[st,(m+1)]) - } - } - # If all the data is NA, set the stats to NA (except summary_count_miss) - if(length(dat[,loc])==length(which(is.na(dat[,loc]==TRUE)))){ - outstats[which(monstats != "summary_count_miss"),(m+1)] <- "NA" - } - # If there's no data, set all the stats to NA - if(length(dat[,loc]) == 0) outstats[,(m+1)] <- 
"NA" - #-------------------------------------------------------# - # Set to 2 d.p. # - #-------------------------------------------------------# - dat[is.na(dat[,loc])==FALSE,loc] <- as.character(sprintf("%12.2f",dat[is.na(dat[,loc])==FALSE,loc])) - - #-------------------------------------------------------# - # Deal with trace rainfall values # - #-------------------------------------------------------# - tmptrace <- which(dat[,loc] == "0.01") - if(length(tmptrace) > 0 && !missing(Tracecode)){ - dat[tmptrace,loc] <- Tracecode - } - - #-------------------------------------------------------# - # Set to 1 d.p. and remove spaces # - #-------------------------------------------------------# - dat[,loc] <- substr(dat[,loc],1,nchar(dat[,loc])-1) - dat[,loc] <- gsub("\\s+","",dat[,loc]) - - #-------------------------------------------------------# - # Deal with missing values (exc non existant dates) # - #-------------------------------------------------------# - tmpmiss <- which(is.na(dat[,loc]) == TRUE) - if(length(tmpmiss) > 0 && !missing(Misscode)){ - dat[tmpmiss,loc] <- Misscode - } - - #-------------------------------------------------------# - # Deal with zero values (exc non existant dates) # - #-------------------------------------------------------# - tmpzero <- which(dat[,loc] == "0.0") - if(length(tmpzero) > 0 && !missing(Zerocode)){ - dat[tmpzero,loc] <- Zerocode - } - tmpzero <- which(outstats[,(m+1)] == "0.0") - if(length(tmpzero) > 0){ - outstats[tmpzero,(m+1)] <- "0" - } - #-------------------------------------------------------# - # And put into the display data.frame # - #-------------------------------------------------------# - dat$TEMPday <- as.numeric(format.Date(dat$TEMPDate2,"%d")) - out[2:32,(m+1)] <- dat[match(1:31,dat$TEMPday),loc] - }#m - - #----------------------------------------------------------# - # Add the statistics onto the main data.frame # - #----------------------------------------------------------# - out[dim(out)[1]+1,] <- "____" 
- out[dim(out)[1],1] <- "STATS" - # temp fix to shorten display name of summary function - outstats[,1][outstats[,1] == "summary_count_miss"] <- "n_miss" - out <- rbind(out,outstats) - - #----------------------------------------------------------# - # Set non existant dates to be blank and display # - #----------------------------------------------------------# - out[is.na(out) == TRUE] <- " " - print(out,quote=FALSE,justify="right",row.names=F,digits=1) - print(" ",quote=FALSE) - - #----------------------------------------------------------# - # Write to file if requested # - #----------------------------------------------------------# - if(is.na(Fileout) == FALSE){ - if(flag==0){ - fileoutput <- file(Fileout, "w") - flag = 1 - cat(paste("STATION : '",Stations[s],"'",sep=""),"\n",file=fileoutput,sep="") - cat(paste("VARIABLE : '",Variables[v],"'",sep=""),"\n",file=fileoutput,sep="") - cat(paste("YEAR :",Years[y]),"\n",file=fileoutput,sep="") - cat(paste(names(out)),"\n",file=fileoutput,sep="\t") - write.table(out,file=fileoutput,sep="\t",append=TRUE,quote=FALSE,col.names=FALSE,row.names=FALSE) - }else{ - cat("\n",paste("STATION : '",Stations[s],"'",sep=""),"\n",file=fileoutput,sep="") - cat(paste("VARIABLE : '",Variables[v],"'",sep=""),"\n",file=fileoutput,sep="") - cat(paste("YEAR :",Years[y]),"\n",file=fileoutput,sep="") - cat(paste(names(out)),"\n",file=fileoutput,sep="\t") - write.table(out,file=fileoutput,sep="\t",append=TRUE,quote=FALSE,col.names=FALSE,row.names=FALSE) - } - } - rm(tmp2) - rm(out) - }#y - }#v - }#s - if(is.na(Fileout) == FALSE){ - close(fileoutput) - } -} #end of function - - - - - - -################################################################################# +# removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/Backend_Components/calculations.R b/instat/static/InstatObject/R/Backend_Components/calculations.R index e7a52f6259a..1d22a4176d1 100644 --- 
a/instat/static/InstatObject/R/Backend_Components/calculations.R +++ b/instat/static/InstatObject/R/Backend_Components/calculations.R @@ -1,900 +1,5 @@ -calculation <- R6::R6Class("calculation", - public = list( - initialize = function(function_name = "", parameters = list(), - calculated_from = c(), is_recalculable = TRUE, - sub_calculations = list(), type = "", filter_conditions = list(), - filters = list(), name = "") { - self$function_name = function_name - self$parameters = parameters - self$calculated_from = c() - self$is_recalculable = is_recalculable - self$sub_calculations = sub_calculations - self$type = type - self$name = name - self$filter_conditions = filter_conditions - self$filters = filters - }, - function_name = "", - parameters = list(), - calculated_from = c(), - is_recalculable = TRUE, - sub_calculations = list(), - filter_conditions = list(), - filters = list(), - name = "", - type = "" - ) - # , - # Removed because filter_conditions can be public - # private = list( - # .filter_conditions = list() - # ) - # , - # active = list( - # filter_conditions = function(new_filter_conditions) { - # if(missing(new_filter_conditions)) return(private$.filter_conditions) - # else private$.filter_conditions <- new_filter_conditions - # } - # ) -) - -calculation$set("public", "add_sub_calculation", function(sub_calculation, name) { - sub_calculations[[name]] <- sub_calculation -} -) - -calculation$set("public", "data_clone", function() { - ret <- calculation$new(function_name = self$function_name, parameters = self$parameters, - calculated_from = self$calculated_from, is_recalculable = self$is_recalculable, - sub_calculations = self$sub_calculations, type = self$type, - filter_conditions = self$filter_conditions, filters = self$filters, - name = self$name) - return(ret) -} -) - -# This ensures cloned filter objects from older saved data_books have the expected parameters -check_filter <- function(filter_obj) { - if (is.null(filter_obj$parameters[["and_or"]])) 
filter_obj$parameters[["and_or"]] <- "&" - if (is.null(filter_obj$parameters[["outer_not"]])) filter_obj$parameters[["outer_not"]] <- FALSE - if (is.null(filter_obj$parameters[["inner_not"]])) filter_obj$parameters[["inner_not"]] <- FALSE - return(filter_obj) -} - -# calculation$set("public", "data_clone", function() { -# ret = calculation$new(function_name = private$function_name, parameters = private$parameters, calculated_from = private$calculated_from, is_recalculable = private$is_recalculable, sub_calculations = private$sub_calculations, type = private$type, filter_conditions = private$.filter_conditions) -# sub_calculations[[name]] <- sub_calculation -# } -# ) - -DataBook$set("public", "apply_calculation", function(calc) { - if(calc$type == "summary") { - out <- self$get_data_objects(calc[["parameters"]][["data_name"]])$calculate_summary(calc = calc, ... = calc[["parameters"]][["..."]]) - if(calc[["parameters"]][["store_results"]]) self$append_summaries_to_data_object(out, calc[["parameters"]][["data_name"]], calc[["parameters"]][["columns_to_summarise"]], calc[["parameters"]][["summaries"]], calc[["parameters"]][["factors"]], calc[["parameters"]][["summary_name"]], calc) - if(calc[["parameters"]][["return_output"]]) return(out) - else return(NULL) - } -} -) - -DataBook$set("public", "save_calculation", function(end_data_frame, calc) { - self$get_data_objects(end_data_frame)$save_calculation(calc) -} -) - -DataSheet$set("public", "save_calculation", function(calc) { - if(calc$name == "") calc$name <- next_default_item("calc", names(private$calculations)) - if(calc$name %in% names(private$calculations)) warning("There is already a calculation called ", calc$name, ". It will be replaced.") - private$calculations[[calc$name]] <- calc - return(calc$name) -} -) - -# class to store calculations -# Fields: -# function_exp : string - passed directly to one of dplyr functions e.g. 
"Yield > 50" for a filter or "Yield * Size" for a calculation -# type : string - one of "by", "calculation", "filter", "summary", "combination" to determine the type of calculation to do -# each corresponds directly to one of dplyr's functions (except combination). -# "by" - for dplyr's "group_by", to group the data by column(s) (usually factors) in preparation for doing summaries, for example. -# "sort" - for dplyr's "arrange", to sort the data by column(s) -# "calculation" - for dplyr's "mutate", a simple calculation to produce a column within the same data frame -# "filter" - for dplyr's "filter", to filter the rows of a data frame by any logical expression -# "summary" - for dplyr's "summarise", to produce summaries. Produces a singple value unless the data is already grouped, -# and then one value for each "group" is produced. -# "combination" - for calculations which contain sub calculations (and possibly mutations) but no main calculation is needed. -# name : string - a name for the calculation. If save_calc = FALSE then name is not used anywhere. -# result_name : string - a name for the output (usually a column) produced by the calculation e.g. "YieldDouble" -# result_data_frame : string - a name for the data frame that the output should go to. This could be an existing data frame on a new one -# the default, "", means the linking system will be used to determine the data frame the output should below to -# manipulations : list - a list of calculations to be performed before any sub_calculations and the main calculation is performed. 
-# the order of these is important, as the output from each manipulation is the input to the next manipulation -# the output of the final manipulation is the input for all sub_calculations -# these are typically "by" and "filter" calculations, but could be any -# sub_calculations : list - a list of calculations to be performed after manipulations, but before the main calculation -# typically these are calculations which must be calculated before the main calculation can be done. -# the output of the final manipulation is the input for all sub_calculations -# the order is not important, as the output from each sub_calculation is not passed as the input to the next sub_calculation -# If the order is important, then manipulations should be used instead. -# calculated_from : list - a list with values as the names of the columns the calculation depends on, -# and names as the names of the data frames corresponding the columns e.g. list(survey = "Yield", survey = "Size") -# save : integer- either, 0: nothing is saved, 1: calculation is saved but not the result or 2: calculation and result is saved -# (saving the result without saving the calculation was decided not to be a sensible option - prevents recalculating etc. -# saving calculation only is useful to reproduce results in output window without needing to save in a data frame e.g. single value summaries) - -instat_calculation <- R6::R6Class("instat_calculation", - public = list( - initialize = function(function_exp = "", type = "", name = "", result_name = "", result_data_frame = "", manipulations = list(), - sub_calculations = list(), calculated_from = list(), save = 0, before = FALSE, adjacent_column = "", param_list = list()) { - if((type == "calculation" || type == "summary") && missing(result_name)) stop("result_name must be provided for calculation and summary types") - if(type == "combination" && save > 0) { - warning("combination types do not have a main calculation which can be saved. 
save_output will be stored as FALSE") - save <- 0 - #TODO Should this do something else like set save_output = TRUE for all sub_calculations? - } - self$function_exp <- function_exp - self$type <- type - self$name <- name - self$result_name <- result_name - self$result_data_frame <- result_data_frame - self$manipulations <- manipulations - self$sub_calculations <- sub_calculations - self$calculated_from <- calculated_from - self$save <- save - self$before <- before - self$adjacent_column <- adjacent_column - self$param_list <- param_list - }, - name = "", - result_name = "", - result_data_frame = "", - type = "", - manipulations = list(), - sub_calculations = list(), - function_exp = "", - calculated_from = list(), - save = 0, - before = FALSE, - adjacent_column = "", - param_list = list() - ) -) - -instat_calculation$set("public", "data_clone", function(...) { - ret <- instat_calculation$new(function_exp = self$function_exp, type = self$type, - name = self$name, result_name = self$result_name, - manipulations = lapply(self$manipulations, function(x) x$data_clone()), - sub_calculations = lapply(self$sub_calculations, function(x) x$data_clone()), - calculated_from = self$calculated_from, save = self$save, - param_list = self$param_list) - # adjacent column / before to be in here? - return(ret) -} -) - -# Calculation labels -#These are the names of the list returned by apply_instat_calculation -c_data_label <- "data" -c_link_label <- "link" -c_has_summary_label <- "has_summary" -c_has_filter_label <- "has_filter" - -# This performs the calculation, including saving the output and calculation if required. -# A list of 4 elements is returned: -# $data : a data frame containing the output from the calculation, usually not just the output but also other columns at the same "level" -# $link : a link used to determine which data frame the output should be saved in. 
-# $has_summary : a logical value indicating whether a summary has been done -# $has_filter : a logical value indicating whether a filter has been done - -# This method is called recursively, and it would not be called by a user, another function would always handle the output and display -# results to the user (usually only the $data part of the list) -DataBook$set("public", "apply_instat_calculation", function(calc, curr_data_list, previous_manipulations = list(), param_list = list()) { - # for our by calculation, read our drop parameter which is stored in param_list. This is read in - drop_value <- ifelse("drop" %in% names(param_list), param_list$drop, FALSE) - preserve_value <- ifelse("preserve" %in% names(param_list), param_list$preserve, FALSE) - - # apply each manipulation first, and recursively store the output and pass to the next manipulation - # because of this, manipulations are dependant on each other - for(manipulation in calc$manipulations) { - curr_data_list <- self$apply_instat_calculation(manipulation, curr_data_list, previous_manipulations, param_list = param_list) - previous_manipulations[[length(previous_manipulations) + 1]] <- manipulation - } - # If curr_data_list is not empty, (either an argument or from manipulations) - # then this is passed in to apply_instat_calculation for each sub_calculation - # sub_calculations are independant of each other (the order does not affect the output) - if(!missing(curr_data_list)) { - sub_calc_results <- curr_data_list - curr_groups <- groups(curr_data_list[[c_data_label]]) - } - else curr_groups <- c() - first_sub_calc <- TRUE - - for(sub_calc in calc$sub_calculations) { - curr_sub_calc <- self$apply_instat_calculation(sub_calc, curr_data_list, previous_manipulations, param_list = param_list) - if(first_sub_calc) { - sub_calc_results <- curr_sub_calc - first_sub_calc <- FALSE - } - else { - #### Set the require_merge logical - # Defined as variables as these are needed later - #overall_merge_required <- 
sub_calc_results[[c_require_merge_label]] - #current_calc_merge_required <- curr_sub_calc[[c_require_merge_label]] - overall_has_summary <- sub_calc_results[[c_has_summary_label]] - overall_has_filter <- sub_calc_results[[c_has_filter_label]] - current_calc_has_summary <- curr_sub_calc[[c_has_summary_label]] - current_calc_has_filter <- curr_sub_calc[[c_has_filter_label]] - - # A merge is required if a merge was already required, or if the current sub_calculation requires a merge - #sub_calc_results[[c_require_merge_label]] <- overall_merge_required || current_calc_merge_required - # TODO how to set new values for has summary/filter - - #### Set the data and link - # Defined as variables as these are used throughout - curr_calc_link_cols <- curr_sub_calc[[c_link_label]][["link_cols"]] - overall_calc_link_cols <- sub_calc_results[[c_link_label]][["link_cols"]] - curr_calc_from <- curr_sub_calc[[c_link_label]][["from_data_frame"]] - overall_calc_from <- sub_calc_results[[c_link_label]][["from_data_frame"]] - - # Warning if current sub calc result is already in the data - if(sub_calc$result_name %in% names(sub_calc_results[[c_data_label]])) warning(sub_calc$result_name, " is already a column in the existing data. The column will be replaced. This may have unintended consequences for the calculation") - - # If either calc is a single value summary we don't do a merge. - if((current_calc_has_summary && length(curr_calc_link_cols) == 0) || (overall_has_summary && length(overall_calc_link_cols) == 0)) { - # Don't think this needs to be done separately now - # If both calcs are single value summaries - # if(current_calc_has_summary && length(curr_calc_link_cols) == 0 && overall_has_summary && length(overall_calc_link_cols) == 0) { - # sub_calc_results[[c_data_label]] <- mutate(sub_calc_results[[c_data_label]], curr_sub_calc[[c_data_label]]) - # } - - # If curr_calc is a single value, add this on to overall data as new column - # QUESTION: Should there be checks here? 
This "works" with any two data frames - # because it's just adding a single value as a new column - if(current_calc_has_summary && length(curr_calc_link_cols) == 0) { - sub_calc_results[[c_data_label]][[sub_calc$result_name]] <- curr_sub_calc[[c_data_label]][[1]] - } - # If only overall is a single value, add this on to the current sub calc output - # QUESTION: Should there be checks here? This "works" with any two data frames - # because it's just adding a single value as a new column - else if(overall_has_summary && length(overall_calc_link_cols) == 0 && !(current_calc_has_summary && length(curr_calc_link_cols) == 0)) { - temp_data <- curr_sub_calc[[c_data_label]] - temp_data[[names(sub_calc_results[[c_data_label]])[1]]] <- sub_calc_results[[c_data_label]][[1]] - sub_calc_results[[c_data_label]] <- temp_data - sub_calc_results[[c_has_summary_label]] <- curr_sub_calc[[c_has_summary_label]] - sub_calc_results[[c_has_filter_label]] <- curr_sub_calc[[c_has_filter_label]] - sub_calc_results[[c_link_label]] <- curr_sub_calc[[c_link_label]] - #TODO Multiple links needed - # Above changes the from_data_frame. Is that what we want? - } - } - # In this case, both are simple calculations on the same data frame without filters - # So we just add a column instead of a merge. - else if(curr_calc_from == overall_calc_from && !overall_has_summary && !overall_has_filter && !current_calc_has_summary && !current_calc_has_filter) { - sub_calc_results[[c_data_label]][[sub_calc$result_name]] <- curr_sub_calc[[c_data_label]][[sub_calc$result_name]] - } - # Otherwise we must do a merge. - # If we can't do a merge, we stop here. 
- else { - # To be able to do a merge, a key in one of the DFs must be "equivalent" to a subset of a key in the other - # If the DF is a summary, then the link columns define the only key - if(overall_has_summary) { - overall_links <- list() - overall_links[[1]] <- overall_calc_link_cols - } - # Otherwise, there must be existing keys defined in the data frame - else { - if(!self$has_key(overall_calc_from)) stop("Cannot merge sub calculations as there is no key defined in ", overall_calc_from) - overall_links <- self$get_keys(overall_calc_from) - } - if(current_calc_has_summary) { - curr_calc_links <- list() - curr_calc_links[[1]] <- curr_calc_link_cols - } - else { - if(!self$has_key(curr_calc_from)) stop("Cannot merge sub calculations as there is no key defined in ", curr_calc_from) - curr_calc_links <- self$get_keys(curr_calc_from) - } - by <- NULL - for(temp_overall_link in overall_links) { - for(temp_curr_link in curr_calc_links) { - equ_overall_cols <- self$get_equivalent_columns(curr_calc_from, temp_curr_link, overall_calc_from) - if(length(equ_overall_cols) > 0 && all(equ_overall_cols %in% temp_overall_link)) { - by <- temp_curr_link - names(by) <- equ_overall_cols - join_into_overall <- TRUE - break - } - equ_curr_cols <- self$get_equivalent_columns(overall_calc_from, temp_overall_link, curr_calc_from) - if(length(equ_curr_cols) > 0 && all(equ_curr_cols %in% temp_curr_link)) { - by <- temp_overall_link - names(by) <- equ_curr_cols - join_into_overall <- FALSE - break - } - } - if(length(by) > 0) break - } - if(length(by) == 0) { - stop("Cannot find linking columns to merge output from sub calculations.") - } - - # If the data frames are the same and filters have been used then need to subset before the join - # so that we don't get duplicate columns - joined <- FALSE - if(curr_calc_from == overall_calc_from) { - if(overall_has_filter && current_calc_has_filter) { - # If both sub calcs have filter then the order of the rows in the output may not be sensible 
- # if filters are different but this case should be rare - # To avoid possibly losing data by subsetting columns we don't subset here and instead add to by columns - additional_cols <- intersect(names(sub_calc_results[[c_data_label]]), names(curr_sub_calc[[c_data_label]])) - additional_cols <- additional_cols[!additional_cols %in% by] - if(length(additional_cols) > 0) by <- c(by, additional_cols) - sub_calc_results[[c_data_label]] <- dplyr::full_join(curr_sub_calc[[c_data_label]], sub_calc_results[[c_data_label]], by = by) - joined <- TRUE - } - else if(overall_has_filter) { - # If the overall data has a filter and current does not, then we should merge the overall into the current - # We subset the current data to only have by and the output columns so that merge doesn't produce duplicate columns - # Overall sub data should be full data so we don't lose any data by subsetting the current sub calc - sub_calc_results[[c_data_label]] <- dplyr::full_join(curr_sub_calc[[c_data_label]][c(as.vector(by), sub_calc$result_name)], sub_calc_results[[c_data_label]], by = by) - # Current data has no filter so output now does not - sub_calc_results[[c_has_filter_label]] <- FALSE - joined <- TRUE - } - else if(current_calc_has_filter) { - # If the current data has a filter and overall does not, then we should merge the current into the overall - # We subset the current data to only have by and output columns so that merge doesn't produce duplicate columns - # Overall sub data should be full data so we don't lose any data by subsetting the current sub calc - sub_calc_cols <- as.vector(by) - if(sub_calc$result_name != "") sub_calc_cols <- c(sub_calc_cols, sub_calc$result_name) - sub_calc_results[[c_data_label]] <- dplyr::full_join(sub_calc_results[[c_data_label]], curr_sub_calc[[c_data_label]][sub_calc_cols], by = by) - # Overall data has no filter so output does even though current does - joined <- TRUE - } - } - if(!joined) { - if(join_into_overall) 
sub_calc_results[[c_data_label]] <- dplyr::full_join(sub_calc_results[[c_data_label]], curr_sub_calc[[c_data_label]], by = by) - else { - sub_calc_results[[c_data_label]] <- dplyr::full_join(curr_sub_calc[[c_data_label]], sub_calc_results[[c_data_label]], by = by) - # The overall data will be joined into the current sub calc, so this becomes the new link - sub_calc_results[[c_link_label]] <- curr_sub_calc[[c_link_label]] - } - } - } - } - } - - # If there were any sub_calculations then the input for the main calculation should be the output from the last sub_calculation - # Otherwise it is the output from the mainipulations - if(!first_sub_calc) curr_data_list <- sub_calc_results - - #TODO investigate better way to do this - # Any case where we don't want this? - # we want param_list to read into them all, not just "by", otherwise we lose our parameters here. - for(var in curr_groups) { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% group_by(dplyr::across({{ var }}), .add = TRUE, .drop = drop_value) - } - - # Names of the data frames required for the calculation - data_names <- unique(as.vector(names(calc$calculated_from))) - # If argument was missing and there were no manipulations or sub_calculations then it should be created. - if(missing(curr_data_list)) { - if(length(data_names) == 0) stop("No data specified for calculation.") - #else if(length(data_names) > 1) stop("Calculations from multiple data frame not yet implemented") - else { - curr_data_list <- list() - #TODO Add current filter as manipulation in calc definition if needed. 
- # The data is the data from the instat object based on data_names - curr_data_list[[c_data_label]] <- self$get_data_frame(data_names[[1]], use_current_filter = FALSE) - # The link has from_data_frame based on data_names and no current linking columns - link_list <- list(data_names[[1]], c()) - names(link_list) <- c("from_data_frame", "link_cols") - curr_data_list[[c_link_label]] <- link_list - # By default, a summary or filter has not been done - #curr_data_list[[c_require_merge_label]] <- FALSE - curr_data_list[[c_has_summary_label]] <- FALSE - curr_data_list[[c_has_filter_label]] <- FALSE - } - } - - col_names_exp = c() - col_names_exp_2 <- c() - i = 1 - # This checks that the columns specified in calculated_from appear in the current data - for(i in seq_along(calc$calculated_from)) { - col_name <- calc$calculated_from[[i]] - data_frame_name <- names(calc$calculated_from)[i] - overall_calc_from <- curr_data_list[[c_link_label]][["from_data_frame"]] - # TODO Is this a good check? - if(!(col_name %in% names(curr_data_list[[c_data_label]]))) { - if(curr_data_list[[c_has_summary_label]]) { - overall_links <- list() - overall_links[[1]] <- curr_data_list[[c_link_label]][["link_cols"]] - } - # Otherwise, there use the keys if they exist - else { - if(self$has_key(overall_calc_from)) { - overall_links <- self$get_keys(overall_calc_from) - } - else overall_links <- NULL - } - if(self$has_key(data_frame_name)) { - new_data_links <- self$get_keys(data_frame_name) - } - else new_data_links <- NULL - #TODO Make this it's own method? 
- by <- NULL - # Search for linking columns from overall_links - for(temp_overall_link in overall_links) { - equ_curr_cols <- self$get_equivalent_columns(overall_calc_from, temp_overall_link, data_frame_name) - if(length(equ_curr_cols) > 0) { # && all(equ_curr_cols %in% temp_curr_link)) { - by <- temp_overall_link - names(by) <- equ_curr_cols - join_into_overall <- FALSE - break - } - } - # If not found, search for linking columns from new_data_links - if(length(by) == 0) { - for(temp_curr_link in new_data_links) { - equ_overall_cols <- self$get_equivalent_columns(data_frame_name, temp_curr_link, overall_calc_from) - if(length(equ_overall_cols) > 0) { #&& all(equ_overall_cols %in% temp_overall_link)) { - by <- temp_curr_link - names(by) <- equ_overall_cols - join_into_overall <- TRUE - break - } - } - } - if(length(by) == 0) { - stop("Cannot find linking columns to merge output from sub calculations with data for calculated_from.") - } - if(join_into_overall){ - new_data_list <- self$get_data_frame(data_frame_name, use_current_filter = FALSE) - by_col_attributes <- list() - for(i in seq_along(by)) { - # Collect column attributes - by_col_attributes[[by[[i]]]] <- get_column_attributes(new_data_list[[by[[i]]]]) - - # Check and align the data types for each "by" column - if (class(new_data_list[[by[[i]]]]) != class(curr_data_list[[c_data_label]][[by[[i]]]])) { - warning(paste0("Type is different for ", by[[i]], " in the two data frames. 
Setting as numeric in both data frames.")) - - # Convert factors to numeric if necessary - if (class(new_data_list[[by[[i]]]]) == "factor") { - new_data_list[[by[[i]]]] <- as.numeric(as.character(new_data_list[[by[[i]]]])) - } else if (class(curr_data_list[[c_data_label]][[by[[i]]]]) == "factor") { - curr_data_list[[c_data_label]][[by[[i]]]] <- as.numeric(as.character(curr_data_list[[c_data_label]][[by[[i]]]])) - } else { - stop(paste0("Type is different for ", by[[i]], " in the two data frames and cannot be coerced.")) - } - } - } - curr_data_list[[c_data_label]] <- dplyr::full_join(curr_data_list[[c_data_label]], self$get_data_frame(data_frame_name, use_current_filter = FALSE), by = by) - } else { - curr_groups <- dplyr::groups(curr_data_list[[c_data_label]]) - curr_data_list[[c_data_label]] <- dplyr::full_join(self$get_data_frame(data_frame_name, use_current_filter = FALSE), curr_data_list[[c_data_label]], by = by) - #TODO investigate better way to do this - # Any case where we don't want this? 
- for(var in curr_groups) { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::group_by(dplyr::across({{ var }}), .add = TRUE, .drop = drop_value) - } - # The overall data is joined into the current sub calc, so the curr_data_list is "reset" to default values - curr_data_list[[c_link_label]] <- list(from_data_frame = data_frame_name, link_cols = c()) - curr_data_list[[c_has_summary_label]] <- FALSE - curr_data_list[[c_has_filter_label]] <- FALSE - } - } - # This is a character vector containing the column names in a format that can be passed to dplyr functions using Standard Evalulation - col_names_exp[[i]] <- lazyeval::interp(~ var, var = as.name(col_name)) - col_names_exp_2[i] <- col_name - i = i + 1 - } - - # this type is adding a column to the data - # the data is at the same "level" so the link is unchanged - if(calc$type == "calculation") { - if(calc$result_name %in% names(curr_data_list[[c_data_label]])) warning(calc$result_name, " is already a column in the existing data. The column will be replaced. 
This may have unintended consequences for the calculation") - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - #dplyr::mutate_(.dots = setNames(list(as.formula(paste0("~", calc$function_exp))), calc$result_name)) - dplyr::mutate(!!as.name(calc$result_name) := !!rlang::parse_expr(calc$function_exp)) - - } - # this type performs a summary - # the data is not at a different "level" so the link is changed and link columns are the groups of the data before summarising - # A merge is now required because the data is at a different "level" - else if(calc$type == "summary") { - curr_data_list[[c_link_label]][["link_cols"]] <- as.character(dplyr::groups(curr_data_list[[c_data_label]])) - calc_from_data_name <- curr_data_list[[c_link_label]][["from_data_frame"]] - formula_fn_exp <- as.formula(paste0("~", calc$function_exp)) - # note: important that there is *no* space between | for grepl function - # and important there IS a psace in str_detect..! - - if (exists("col_name")){ - # get the data type of the column - col_data_type <- self$get_variables_metadata(data_name = calc_from_data_name, column = col_name, property = "class") - # if it is a ordered factor... - if (any(stringr::str_detect("ordered", col_data_type))){ - # put in here the ones that DO work for ordered factor - - - if (any(grepl("summary_count|summary_count_miss|summary_n_distinct|summary_count_all|summary_min|summary_max|summary_range|summary_median|summary_quantile|p10|p20|p25|p30|p33|p40|p60|p67|p70|p75|p80|p90", formula_fn_exp))){ - - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - dplyr::summarise(!!calc$result_name := !!rlang::parse_expr(calc$function_exp)) - } else { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - #dplyr::summarise_(.dots = setNames(list(NA), calc$result_name)) - dplyr::summarise(!!calc$result_name := NA) - } - # if it is a factor or character, do not work for anything except... 
- } else if (any(stringr::str_detect("factor | character", col_data_type))){ - # put in here the ones that DO work for factor or character - - - if (any(grepl("summary_count|summary_count_miss|summary_n_distinct|summary_count_all", formula_fn_exp))){ - - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - dplyr::summarise(!!calc$result_name := !!rlang::parse_expr(calc$function_exp)) - } else { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - dplyr::summarise(!!calc$result_name := NA) - } - } else if (any(stringr::str_detect("Date | POSIXct | POSIXt", col_data_type))){ - # put in here the ones that DO NOT work for date - if (any(grepl("summary_sum", formula_fn_exp))){ - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - dplyr::summarise(!!calc$result_name := NA) - } else { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% - dplyr::summarise(!!calc$result_name := !!rlang::parse_expr(calc$function_exp)) - } - } else { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::summarise(!!calc$result_name := !!rlang::parse_expr(calc$function_exp)) - #curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::summarise_(.dots = setNames(list(as.formula(paste0("~", calc$function_exp))), calc$result_name)) - } - } else{ - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::summarise(!!calc$result_name := !!rlang::parse_expr(calc$function_exp)) - } - curr_data_list[[c_has_summary_label]] <- TRUE - } - # This type is grouping the data - # The data remains unchanged so link and require merge remain unchanged - else if(calc$type == "by") { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::group_by(dplyr::across({{ col_names_exp_2 }}), .add = TRUE, .drop = drop_value) - - #curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::group_by_(.dots = col_names_exp, add = TRUE, .drop = FALSE) 
-} - # This type is sorting the data - # The rows are now in a different order so a merge is required - else if(calc$type == "sort") { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::arrange(across({{ col_names_exp_2 }})) - #curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::arrange_(.dots = col_names_exp) - curr_data_list[[c_has_filter_label]] <- TRUE - } - # This type is filtering the data - # The data is at the same "level" so the link is unchanged - # The rows are now different so a merge is required - else if(calc$type == "filter") { - curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::filter(!!rlang::parse_expr(calc$function_exp), .preserve = preserve_value) - #curr_data_list[[c_data_label]] <- curr_data_list[[c_data_label]] %>% dplyr::filter_(.dots = as.formula(paste0("~", calc$function_exp))) - curr_data_list[[c_has_filter_label]] <- TRUE - } - # This type is when there is no main calculation but some sub_calculations - # There is no change to the data - else if(calc$type == "combination") {} - else stop("Cannot detect calculation type: ", calc$type) - - # This is done to clear the column attributes which are carried from the calculated columns - # TODO test this to check for any unintended side effects - # Seems only safe way to do this, as.vector can't be used on factor, Date etc. 
- if(calc$type == "calculation" || calc$type == "summary") { - result_col <- curr_data_list[[c_data_label]][[calc$result_name]] - for(att in names(attributes(result_col))[!names(attributes(result_col)) %in% c("levels", "class")]) { - attr(curr_data_list[[c_data_label]][[calc$result_name]], att) <- NULL - } - } - # if calc$save == 2 then column generated by calculation is saved into instat object and calc saved in to_data_frame - if(calc$save == 2) self$save_calc_output(calc, curr_data_list, previous_manipulations) - # if output is not saved the calculation can still be saved but now it is saved with the from_data_frame - # (to_data_frame may not exist) - else if(calc$save == 1) self$save_calculation(data_names, calc) - # list is returned so it can be used recursively for manipulations, sub_calculations etc. - return(curr_data_list) -} -) - -# Call this to run a calculation and display the data -DataBook$set("public", "run_instat_calculation", function(calc, display = TRUE, param_list = list()) { - # param list has to be read in separately because of recursive nature of apply_instat_function. We want to ensure our param_list are in all calc()'s. 
- out <- self$apply_instat_calculation(calc, param_list = param_list) - if(display) return(out$data) -} -) - -# given a set of columns in one data frame, this will return named list with corresponding columns in second data frame, where a link exists -# TODO: Needs to update to not just look at direct links -DataBook$set("public", "get_corresponding_link_columns", function(first_data_frame_name, first_data_frame_columns, second_data_frame_name) { - by <- c() - if(self$link_exists_between(first_data_frame_name, second_data_frame_name)) { - existing_link <- self$get_link_between(first_data_frame_name, second_data_frame_name) - link_pairs <- unlist(existing_link$link_columns) - for(link_column in first_data_frame_columns) { - if(existing_link$from_data_frame == first_data_frame_name && existing_link$to_data_frame == second_data_frame_name) { - if(link_column %in% names(link_pairs)) { - by[link_column] <- link_pairs[which(names(link_pairs) == link_column)][1] - } - else by[link_column] <- link_column - } - else if(existing_link$from_data_frame == second_data_frame_name && existing_link$to_data_frame == first_data_frame_name) { - if(link_column %in% link_pairs) { - by[link_column] <- names(link_pairs)[which(link_pairs == link_column)][1] - } - else by[link_column] <- link_column - } - } - } - # If no link then do by by columns in first data frame - else { - by <- first_data_frame_columns - names(by) <- first_data_frame_columns - } - return(by) -} -) - -# finds a link between two data frames and returns named list used for by -# also checks link columns still are in both data frames -DataBook$set("public", "get_link_columns_from_data_frames", function(first_data_frame_name, first_data_frame_columns, second_data_frame_name, second_data_frame_columns) { - by = c() - if(self$link_exists_between(first_data_frame_name, second_data_frame_name)) { - existing_link <- self$get_link_between(first_data_frame_name, second_data_frame_name) - found <- FALSE - for(curr_link_set in 
existing_link$link_columns) { - if(existing_link$from_data_frame == first_data_frame_name && existing_link$to_data_frame == second_data_frame_name) { - if(all(curr_link_set %in% first_data_frame_columns) && all(names(curr_link_set) %in% second_data_frame_columns)) { - by <- curr_link_set - break - } - } - else if(existing_link$from_data_frame == second_data_frame_name && existing_link$to_data_frame == first_data_frame_name) { - if(all(curr_link_set %in% second_data_frame_columns) && all(names(curr_link_set) %in% first_data_frame_columns)) { - by <- names(curr_link_set) - names(by) <- curr_link_set - break - } - } - } - } - return(by) -} -) - -# Called from apply_instat_calculation if calc$save_calc == TRUE -DataBook$set("public", "save_calc_output", function(calc, curr_data_list, previous_manipulations) { - - # Add previous manipulations to calc so that it can be rerun on its own (it may have been a sub calculation) - calc$manipulations <- c(previous_manipulations, calc$manipulations) - calc_dependencies <- calc$get_dependencies() - # Variables used throughout method - calc_from_data_name <- curr_data_list[[c_link_label]][["from_data_frame"]] - calc_link_cols <- curr_data_list[[c_link_label]][["link_cols"]] - - # Not sure this is correct. What if result is going into a differennt data frame? - if(calc$result_name %in% names(self$get_data_frame(calc_from_data_name))) warning(calc$result_name, " is already a column in the existing data. The column will be replaced by the output from the calculation. 
This may have unintended consequences for the calculation") - if(calc$result_data_frame != "") { - to_data_name <- calc$result_data_frame - if(to_data_name %in% names(self$get_data_names())) { - #TODO - } - else { - to_data_list <- list() - # Ensures that the to_data_name is a valid name - to_data_name <- calc$result_data_frame - to_data_name <- make.names(to_data_name) - to_data_name <- next_default_item(to_data_name, self$get_data_names(), include_index = FALSE) - # Subset to only get linking columns and result (don't want sub calcs as well, saved separately) - to_data_list[[to_data_name]] <- curr_data_list[[c_data_label]] - self$import_data(to_data_list) - to_data_exists <- TRUE - if(length(calc_link_cols) > 0) { - # Add the link to the new to_data_name - new_key <- calc_link_cols - names(new_key) <- calc_link_cols - self$add_link(calc_from_data_name, to_data_name, new_key, keyed_link_label) - # Add metadata to the linking columns - # This adds metadata: is_calculated = TRUE to the linking columns, which indicates that the column has been created by a calculation - self$append_to_variables_metadata(to_data_name, calc_link_cols, is_calculated_label, TRUE) - } - - # Adds metadata at data frame level to indicate that the data frame is calculated - # Note: all columns do not have to be calculated for data frame to be set as calculated - self$append_to_dataframe_metadata(to_data_name, is_calculated_label, TRUE) - } - } - else { - if(curr_data_list[[c_has_summary_label]]) { - # If there has been a summary, we look for an existing data frame that this could be linked to - link_def <- self$get_possible_linked_to_defintion(calc_from_data_name, calc_link_cols) - # If this is not empty then it is a list of two items: 1. the data frame to link to 2. 
the columns to link to - if(length(link_def) > 0) { - to_data_exists <- TRUE - to_data_name <- link_def[[1]] - # The check above only confirms it is possible to have a direct link to link_def[[1]] - # If there is not already a direct link between the data frames, we add one - if(!self$link_exists_from(calc_from_data_name, calc_link_cols)) { - link_pairs <- link_def[[2]] - names(link_pairs) <- calc_link_cols - self$add_link(calc_from_data_name, to_data_name, link_pairs, keyed_link_label) - } - # This is done so that calc$name can be used later and we know it won't be changed - # We can only do this check once we know the to_data_frame as this is where the calc is stored - if(calc$name %in% self$get_calculation_names(to_data_name)) { - calc$name <- next_default_item(calc$name, self$get_calculation_names(to_data_name)) - } - if(calc$result_name %in% self$get_column_names(to_data_name)) { - # Delete is needed because merge will not replace - # If not wanting to replace, this should be checked when calculation is defined. - warning("A column named ", calc$result_name, " already exists in ", to_data_name, ". 
It will be replaced by the output from the calculation.") - suppressWarnings(self$remove_columns_in_data(to_data_name, calc$result_name, TRUE)) - } - if(length(calc_link_cols) > 0) { - # merge_data merges into to_data_frame in instat object - # method takes care of data frame attributes correctly - # need to subset so that only the new column from this calc is added (not sub_calc columns as well as they have already been added if saved) - # type = "full" so that we do not lose any data from either part of the merge - by <- calc_link_cols - names(by) <- link_def[[2]] - self$get_data_objects(to_data_name)$merge_data(curr_data_list[[c_data_label]][c(calc_link_cols, calc$result_name)], by = by, type = "full") - } - else { - self$get_data_objects(to_data_name)$add_columns_to_data(calc$result_name, curr_data_list[[c_data_label]][calc$result_name], before = calc$before, adjacent_column = calc$adjacent_column) - } - } - else { - # If no link exists then the to_data_frame doesn't exist so output from calc becomes new to_data_frame - # and a link will be added to new to_data_frame - to_data_list <- list() - # Ensures that the to_data_name is a valid name that doesn't exist in list of current data frame names - to_data_name <- paste(calc_from_data_name, "by", paste(calc_link_cols, collapse = "_"), sep="_") - to_data_name <- make.names(to_data_name) - to_data_name <- next_default_item(to_data_name, self$get_data_names(), include_index = FALSE) - # Subset to only get linking columns and result (don't want sub calcs as well, saved separately) - to_data_list[[to_data_name]] <- curr_data_list[[c_data_label]][c(calc_link_cols, calc$result_name)] - self$import_data(to_data_list) - to_data_exists <- TRUE - # Add the link to the new to_data_frame - new_key <- calc_link_cols - names(new_key) <- calc_link_cols - self$add_link(calc_from_data_name, to_data_name, new_key, keyed_link_label) - - if(length(calc_link_cols) > 0) { - # Add metadata to the linking columns - # This adds metadata: 
is_calculated = TRUE to the linking columns, which indicates that the column has been created by a calculation - self$append_to_variables_metadata(to_data_name, calc_link_cols, is_calculated_label, TRUE) - } - - # Adds metadata at data frame level to indicate that the data frame is calculated - # Note: all columns do not have to be calculated for data frame to be set as calculated - self$append_to_dataframe_metadata(to_data_name, is_calculated_label, TRUE) - } - } - else if(curr_data_list[[c_has_filter_label]]) { - # If filter done and no summary done then to_data_frame == from_data_frame - # to do the join there must be a key defined in from dataframe because output may have a subset of rows of original data - # TODO should we still add a link in this case? - to_data_name <- calc_from_data_name - # If the data frame has keys defined then we use get_link_columns_from_data_frames to find the by - if(self$has_key(calc_from_data_name)) { - by <- self$get_link_columns_from_data_frames(calc_from_data_name, names(curr_data_list[[c_data_label]]), calc_from_data_name, self$get_column_names(calc_from_data_name)) - # subset to only get output and key columns, do not want sub_calculation or extra columns to be merged as well - #TODO If by = NULL should we try the merge with a warning or just stop? - if(length(by) == 0) stop("Cannot save output because the key columns are not present in the calculation output") - self$get_data_objects(calc_from_data_name)$merge_data(curr_data_list[[c_data_label]][c(as.vector(by), calc$result_name)], by = by, type = "full") - } - # Cannot do merge if the data frame has no keys defined - else { - #TODO Should we try the merge? 
- stop("Cannot save output from this calculation because the data frame does not have any defined keys.") - } - } - else { - # If no summary or join, then simply add result as new column - # Because no join was required, the rows should match 1-1 in both data frames - self$add_columns_to_data(data_name = calc_from_data_name, col_name = calc$result_name, col_data = curr_data_list[[c_data_label]][[calc$result_name]], before = calc$before, adjacent_column = calc$adjacent_column) - to_data_name <- calc_from_data_name - if(calc$name %in% self$get_calculation_names(to_data_name)) { - calc$name <- next_default_item(calc$name, self$get_calculation_names(to_data_name)) - } - } - } - - # Add metadata for the new column - output_column <- calc$result_name - names(output_column) <- to_data_name - # Add metadata to calculated_from columns - # for example, calculated_from may include sub_calculation columns which were not saved and so don't appear in the instat object data - for(i in seq_along(calc_dependencies)) { - # This adds metadata: has_dependants = TRUE which indicates that the calculated_from columns have columns that depend on them - self$append_to_variables_metadata(names(calc_dependencies[i]), calc_dependencies[[i]], has_dependants_label, TRUE) - # This adds the output_column to the calculated_from columns' list of dependent columns - self$add_dependent_columns(names(calc_dependencies[i]), calc_dependencies[[i]], output_column) - } - # This adds metadata: is_calculated = TRUE to the output column, which indicates that the column has been created by a calculation - if(calc$result_name != "") { - self$append_to_variables_metadata(to_data_name, calc$result_name, is_calculated_label, TRUE) - - # This adds metadata: dependencies to the output column with value, a list of the calculated_from columns - if(length(calc_dependencies) > 0) self$append_to_variables_metadata(to_data_name, calc$result_name, dependencies_label, calc_dependencies) - # This adds metadata: 
calculated_by to the output column, with value as the name of the calculation - self$append_to_variables_metadata(to_data_name, calc$result_name, calculated_by_label, calc$name) - } - self$save_calculation(to_data_name, calc) -} -) - -# Could be a standalone method? Method of calculation? -instat_calculation$set("public", "get_dependencies", function(depens = c()) { - for(manip in self$manipulations) { - for(i in seq_along(manip$calculated_from)) { - ind <- which(depens == manip$calculated_from[[i]]) - if(length(ind) == 0 || names(depens)[ind] != names(manip$calculated_from)[i]) { - depens <- c(depens, manip$calculated_from[i]) - } - } - } - for(sub_calc in self$sub_calculations) { - depens <- sub_calc$get_dependencies(depens) - } - for(j in seq_along(self$calculated_from)) { - ind <- which(depens == self$calculated_from[[j]]) - if(length(ind) == 0 || names(depens)[ind] != names(self$calculated_from)[j]) { - depens <- c(depens, self$calculated_from[j]) - } - } - return(depens) -} -) - -calc_from_convert <- function(x) { - calc_list <- list() - for(i in seq_along(x)) { - for(j in seq_along(x[[i]])) { - calc_list[[length(calc_list) + 1]] <- x[[i]][j] - names(calc_list)[length(calc_list)] <- names(x)[i] - } - } - return(calc_list) -} - -# given a column name (column) and a calculated_from list (x) -# this returns the name of the data frame the column is associated with -find_df_from_calc_from <- function(x, column) { - for(i in seq_along(x)) { - if(column %in% x[[i]]) return(names(x)[i]) - } - return("") -} - -DataBook$set("public", "remove_unused_station_year_combinations", function(data_name, year, station){ - # Create linked data name - linked_data_name <- self$get_linked_to_data_name(data_name, link_cols=c(year, station)) - - # Column Summaries - self$calculate_summary(data_name = data_name, - store_results=TRUE, - factors=c(year, station), - summaries=c("summary_count"), - silent=TRUE) - - self$rename_column_in_data(data_name = linked_data_name, 
column_name="count_all", new_val="count_year_station_combination_for_linking", label="") - - # Create Filter subdialog: Created new filter - self$add_filter(filter=list(C0=list(column="count_year_station_combination_for_linking", operation="! is.na")), data_name = linked_data_name, filter_name = "removing_additional_years") - - # Dialog: Filter - self$copy_data_object(data_name = linked_data_name, new_name = linked_data_name, filter_name="removing_additional_years") - - # Right click menu: Delete Column(s) - self$remove_columns_in_data(data_name=linked_data_name, cols="count_year_station_combination_for_linking") -} -) +# to run on load due to issues with old RDS files +calculation <- instatCalculations::calculation +get_data_book_output_object_names <- databook:::get_data_book_output_object_names +get_data_book_scalar_names <- databook:::get_data_book_scalar_names +overall_label="[Overall]" \ No newline at end of file diff --git a/instat/static/InstatObject/R/Backend_Components/instat_comment.R b/instat/static/InstatObject/R/Backend_Components/instat_comment.R index bb87979a09e..72e3f7047b2 100644 --- a/instat/static/InstatObject/R/Backend_Components/instat_comment.R +++ b/instat/static/InstatObject/R/Backend_Components/instat_comment.R @@ -1,162 +1 @@ -# A comment is metadata for a row or cell of a data frame -# A DataSheet will contain a list of instat_comment objects as part of the metadata for the data frame -# id : numeric/character - A unique identifier for the comment. id must be unique within a data frame. -# This could be a number or a character -# key_values : named character vector - This identifies the row the comment is on. -# The names are the names of the key columns in the data frame -# The values are the values of those columns at the row the comment is on. 
-# column : character - If the comment is on a cell, column is the name of the column of the cell -# value : single value - If the comment is on a cell, value is the value in the cell at the time the comment was created. -# type : character - The type of comment. One of "critical", "warning", "messaged" -# comment : character - The comment text -# label : character - A label or grouping for the comment e.g. if comments are produced by an operation they may all have the same label. -# This then allows similar comments to be identified e.g. for editing/deleting -# calculation : character - If the comment was created through a calculation e.g. filtering the data frame, calculation shows how the calculation done on the data frame -# time_stamp : POSIXct, POSIXt - Date and time the comment was created on. -# replies : list - A list of replies to the comment. A reply could be a comment itself -# resolved : logical - Is the comment marked as resolved (default FALSE). -# active : logical - Is the comment marked as active (default TRUE) -# attributes : list - A named list of additional information about the comment. 
- -comment_types <- c("critical", "warning", "message", "") - -instat_comment <- R6::R6Class("instat_comment", - public = list( - initialize = function(id = "", - key_values = c(), - column = "", - value = "", - type = "", - comment = "", - label = "", - calculation = "", - time_stamp = "", - replies = list(), - resolved = FALSE, - active = TRUE, - attributes = list()) { - self$id <- id - self$key_values <- key_values - self$column <- column - self$value <- value - if(!type %in% comment_types) stop("type must be blank or one of: ", paste(comment_types, collapse = ",")) - self$type <- type - self$comment <- comment - self$label <- label - self$calculation <- calculation - if(time_stamp == "") time_stamp <- Sys.time() - self$time_stamp <- time_stamp - self$replies <- replies - self$resolved <- resolved - self$active <- active - self$attributes <- attributes - }, - id = "", - key_values = c(), - column = "", - value = "", - type = "", - comment = "", - label = "", - calculation = "", - time_stamp = "", - replies = list(), - resolved = FALSE, - active = TRUE, - attributes = list() - ), - private = list(), - active = list() -) - -instat_comment$set("public", "data_clone", function(...) 
{ - replies <- list() - for(curr_reply in self$replies) { - if("instat_comment" %in% class(curr_reply)) replies[[length(replies) + 1]] <- curr_reply$data_clone() - else replies[[length(replies) + 1]] <- curr_reply - } - ret <- instat_comment$new(id = self$id, - key_values = self$key_values, - column = self$column, - value = self$value, - type = self$type, - comment = self$comment, - label = self$label, - calculation = self$calculation, - time_stamp = self$time_stamp, - replies = replies, - resolved = self$resolved, - active = self$active, - attributes = self$attributes) - return(ret) -} -) - -DataBook$set("public", "add_comment", function(data_name, new_comment) { - self$get_data_objects(data_name)$add_comment(new_comment) -} -) - -DataSheet$set("public", "add_comment", function(new_comment) { - if(!self$has_key()) stop("Define a key before adding comments. Comments can only be added to data frames when rows can be identified by a key.") - if(!"instat_comment" %in% class(new_comment)) stop("new_comment must be of class 'instat_comment'") - if(!self$is_key(names(new_comment$key_values))) stop("The columns specified as the names of key_values must be a key in the data frame") - all_comment_ids <- self$get_comment_ids() - if(length(all_comment_ids) > 0 && new_comment$id %in% all_comment_ids) warning("A comment with id: ", new_comment$id, " already exists. 
It will be replaced.") - if(new_comment$id == "") new_comment$id <- as.character(max(as.numeric(all_comment_ids), 0, na.rm = TRUE) + 1) - private$comments[[new_comment$id]] <- new_comment -} -) - -DataBook$set("public", "delete_comment", function(data_name, comment_id) { - self$get_data_objects(data_name)$delete_comment(comment_id) -} -) - -DataSheet$set("public", "delete_comment", function(comment_id) { - if(!comment_id %in% self$get_comment_ids()) stop("No comment with id: ", comment_id, " was found.") - private$comments[[comment_id]] <- NULL -} -) - -DataBook$set("public", "get_comment_ids", function(data_name) { - return(self$get_data_objects(data_name)$get_comment_ids()) -} -) - -DataSheet$set("public", "get_comment_ids", function() { - return(names(private$comments)) -} -) - -DataBook$set("public", "get_comments_as_data_frame", function(data_name) { - return(self$get_data_objects(data_name)$get_comments_as_data_frame()) -} -) - -DataSheet$set("public", "get_comments_as_data_frame", function() { - id <- sapply(private$comments, function(x) x$id) - # Needs expanding for each key column - key_columns <- unique(unlist(sapply(private$comments, function(x) names(x$key_values)))) - # key_vals <- list() - # for(col in key_columns) { - # key_vals[[col]] <- sapply(private$comments, function(x) x$key_values[col]) - # } - column <- sapply(private$comments, function(x) x$column) - # Not sure what value will be yet - value <- sapply(private$comments, function(x) x$value) - type <- sapply(private$comments, function(x) x$type) - comment <- sapply(private$comments, function(x) x$comment) - label <- sapply(private$comments, function(x) x$label) - calculation <- sapply(private$comments, function(x) x$calculation) - # Returned as character to prevent sapply coercing to numeric - time_stamp <- sapply(private$comments, function(x) as.character(x$time_stamp)) - # TODO how to display replies in data frame? 
- no_replies <- sapply(private$comments, function(x) length(x$no_replies)) - resolved <- sapply(private$comments, function(x) x$resolved) - active <- sapply(private$comments, function(x) x$active) - # TODO how to display attributes in data frame? - no_attributes <- sapply(private$comments, function(x) length(x$attributes)) - return(data.frame(id = id, key_values = key_values, column = column, value = value, type = type, comment = comment, label = label, calculation = calculation, time_stamp = time_stamp, no_replies = no_replies, resolved = resolved, active = active, no_attributes = no_attributes)) -} -) \ No newline at end of file +# Removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/Backend_Components/link.R b/instat/static/InstatObject/R/Backend_Components/link.R index 47219b1b139..50aa506deb1 100644 --- a/instat/static/InstatObject/R/Backend_Components/link.R +++ b/instat/static/InstatObject/R/Backend_Components/link.R @@ -1,401 +1 @@ -# A link is a relationship between two data frames -# from_data_frame : character - the name of the first data frame of the link -# to_data_frame : character - the name of the second data frame of the link -# type : character - the type of link e.g. 
"keyed" -# link_columns : list - a list where each element defines how the data frames are linked -# It is a list because two data frames may be linked in different ways -# Each element of the list is a named character vector -# where the name-element pairs of the vector define linking columns between the data frames -# the name is the name of column in from_data_frame -# the element is the corresponding name of a column in to_data_frame -link <- R6::R6Class("link", - public = list( - initialize = function(from_data_frame = "", to_data_frame = "", type = "", link_columns = list()) { - self$from_data_frame <- from_data_frame - self$to_data_frame <- to_data_frame - self$type <- type - self$link_columns <- link_columns - }, - from_data_frame = "", - to_data_frame = "", - # TODO Is type needed anymore? - type = "", - # remove calculation - # replace with list of list of pairs of linking columns - # each list of pairs of columns corresponds to a key in the to_data_frame - link_columns = list() - ), - private = list(), - active = list() -) - -link$set("public", "data_clone", function(...) 
{ - ret <- link$new(from_data_frame = self$from_data_frame, to_data_frame = self$to_data_frame, - type = self$type, link_columns = self$link_columns) - return(ret) -} -) - -link$set("public", "rename_data_frame_in_link", function(old_data_name, new_data_name) { - if(self$from_data_frame == old_data_name) self$from_data_frame <- new_data_name - if(self$to_data_frame == old_data_name) self$to_data_frame <- new_data_name -} -) - -link$set("public", "rename_column_in_link", function(data_name, old_column_name, new_column_name) { - print(self$link_columns) - if(self$from_data_frame == data_name) { - for(i in seq_along(self$link_columns)) { - names(self$link_columns[[i]])[which(old_column_name %in% names(self$link_columns[[i]]))] <- new_column_name - } - } - if(self$to_data_frame == data_name) { - for(i in seq_along(self$link_columns)) { - self$link_columns[[i]][which(old_column_name %in% self$link_columns[[i]])] <- new_column_name - } - } -} -) - -DataBook$set("public", "update_links_rename_data_frame", function(old_data_name, new_data_name) { - for(i in seq_along(private$.links)) { - private$.links[[i]]$rename_data_frame_in_link(old_data_name, new_data_name) - } -} -) - -DataBook$set("public", "update_links_rename_column", function(data_name, old_column_name, new_column_name) { - for(i in seq_along(private$.links)) { - private$.links[[i]]$rename_column_in_link(data_name, old_column_name, new_column_name) - } -} -) - -DataBook$set("public", "add_link", function(from_data_frame, to_data_frame, link_pairs, type, link_name) { - if(length(names(link_pairs)) != length(link_pairs)) stop("link_pairs must be a named vector or list.") - if(!self$link_exists_between(from_data_frame, to_data_frame)) { - # This means when creating a link to single value data frame, there will be no key in to_data_frame - # Will this cause any issues? 
- if(length(link_pairs) > 0 && !self$is_key(to_data_frame, link_pairs)) { - message("link columns must be a key in the to_data_frame\nAttempting to create key...") - self$add_key(to_data_frame, as.character(link_pairs)) - message("New key created") - } - new_link <- link$new(from_data_frame = from_data_frame, to_data_frame = to_data_frame, link_columns = list(link_pairs), type = type) - if(missing(link_name)) link_name <- next_default_item("link", names(private$.links)) - if(link_name %in% names(private$.links)) warning("A link called ", link_name, " already exists. It wil be replaced.") - private$.links[[link_name]] <- new_link - } - else { - index <- integer(0) - for(i in 1:length(private$.links)) { - if(private$.links[[i]]$from_data_frame == from_data_frame && private$.links[[i]]$to_data_frame == to_data_frame) { - index <- i - from_on_left <- TRUE - break - } - else if(private$.links[[i]]$from_data_frame == to_data_frame && private$.links[[i]]$to_data_frame == from_data_frame) { - index <- i - from_on_left <- FALSE - break - } - } - # This should never happen because we are inside the Else of link_exists_between - if(length(index) == 0) stop("link not found") - - if(type != private$.links[[index]]$type) stop("Cannot add link of type ", type, ". These data frames are already linked by type: ", private$.links[[index]]$type) - curr_link_columns <- private$.links[[index]]$link_columns - curr_num_links <- length(curr_link_columns) - found <- FALSE - for(curr_link_pairs in curr_link_columns) { - # Are these the right checks on the link columns? - if(from_on_left && length(link_pairs) == length(curr_link_pairs) && setequal(names(link_pairs), names(curr_link_pairs))) { - message("A link with these columns already exists. A new link will not be added.") - found <- TRUE - break - } - else if(!from_on_left && length(link_pairs) == length(curr_link_pairs) && (setequal(link_pairs, names(curr_link_pairs)))) { - message("A link with these columns already exists. 
A new link will not be added.") - found <- TRUE - break - } - } - if(!found) { - if(!self$is_key(to_data_frame, link_pairs)) { - message("link columns must be a key in the to_data_frame\nAttempting to create key...") - self$add_key(to_data_frame, as.character(link_pairs)) - message("new key created") - } - if(from_on_left) private$.links[[index]]$link_columns[[curr_num_links + 1]] <- link_pairs - else { - new_link_pairs <- names(link_pairs) - names(new_link_pairs) <- link_pairs - private$.links[[index]]$link_columns[[curr_num_links + 1]] <- new_link_pairs - } - } - } - if (from_data_frame != to_data_frame){ - cat(paste("Link name:", link_name), - paste("From data frame:", from_data_frame), - paste("To data frame:", to_data_frame), - paste("Link columns:", paste(names(link_pairs), "=", link_pairs, collapse = ", ")), - sep = "\n") - } -} -) - -DataBook$set("public", "get_link_names", function(data_name, include_overall = TRUE, include, exclude, include_empty = FALSE, as_list = FALSE, excluded_items = c(), exclude_self_links = TRUE) { - if(exclude_self_links) { - out <- c() - i <- 1 - for(link in private$.links) { - if(link$from_data_frame != link$to_data_frame) out <- c(out, names(private$.links)[i]) - i <- i + 1 - } - } - else out <- names(private$.links) - if(as_list) { - lst <- list() - lst[[overall_label]] <- out - return(lst) - } - else return(out) -} -) - -DataBook$set("public", "link_exists_from", function(curr_data_frame, link_pairs) { - link_exists <- FALSE - for(curr_link in private$.links) { - if(curr_link$from_data_frame == curr_data_frame) { - for(curr_link_pairs in curr_link$link_columns) { - if(length(link_pairs) == length(curr_link_pairs) && setequal(link_pairs, names(curr_link_pairs))) { - return(TRUE) - break - } - } - } - } - return(FALSE) -} -) - -DataBook$set("public", "link_exists_from_by_to", function(first_data_frame, link_pairs, second_data_frame) { - link_exists <- FALSE - for(curr_link in private$.links) { - if(curr_link$from_data_frame == 
first_data_frame && curr_link$to_data_frame == second_data_frame) { - for(curr_link_pairs in curr_link$link_columns) { - if(length(link_pairs) == length(curr_link_pairs) && setequal(link_pairs, names(curr_link_pairs))) { - return(TRUE) - break - } - } - } - } - return(FALSE) -} -) - -DataBook$set("public", "get_linked_to_data_name", function(from_data_frame, link_cols = c(), include_self = FALSE) { - out <- c() - if(include_self) out <- c(out, from_data_frame) - for(curr_link in private$.links) { - if(curr_link$from_data_frame == from_data_frame) { - if(length(link_cols) == 0) { - out <- c(out, curr_link$to_data_frame) - } - else { - for(curr_link_pairs in curr_link$link_columns) { - if(length(link_cols) == length(curr_link_pairs) && setequal(link_cols, names(curr_link_pairs))) { - out <- c(out, curr_link$to_data_frame) - } - } - } - } - } - return(unique(out)) -} -) - -DataBook$set("public", "get_linked_to_definition", function(from_data_frame, link_pairs) { - to_data_name <- self$get_linked_to_data_name(from_data_frame, link_pairs) - if(length(to_data_name) > 0) { - # TODO what happens if there is more than 1? 
- to_data_name <- to_data_name[1] - curr_link <- self$get_link_between(from_data_frame, to_data_name) - for(curr_link in private$.links) { - for(curr_link_pairs in curr_link$link_columns) { - if(length(link_pairs) == length(curr_link_pairs) && setequal(link_pairs, names(curr_link_pairs))) { - return(list(to_data_name, as.vector(curr_link_pairs[link_pairs]))) - } - } - } - } - return(list()) -} -) - -DataBook$set("public", "get_possible_linked_to_defintion", function(from_data_frame, link_pairs) { - def <- self$get_linked_to_definition(from_data_frame, link_pairs) - if(length(def) != 0) return(def) - else { - prev_data_links <- list(list(from_data_frame, link_pairs)) - continue <- TRUE - while(continue) { - curr_data_links <- prev_data_links - curr_data_names <- sapply(curr_data_links, function(x) x[[1]]) - for(to_data_name in self$get_data_names()) { - i = 1 - for(curr_from_data_frame in curr_data_names) { - curr_link_cols <- self$link_between_containing(curr_from_data_frame, curr_data_links[[i]][[2]], to_data_name) - # Is it enough to check unqiue data frames? 
- if(length(curr_link_cols) != 0 && !(to_data_name %in% sapply(curr_data_links, function(x) x[[1]]))) { - curr_data_links[[length(curr_data_links) + 1]] <- list(to_data_name, curr_link_cols) - } - i = i + 1 - } - } - if(length(prev_data_links) != length(curr_data_links)) { - curr_data_names <- sapply(curr_data_links, function(x) x[[1]]) - prev_data_names <- sapply(prev_data_links, function(x) x[[1]]) - for(i in seq_along(curr_data_names)) { - if(curr_data_names[i] %in% setdiff(curr_data_names, prev_data_names)) { - def <- self$get_linked_to_definition(curr_data_names[i], curr_data_links[[i]][[2]]) - if(length(def) > 0) return(def) - } - } - prev_data_links <- curr_data_links - } - else continue <- FALSE - } - return(c()) - } -} -) - -DataBook$set("public", "get_equivalent_columns", function(from_data_name, columns, to_data_name) { - if(from_data_name == to_data_name) equivalent_columns <- columns - else equivalent_columns <- self$link_between_containing(from_data_name, columns, to_data_name) - if(length(equivalent_columns) != 0) return(equivalent_columns) - else { - prev_data_links <- list(list(from_data_name, columns)) - continue <- TRUE - while(continue) { - curr_data_links <- prev_data_links - curr_data_names <- sapply(curr_data_links, function(x) x[[1]]) - for(temp_data_name in self$get_data_names()) { - i = 1 - for(curr_from_data_frame in curr_data_names) { - if(curr_from_data_frame == temp_data_name) curr_link_cols <- curr_data_links[[i]][[2]] - curr_link_cols <- self$link_between_containing(curr_from_data_frame, curr_data_links[[i]][[2]], temp_data_name) - if(length(curr_link_cols) != 0) { - if(temp_data_name == to_data_name) { - return(curr_link_cols) - } - else if(!(temp_data_name %in% sapply(curr_data_links, function(x) x[[1]]))) { - curr_data_links[[length(curr_data_links) + 1]] <- list(temp_data_name, curr_link_cols) - } - } - i = i + 1 - } - } - if(length(prev_data_links) == length(curr_data_links)) continue <- FALSE - else prev_data_links <- 
curr_data_links - } - return(c()) - } -} -) - -# If ordered = TRUE then from_data_frame must be from_data_frame in the link -# otherwise from_data_frame could be to_data_frame in the link -DataBook$set("public", "link_exists_between", function(from_data_frame, to_data_frame, ordered = FALSE) { - if(ordered) { - return(any(sapply(private$.links, function(link) link$from_data_frame == from_data_frame && link$to_data_frame == to_data_frame))) - } - else { - return(any(sapply(private$.links, function(link) link$from_data_frame == from_data_frame && link$to_data_frame == to_data_frame)) - || any(sapply(private$.links, function(link) link$from_data_frame == to_data_frame && link$to_data_frame == from_data_frame))) - } -} -) - -DataBook$set("public", "link_between_containing", function(from_data_frame, containing_columns, to_data_frame) { - if(self$link_exists_between(from_data_frame, to_data_frame)) { - curr_link <- self$get_link_between(from_data_frame, to_data_frame) - for(curr_link_pairs in curr_link$link_columns) { - if(curr_link$from_data_frame == from_data_frame) { - if(all(containing_columns %in% names(curr_link_pairs))) { - out <- c() - for(col in containing_columns) { - ind <- which(names(curr_link_pairs) == col) - out <- c(out, curr_link_pairs[[ind]]) - } - return(out) - } - } - else { - if(all(containing_columns %in% curr_link_pairs)) { - out <- c() - for(col in containing_columns) { - ind <- which(curr_link_pairs == col) - out <- c(out, names(curr_link_pairs)[ind]) - } - return(out) - } - } - } - } - return(c()) -} -) - -DataBook$set("public", "get_link_between", function(from_data_frame, to_data_frame, ordered = FALSE) { - if(ordered) { - for(curr_link in private$.links) { - if((curr_link$from_data_frame == from_data_frame && curr_link$to_data_frame == to_data_frame)) { - return(curr_link) - } - } - } - else { - for(curr_link in private$.links) { - if((curr_link$from_data_frame == from_data_frame && curr_link$to_data_frame == to_data_frame) || 
(curr_link$from_data_frame == to_data_frame && curr_link$to_data_frame == from_data_frame)) { - return(curr_link) - } - } - } - return(NULL) -} -) - -DataBook$set("public", "view_link", function(link_name) { - temp_link <- self$get_links(link_name) - out <- "" - if(length(temp_link) > 0) { - out <- cat(paste( - paste("Link name:", link_name), - paste("From data frame:", temp_link$from_data_frame), - paste("To data frame:", temp_link$to_data_frame), - paste("Link columns:", paste(names(temp_link$link_columns), "=", temp_link$link_columns, collapse = ", ")), sep = "\n")) - } -} -) - -# A function to convert the variables in the linked "to data frame"" to be the same class as that of the "from data frame"". -DataBook$set("public", "convert_linked_variable", function(from_data_frame, link_cols) { - to_data_name <- self$get_linked_to_data_name(from_data_frame, link_cols=c(link_cols)) - if (!is.null(to_data_name)){ - linked_variable_name <- self$get_link_between(from_data_frame, to_data_name)$link_columns[[1]] - - # loop through all columns given in variable argument - for (i in seq_along(linked_variable_name)){ - variable_type <- self$get_column_data_types(data_name = from_data_frame, columns = names(linked_variable_name[i])) - linked_variable_type <- self$get_column_data_types(data_name = to_data_name, columns=linked_variable_name[i]) - - if (variable_type != linked_variable_type){ - self$convert_column_to_type(data_name=to_data_name, col_names=linked_variable_name[i], to_type=variable_type) - } - } - } -} -) +# removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/Backend_Components/options_by_context.R b/instat/static/InstatObject/R/Backend_Components/options_by_context.R index 8158b9aadb9..42552ecdb7d 100644 --- a/instat/static/InstatObject/R/Backend_Components/options_by_context.R +++ b/instat/static/InstatObject/R/Backend_Components/options_by_context.R @@ -1,41 +1 @@ -# ObyC types -option_1_label <- "option_1" -option_other_label <- 
"option_other" -context_1_label <- "context_1" -context_2_label <- "context_2" -context_3_label <- "context_3" -context_4_label <- "context_4" -context_other_label <- "context_other" -measurement_1_label <- "measurement_1" -measurement_other_label <- "measurement_other" -id_1_label <- "id_1" -id_other_label <- "id_other" -blocking_1_label <- "blocking_1" -blocking_other_label <- "blocking_other" - -obyc_all_types <- c(option_1_label, option_other_label, context_1_label, context_2_label, context_3_label, context_4_label, context_other_label, measurement_1_label, measurement_other_label, id_1_label, id_other_label, blocking_1_label, blocking_other_label) - -# Column metadata -obyc_type_label = "O_by_C_Type" - -# Data frame metadata -is_obyc_label = "Is_O_by_C" - -DataBook$set("public","define_as_options_by_context", function(data_name, obyc_types = NULL, key_columns = NULL) { - self$append_to_dataframe_metadata(data_name, is_obyc_label, TRUE) - for(curr_data_name in self$get_data_names()) { - if(!self$get_data_objects(data_name)$is_metadata(is_obyc_label)) { - self$append_to_dataframe_metadata(curr_data_name, is_obyc_label, FALSE) - } - } - self$get_data_objects(data_name)$set_options_by_context_types(obyc_types = obyc_types, key_columns = key_columns) -} -) - -DataSheet$set("public","set_options_by_context_types", function(obyc_types = NULL, key_columns = NULL) { - if(!all(names(obyc_types) %in% obyc_all_types)) stop("Cannot recognise the following types: ", paste(names(obyc_types)[!names(obyc_types) %in% obyc_all_types], collapse = ", ")) - invisible(sapply(names(obyc_types), function(name) self$append_to_variables_metadata(obyc_types[[name]], obyc_type_label, name))) - other_cols <- dplyr::setdiff(x = self$get_column_names(), y = unlist(obyc_types)) - self$append_to_variables_metadata(other_cols, obyc_type_label, NA) -} -) +# remove \ No newline at end of file diff --git a/instat/static/InstatObject/R/Backend_Components/summary_functions.R 
b/instat/static/InstatObject/R/Backend_Components/summary_functions.R index 44779cf8d52..50aa506deb1 100644 --- a/instat/static/InstatObject/R/Backend_Components/summary_functions.R +++ b/instat/static/InstatObject/R/Backend_Components/summary_functions.R @@ -1,1630 +1 @@ -#Methods temporarily here to avoid conflicts -DataSheet$set("public", "merge_data", function(new_data, by = NULL, type = "left", match = "all") { - #TODO how to use match argument with dplyr join functions - old_metadata <- attributes(private$data) - curr_data <- self$get_data_frame(use_current_filter = FALSE) - by_col_attributes <- list() - - if (!is.null(by)) { - for (i in seq_along(by)) { - # Collect column attributes - by_col_attributes[[by[[i]]]] <- get_column_attributes(curr_data[[by[[i]]]]) - - # Check and align the data types for each "by" column - if (!inherits(curr_data[[by[[i]]]], class(new_data[[by[[i]]]]))) { - warning(paste0("Type is different for ", by[[i]], " in the two data frames. Setting as numeric in both data frames.")) - - # Convert factors to numeric if necessary - if (inherits(curr_data[[by[[i]]]], "factor")) { - curr_data[[by[[i]]]] <- as.numeric(as.character(curr_data[[by[[i]]]])) - } else if (inherits(new_data[[by[[i]]]], "factor")) { - new_data[[by[[i]]]] <- as.numeric(as.character(new_data[[by[[i]]]])) - } else { - stop(paste0("Type is different for ", by[[i]], " in the two data frames and cannot be coerced.")) - } - } - } -} - - - # Perform the appropriate join based on the "type" argument - if (type == "left") { - new_data <- dplyr::left_join(curr_data, new_data, by = by) - } else if (type == "right") { - new_data <- dplyr::right_join(curr_data, new_data, by = by) - } else if (type == "full") { - new_data <- dplyr::full_join(curr_data, new_data, by = by) - } else if (type == "inner") { - new_data <- dplyr::inner_join(curr_data, new_data, by = by) - } else { - stop("type must be one of left, right, inner, or full") - } - - # Update the data in the object - 
self$set_data(new_data) - self$append_to_changes("Merged_data") - - # Restore the old metadata - for (name in names(old_metadata)) { - if (!name %in% c("names", "class", "row.names")) { - self$append_to_metadata(name, old_metadata[[name]]) - } - } - - self$append_to_metadata("is_calculated_label", TRUE) - self$add_defaults_meta() - self$add_defaults_variables_metadata(setdiff(names(new_data), names(curr_data))) - - # Add back column attributes for the "by" columns - if (!is.null(by)) { - for (i in seq_along(by_col_attributes)) { - self$append_column_attributes(col_name = by[[i]], new_attr = by_col_attributes[[i]]) - } - } -}) - -DataBook$set("public", "append_summaries_to_data_object", function(out, data_name, columns_to_summarise, summaries, factors = c(), summary_name, calc, calc_name = "") { - if(!is.character(data_name)) stop("data_name must be of type character") - - exists = FALSE - if(self$link_exists_from(data_name, factors)) { - #TODO what happens if there is more than 1? - summary_name <- self$get_linked_to_data_name(data_name, factors)[1] - summary_obj <- self$get_data_objects(summary_name) - exists <- TRUE - } - if(exists) { - #temp fix to avoid error merging data with overlapping names - curr_data <- summary_obj$get_data_frame(use_current_filter = FALSE) - for(i in 1:length(names(out))) { - curr_col_name <- names(out)[[i]] - if((!curr_col_name %in% factors) && curr_col_name %in% names(curr_data)) { - names(out)[[i]] <- next_default_item(curr_col_name, names(curr_data)) - } - } - summary_obj$merge_data(out, by = factors, type = "inner", match = "first") - } - else { - summary_data <- list() - if(missing(summary_name) || is.na(summary_name)) summary_name <- paste(data_name, "by", paste(factors, collapse = "_"), sep="_") - summary_name <- make.names(summary_name) - summary_name <- next_default_item(summary_name, self$get_data_names(), include_index = FALSE) - summary_data[[summary_name]] <- out - self$import_data(summary_data) - summary_obj <- 
self$get_data_objects(summary_name) - # TODO Should the be done here or in add_link? - #summary_obj$add_key(factors) - names(factors) <- factors - self$add_link(data_name, summary_name, factors, keyed_link_label) - } - - calc_out_columns <- names(out)[-(1:length(factors))] - dependent_cols <- list(calc_out_columns) - names(dependent_cols) <- summary_name - dependencies_cols <- list(columns_to_summarise) - names(dependencies_cols) <- data_name - calc_name <- self$save_calculation(summary_name, calc) - self$append_to_variables_metadata(data_name, columns_to_summarise, has_dependants_label, TRUE) - self$add_dependent_columns(data_name, columns_to_summarise, dependent_cols) - self$append_to_variables_metadata(summary_name, calc_out_columns, is_calculated_label, TRUE) - self$append_to_variables_metadata(summary_name, calc_out_columns, calculated_by_label, calc_name) - if(!exists) { - self$append_to_variables_metadata(summary_name, names(out)[1:length(factors)], is_calculated_label, TRUE) - self$append_to_variables_metadata(summary_name, names(out)[1:length(factors)], calculated_by_label, calc_name) - } - self$append_to_variables_metadata(summary_name, calc_out_columns, dependencies_label, dependencies_cols) -} -) - -DataBook$set("public", "calculate_summary", function(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_results = TRUE, drop = TRUE, return_output = FALSE, summary_name = NA, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, perc_return_all = FALSE, include_counts_with_percentage = FALSE, silent = FALSE, additional_filter, original_level = FALSE, signif_fig = 2, sep = "_", ...) 
{ - if(original_level) type <- "calculation" - else type <- "summary" - include_columns_to_summarise <- TRUE - if(is.null(columns_to_summarise) || length(columns_to_summarise) == 0) { - # temporary fix for doing counts of a data frame - # dplyr cannot count data frame groups without passing a column (https://stackoverflow.com/questions/44217265/passing-correct-data-frame-from-within-dplyrsummarise) - # This is a known issue (https://github.com/tidyverse/dplyr/issues/2752) - if(length(summaries) != 1 || summaries != count_label) { - mes <- "When there are no columns to summarise can only use count function as summary" - if(silent) { - warning(mes, "Continuing summaries by using count only.") - columns_to_summarise <- self$get_column_names(data_name)[1] - summaries <- count_label - } - else { - stop(mes) - } - } - else columns_to_summarise <- self$get_column_names(data_name)[1] - include_columns_to_summarise <- FALSE - } - if(!percentage_type %in% c("none", "factors", "columns", "filter")) stop("percentage_type: ", percentage_type, " not recognised.") - if(percentage_type == "columns") { - if(!(length(perc_total_columns) == 1 || length(perc_total_columns) == length(columns_to_summarise))) stop("perc_total_columns must either be of length 1 or the same length as columns_to_summarise") - } - if(!store_results) save <- 0 - else save <- 2 - summaries_display <- as.vector(sapply(summaries, function(x) ifelse(startsWith(x, "summary_"), substring(x, 9), x))) - if(percentage_type == "factors") { - manip_factors <- intersect(factors, perc_total_factors) - } - else manip_factors <- factors - if(length(manip_factors) > 0) { - calculated_from <- as.list(manip_factors) - names(calculated_from) <- rep(data_name, length(manip_factors)) - calculated_from <- as.list(calculated_from) - factor_by <- instat_calculation$new(type = "by", calculated_from = calculated_from, param_list = list(drop = drop)) - manipulations <- list(factor_by) - } - else manipulations <- list() - 
if(percentage_type == "factors") { - value_factors <- setdiff(factors, manip_factors) - if(length(value_factors) > 0) { - calculated_from <- as.list(value_factors) - names(calculated_from) <- rep(data_name, length(value_factors)) - calculated_from <- as.list(calculated_from) - factor_by <- instat_calculation$new(type = "by", calculated_from = calculated_from, param_list = list(drop = drop)) - value_manipulations <- list(factor_by) - } - else value_manipulations <- list() - } - sub_calculations <- list() - - i <- 0 - for(column_names in columns_to_summarise) { - i <- i + 1 - # In the case of counting without columns, the first column column will be the "calculated from" - # which will add unwanted column metadata - calculated_from <- list(column_names) - names(calculated_from) <- rep(data_name, length(calculated_from)) - j <- 0 - for(summary_type in summaries) { - j <- j + 1 - function_exp <- "" - # if(!is.null(weights)) { - # function_exp <- paste0(function_exp, ", weights = ", weights) - # } - extra_args <- list(...) 
- for(i in seq_along(extra_args)) { - function_exp <- paste0(function_exp, ", ", names(extra_args)[i], " = ", extra_args[i]) - } - function_exp <- paste0(function_exp, ")") - # function_exp <- paste0(function_exp, ", na.rm =", na.rm, ")") - if(is.null(result_names)) { - result_name = summaries_display[j] - if(include_columns_to_summarise){ - if (!is.null(extra_args$y)) result_name <- paste0(result_name, sep, extra_args$y, sep, column_names) - else result_name <- paste0(result_name, sep, column_names) - } - } - #TODO result_names could be horizontal/vertical vector, matrix or single value - else result_name <- result_names[i,j] - if(percentage_type == "none") { - summary_function_exp <- paste0(summary_type, "(x = ", column_names, function_exp) - summary_calculation <- instat_calculation$new(type = type, result_name = result_name, - function_exp = summary_function_exp, - calculated_from = calculated_from, save = save) - } - else { - values_calculation <- instat_calculation$new(type = type, result_name = result_name, - function_exp = paste0(summary_type, "(x = ", column_names, function_exp), - calculated_from = calculated_from, save = save) - if(percentage_type == "columns") { - if(length(perc_total_columns) == 1) perc_col_name <- perc_total_columns - else perc_col_name <- perc_total_columns[i] - totals_calculation <- instat_calculation$new(type = type, result_name = paste0(summaries_display[j], sep, perc_total_columns, "_totals"), - function_exp = paste0(summary_type, "(x = ", perc_col_name, function_exp), - calculated_from = calculated_from, save = save) - } - else if(percentage_type == "filter") { - #TODO - } - else if(percentage_type == "factors") { - values_calculation$manipulations <- value_manipulations - totals_calculation <- instat_calculation$new(type = "summary", result_name = paste0(result_name, "_totals"), - function_exp = paste0(summary_type, "(x = ", column_names, function_exp), - calculated_from = calculated_from, save = save) - } - function_exp <- 
paste0(values_calculation$result_name, "/", totals_calculation$result_name) - if(!perc_decimal) { - function_exp <- paste0("(", function_exp, ") * 100") - } - perc_result_name <- paste0("perc_", result_name) - summary_calculation <- instat_calculation$new(type = "calculation", result_name = perc_result_name, - function_exp = function_exp, - calculated_from = list(), save = save, sub_calculations = list(totals_calculation, values_calculation)) - } - sub_calculations[[length(sub_calculations) + 1]] <- summary_calculation - } - } - if(self$filter_applied(data_name)) { - curr_filter <- self$get_current_filter(data_name) - curr_filter_name <- curr_filter[["name"]] - curr_filter_calc <- self$get_filter_as_instat_calculation(data_name, curr_filter_name) - manipulations <- c(curr_filter_calc, manipulations) - } - if(!missing(additional_filter)) { - manipulations <- c(additional_filter, manipulations) - } - combined_calc_sum <- instat_calculation$new(type="combination", sub_calculations = sub_calculations, manipulations = manipulations) - - # setting up param_list. Here we read in .drop and .preserve - param_list <- list() - if (length(combined_calc_sum$manipulations) > 0){ - for (i in 1:length(combined_calc_sum$manipulations)){ - if (combined_calc_sum$manipulations[[i]]$type %in% c("by", "filter")){ - param_list <- c(param_list, combined_calc_sum$manipulations[[i]]$param_list) - } - } - } - out <- self$apply_instat_calculation(combined_calc_sum, param_list = param_list) - # relocate so that the factors are first still for consistency - if (percentage_type != "none"){ - out$data <- (out$data %>% dplyr::select(c(tidyselect::all_of(factors), tidyselect::all_of(manip_factors)), tidyselect::everything())) - } - if(return_output) { - dat <- out$data - if(percentage_type == "none" || perc_return_all) return(out$data) - else { - #This is a temp fix to only returning final percentage columns. 
- #Depends on result name format used above for summary_calculation in percentage case - if (percentage_type != "none" && include_counts_with_percentage){ - dat <- dat %>% dplyr::mutate(dplyr::across(where(is.numeric), round, signif_fig)) - dat <- dat %>% dplyr::mutate(perc_count = paste0(count, " (", perc_count, "%)")) %>% dplyr::select(-c("count", "count_totals")) - } else { - dat[c(which(names(dat) %in% factors), which(startsWith(names(dat), "perc_")))] - } - } - } -} -) - -DataBook$set("public", "summary", function(data_name, columns_to_summarise, summaries, factors = c(), store_results = FALSE, drop = FALSE, return_output = FALSE, summary_name = NA, add_cols = c(), filter_names = c(), ...) { - calculated_from = list() - calculated_from[[1]] <- list(data_name = data_name, columns = columns_to_summarise) - summaries <- unique(summaries) - summaries <- summaries[order(match(summaries, all_summaries))] - summaries_count <- summaries[startsWith(summaries, "summary_count_all")] - summaries_other <- setdiff(summaries, summaries_count) - summaries <- c(summaries_count, summaries_other) - count_summaries_max <- length(summaries_count) - summaries_max <- length(summaries) - - summary_names <- ifelse(startsWith(summaries, "summary_"), substr(summaries, 9, nchar(summaries)), summaries) - summary_names <- gsub("_", "__", summary_names) - summary_names <- make.unique(summary_names) - summary_count_names <- summary_names[1:count_summaries_max] - summary_other_names <- summary_names[(count_summaries_max + 1):summaries_max] - - col_data_type <- self$get_variables_metadata(data_name = data_name, column = columns_to_summarise, property = data_type_label) - - factors_disp <- dplyr::if_else(length(factors) == 0, ".id", factors) - factors_levels <- lapply(factors, function(x) { - fac_col <- self$get_columns_from_data(data_name, x) - if(is.factor(fac_col)) return(levels(fac_col)) - else return(sort(unique(fac_col))) - }) - factors_levels <- expand.grid(factors_levels) - 
names(factors_levels) <- factors - - results <- list() - i <- 1 - for(col_new in columns_to_summarise) { - results_temp_count <- list() - results_temp_other <- list() - for(j in seq_along(summaries)) { - calc <- calculation$new(type = "summary", parameters = list(data_name = data_name, columns_to_summarise = col_new, summaries = summaries[j], factors = factors, store_results = store_results, drop = drop, return_output = return_output, summary_name = summary_name, add_cols = add_cols, ... = ...), filters = filter_names, calculated_from = calculated_from) - calc_apply <- tryCatch(self$apply_calculation(calc), - error = function(c) { - if(length(factors) == 0) { - x <- data.frame(NA, NA) - names(x) <- c(".id", summary_names[j]) - return(x) - } - else { - x <- factors_levels - x[[summary_names[j]]] <- NA - return(x) - } - }) - names(calc_apply)[length(factors_disp) + 1] <- col_new - calc_apply$summary <- summary_names[j] - names(calc_apply) <- make.names(names(calc_apply), unique = TRUE) - if(j <= count_summaries_max) results_temp_count[[length(results_temp_count) + 1]] <- calc_apply - else results_temp_other[[length(results_temp_other) + 1]] <- calc_apply - } - if(length(results_temp_count) > 0) { - results_temp_count <- dplyr::bind_rows(results_temp_count) - results_temp_count <- format(results_temp_count, scientific = FALSE) - } - if(length(results_temp_other) > 0) { - results_temp_other <- dplyr::bind_rows(results_temp_other) - results_temp_other <- format(results_temp_other, scientific = FALSE) - # Convert summaries which have been coerced to numeric but should be dates - if("Date" %in% col_data_type[i]) { - results_temp_other[[col_new]] <- dplyr::if_else(summaries_other[match(results_temp_other$summary, summary_other_names)] %in% date_summaries, - as.character(as.Date(as.numeric(results_temp_other[[col_new]]), origin = "1970/1/1")), - dplyr::if_else(stringr::str_trim(results_temp_other[[col_new]]) == "NA", NA_character_, paste(results_temp_other[[col_new]], 
"days"))) - } - } - results_temp <- dplyr::bind_rows(results_temp_count, results_temp_other) - if(i == 1) results <- results_temp - else results <- dplyr::full_join(results, results_temp, by = c(factors_disp, "summary")) - i <- i + 1 - } - results <- results %>% select(c(factors_disp, "summary"), everything()) - if(length(factors) == 0) { - results$.id <- NULL - results$summary <- NULL - row.names(results) <- summary_names - } - return(results) -} -) - -DataSheet$set("public", "calculate_summary", function(calc, ...) { - columns_to_summarise = calc[["parameters"]][["columns_to_summarise"]] - summaries = calc[["parameters"]][["summaries"]] - factors = calc[["parameters"]][["factors"]] - drop = calc[["parameters"]][["drop"]] - add_cols = calc[["parameters"]][["add_cols"]] - if("na.rm" %in% names(calc[["parameters"]])) na.rm = calc[["parameters"]][["na.rm"]] - else na.rm = FALSE - filter_names = calc[["filters"]] - if(missing(summaries)) stop("summaries must be specified") - # Removed since curr_data_filter has same columns - # curr_data_full <- self$get_data_frame(use_current_filter = FALSE) - # if(!all(columns_to_summarise %in% names(curr_data_full))) stop(paste("Some of the columns from:",paste(columns_to_summarise, collapse = ","),"were not found in the data.")) - # if(!all(summaries %in% all_summaries)) stop(paste("Some of the summaries from:",paste(summaries, collapse = ","),"were not recognised.")) - # if(!all(factors %in% names(curr_data_full))) stop(paste("Some of the factors:","c(",paste(factors, collapse = ","),") were not found in the data.")) - combinations = expand.grid(summaries,columns_to_summarise) - # Removed to only keep general case - # if(length(summaries)==1) { - # if(length(columns_to_summarise) == 1) out = ddply(curr_data_filter, factors, function(x) match.fun(summaries)(x[[columns_to_summarise]],...), .drop = drop) - # else out = ddply(curr_data_filter, factors, function(x) sapply(columns_to_summarise, function(y) 
match.fun(summaries)(x[[y]],...)), .drop = drop) - # } - # else { - # if(length(columns_to_summarise) == 1) out = ddply(curr_data_filter, factors, function(x) sapply(summaries, function(y) match.fun(y)(x[[columns_to_summarise]],...)), .drop = drop) - # else out = ddply(curr_data_filter, factors, function(x) apply(combinations, 1, FUN = function(y) match.fun(y[[1]])(x[[y[[2]]]],...)), .drop = drop) - # } - if(length(filter_names) == 0) { - filter_names <- "no_filter" - } - i = 1 - for(filter_name in filter_names) { - curr_data_filter <- self$get_data_frame(use_current_filter = TRUE, filter_name = filter_name) - curr_filter <- self$get_filter(filter_name) - if(self$filter_applied()) { - calc_filters <- list(self$get_current_filter(), curr_filter) - } - else calc_filters <- list(curr_filter) - if(!all(columns_to_summarise %in% names(curr_data_filter))) stop(paste("Some of the columns from:",paste(columns_to_summarise, collapse = ","),"were not found in the data.")) - if(!all(summaries %in% all_summaries)) stop(paste("Some of the summaries from:",paste(summaries, collapse = ","),"were not recognised.")) - if(!all(factors %in% names(curr_data_filter))) stop(paste("Some of the factors:","c(",paste(factors, collapse = ","),") were not found in the data.")) - - out <- plyr::ddply(curr_data_filter, factors, function(x) apply(combinations, 1, FUN = function(y) { - # temp disabled to allow na.rm to be passed in - #na.rm <- missing_values_check(x[[y[[2]]]]) - if("na.rm" %in% names(list(...))) stop("na.rm should not be specified. Use xxx to specify missing values handling.") - match.fun(y[[1]])(x[[y[[2]]]], add_cols = x[add_cols], na.rm = na.rm, ...) 
- } - ), .drop = drop) - names(out)[-(1:length(factors))] <- get_summary_calculation_names(calc, summaries, columns_to_summarise, calc_filters) - if(i == 1) { - calc_columns <- out - } - else { - calc_columns <- full_join(calc_columns, out) - } - i = i + 1 - } - return(calc_columns) -} -) - -get_summary_calculation_names <- function(calc, summaries, columns_to_summarise, calc_filters) { - filter_description <- "" - i = 1 - for(filt in calc_filters) { - if(!filt$parameters[["is_no_filter"]]) { - if(i == 1) filter_description <- filt$name - else filter_description <- paste(filter_description, filt$name, sep = ".") - } - i = i + 1 - } - if(filter_description == "") { - out <- apply(expand.grid(paste0(substring(summaries, 9),"."), columns_to_summarise), 1, paste, collapse="") - } - else out <- apply(expand.grid(paste0(substring(summaries, 9),"."), paste0(columns_to_summarise, "_"), filter_description), 1, paste, collapse="") - out <- make.names(out) - return(out) -} - -missing_values_check <- function(x) { - return(FALSE) -} - -# summary function labels -sum_label <- "summary_sum" -mode_label <- "summary_mode" -count_label <- "summary_count_all" -count_missing_label <- "summary_count_miss" -count_non_missing_label <- "summary_count" -sd_label <- "summary_sd" -var_label <- "summary_var" -median_label <- "summary_median" -range_label <- "summary_range" -min_label <- "summary_min" -max_label <- "summary_max" -mean_label <- "summary_mean" -trimmed_mean_label <- "summary_trimmed_mean" -quartile_label <- "summary_quartile" -p10_label <- "p10" -p20_label <- "p20" -p25_label <- "p25" -p30_label <- "p30" -p33_label <- "p33" -p40_label <- "p40" -p60_label <- "p60" -p67_label <- "p67" -p70_label <- "p70" -p75_label <- "p75" -p80_label <- "p80" -p90_label <- "p90" -skewness_label <- "summary_skewness" -summary_skewness_mc_label <- "summary_skewness_mc" -summary_outlier_limit_label <- "summary_outlier_limit" -kurtosis_label <- "summary_kurtosis" -summary_coef_var_label <- 
"summary_coef_var" -summary_median_absolute_deviation_label <- "summary_median_absolute_deviation" -summary_Qn_label <- "summary_Qn" -summary_Sn_label <- "summary_Sn" -cor_label <- "summary_cor" -cov_label <- "summary_cov" -first_label <- "summary_first" -last_label <- "summary_last" -nth_label <- "summary_nth" -n_distinct_label <- "summary_n_distinct" -proportion_label <- "proportion_calc" -count_calc_label <- "count_calc" -standard_error_mean_label <- "standard_error_mean" -circular_mean_label <- "summary_circular_mean" -circular_median_label <- "summary_circular_median" -circular_medianHL_label <- "summary_circular_medianHL" -circular_min_label <- "summary_circular_min" -circular_max_label <- "summary_circular_max" -circular_Q1_label <- "summary_circular_Q1" -circular_Q3_label <- "summary_circular_Q3" -circular_quantile_label <- "summary_circular_quantile" -circular_sd_label <- "summary_circular_sd" -circular_var_label <- "summary_circular_var" -circular_ang_dev_label <- "summary_circular_ang_dev" -circular_ang_var_label <- "summary_circular_ang_var" -circular_rho_label <- "summary_circular_rho" -circular_range_label <- "summary_circular_range" -mean_error_label <- "me" -mean_absolute_error_label <- "mae" -root_mean_square_error_label <- "rmse" -normalised_mean_square_error_label <- "nrmse" -percent_bias_label <- "PBIAS" -nash_Sutcliffe_efficiency_label <- "NSE" -modified_Nash_Sutcliffe_efficiency_label <- "mNSE" -relative_Nash_Sutcliffe_efficiency_label <- "rNSE" -Index_of_agreement_label <- "d" -modified_index_of_aggrement_label <- "md" -relative_index_of_agreement_label <- "rd" -coefficient_of_determination_label <- "R2" -coefficient_of_persistence_label <- "cp" -kling_Gupta_efficiency_label <- "KGE" -mean_squared_error_label <- "mse" -ratio_of_standard_deviations_label <- "rSD" -ratio_of_RMSE_label <- "rsr" -sum_of_squared_residuals_label <- "ssq" -volumetric_efficiency_label <- "VE" -which_min_label <- "summary_which_min" -which_max_label <- 
"summary_which_max" -where_min_label <- "summary_where_min" -where_max_label <- "summary_where_max" - - -# list of all summary function names -# the order of this list determines the order summaries appears in certain functions -all_summaries <- c( - count_label, count_non_missing_label, count_missing_label, - min_label, p10_label, p20_label, p25_label, p30_label, p33_label, p40_label, p60_label, p67_label, p70_label, p75_label, p80_label, p90_label, quartile_label, median_label, - summary_median_absolute_deviation_label, summary_coef_var_label, - summary_Qn_label, summary_Sn_label, - mode_label, mean_label, which_min_label, which_max_label,where_max_label, - trimmed_mean_label, max_label, sum_label, where_min_label, - sd_label, var_label, range_label, standard_error_mean_label, - skewness_label, summary_skewness_mc_label, kurtosis_label, - summary_outlier_limit_label, - cor_label, cov_label, first_label, last_label, nth_label, n_distinct_label, - proportion_label, count_calc_label, - circular_min_label, circular_Q1_label, circular_quantile_label, - circular_median_label, circular_medianHL_label, circular_mean_label, - circular_Q3_label, circular_max_label, - circular_sd_label, circular_var_label, circular_range_label, - circular_ang_dev_label, circular_ang_var_label, circular_rho_label, - mean_error_label, mean_absolute_error_label, root_mean_square_error_label, - normalised_mean_square_error_label, percent_bias_label, nash_Sutcliffe_efficiency_label, - modified_Nash_Sutcliffe_efficiency_label, relative_Nash_Sutcliffe_efficiency_label, - Index_of_agreement_label, modified_index_of_aggrement_label, relative_index_of_agreement_label, - coefficient_of_determination_label, coefficient_of_persistence_label, - kling_Gupta_efficiency_label, mean_squared_error_label, ratio_of_standard_deviations_label, - ratio_of_RMSE_label, sum_of_squared_residuals_label, volumetric_efficiency_label -) - -# which of the summaries should return a Date value when x is a Date? 
-date_summaries <- c( - min_label, p10_label, p20_label, p25_label, p30_label, p33_label, p40_label, p60_label, p67_label, p70_label, p75_label, p80_label, p90_label, quartile_label, median_label, - mode_label, mean_label, trimmed_mean_label, which_min_label, which_max_label, where_min_label, - max_label, first_label, last_label, nth_label, where_max_label, - circular_min_label, circular_Q1_label, circular_quantile_label, - circular_median_label, circular_medianHL_label, circular_mean_label, - circular_Q3_label, circular_max_label -) - -summary_mode <- function(x,...) { - ux <- unique(x) - out <- ux[which.max(tabulate(match(x, ux)))] - if(is.factor(x)) out <- as.character(out) - if(is.null(out)) return(NA) - else return(out) -} - -na_check <- function(x, na_type = c(), na_consecutive_n = NULL, na_max_n = NULL, na_max_prop = NULL, na_min_n = NULL, na_FUN = NULL, ...) { - res <- c() - for (i in seq_along(na_type)) { - type <- na_type[i] - if (type %in% c("n","'n'")) { - res[i] <- summary_count_miss(x) <= na_max_n - } - else if (type %in% c("prop","'prop'")) { - res[i] <- (summary_count_miss(x) / summary_count(x)) <= na_max_prop / 100 - } - else if (type %in% c("n_non_miss","'n_non_miss'")) { - res[i] <- summary_count(x) >= na_min_n - } - else if (type %in% c("FUN","'FUN'")) { - res[i] <- na_FUN(x, ...) - } - else if (type %in% c("con","'con'")) { - is_na_rle <- rle(is.na(x)) - res[i] <- max(is_na_rle$lengths[is_na_rle$values]) <= na_consecutive_n - } - else { - stop("Invalid na_type specified for missing values check.") - } - if (!res[i]) { - return(FALSE) - } - } - return(all(res)) -} - -summary_mean_circular <- function (x, na.rm = FALSE, control.circular = list(), na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::mean.circular(x, na.rm = na.rm, trim = trim, control.circular = control.circular)[[1]]) -} - -summary_median_circular <- function (x, na.rm = FALSE, na_type = "", ...) 
{ - if(!na.rm & anyNA(x)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::median.circular(x, na.rm = na.rm)[[1]]) -} - -summary_medianHL_circular <- function (x, na.rm = FALSE, method = c("HL1","HL2","HL3"), prop = NULL, na_type = "", ...) { - if(!na.rm & anyNA(x)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::medianHL.circular(x, na.rm = na.rm, method = method, prop = prop)[[1]]) -} - -summary_min_circular <- function (x, na.rm = FALSE, names = FALSE, type = 7, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)||(!na.rm & anyNA(x))) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::quantile.circular(x, probs = 0, na.rm = na.rm, names = names, type = type)[[1]]) -} - -summary_max_circular <- function (x, na.rm = FALSE, names = FALSE, type = 7, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)||(!na.rm & anyNA(x))) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::quantile.circular(x, probs = 1, na.rm = na.rm, names = names, type = type)[[1]]) -} - -summary_quantile_circular <- function (x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = FALSE, type = 7, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)||(!na.rm & anyNA(x))) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::quantile.circular(x, probs = probs, na.rm = na.rm, names = names, type = type)[[1]]) -} - -summary_Q3_circular <- function (x, na.rm = FALSE, names = FALSE, type = 7, na_type = "", ...) 
{ - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)||(!na.rm & anyNA(x))) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::quantile.circular(x, probs = 0.75, na.rm = na.rm, names = names, type = type)[[1]]) -} - -summary_Q1_circular <- function (x, na.rm = FALSE, names = FALSE, type = 7, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)||(!na.rm & anyNA(x))) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::quantile.circular(x, probs = 0.25, na.rm = na.rm, names = names, type = type)[[1]]) -} - -summary_sd_circular <- function (x, na.rm = FALSE, na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::sd.circular(x, na.rm = na.rm)) -} - -summary_var_circular <- function (x, na.rm = FALSE, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::var.circular(x, na.rm = na.rm)) -} - -summary_ang_dev_circular <- function (x, na.rm = FALSE, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::angular.deviation(x, na.rm = na.rm)) -} - -summary_ang_var_circular <- function (x, na.rm = FALSE, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::angular.variance(x, na.rm = na.rm)) -} - -summary_range_circular <- function (x, test = FALSE, na.rm = FALSE, finite = FALSE, control.circular = list(), na_type = "", ...) 
{ - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::range.circular(x, test = test, na.rm = na.rm, finite = finite, control.circular = control.circular)[[1]]) -} - -summary_rho_circular <- function (x, na.rm = FALSE, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::rho.circular(x, na.rm = na.rm)) -} - -summary_quantile_circular <- function (x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = FALSE, type = 7, na_type = "", ...) { - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)||(!na.rm & anyNA(x))) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else return(circular::quantile.circular(x, probs = probs, na.rm = na.rm, names = names, type = type)[[1]]) -} - -summary_mean <- function (x, add_cols, weights = NULL, na.rm = FALSE, trim = 0, na_type = "", ...) { - if( length(x)==0 || (na.rm && length(x[!is.na(x)])==0) ) return(NA) - else { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else { - if (missing(weights) || is.null(weights)) - return(mean(x, na.rm = na.rm, trim = trim)) - else - return(stats::weighted.mean(x, w = weights, na.rm = na.rm)) - } - } -} - -summary_trimmed_mean <- function (x, add_cols, weights = NULL, na.rm = FALSE, trimmed = 0, na_type = "", ...) { - if( length(x)==0 || (na.rm && length(x[!is.na(x)])==0) ) return(NA) - else { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else - return(mean(x, na.rm = na.rm, trim = trimmed)) - } -} - -summary_sum <- function (x, weights = NULL, na.rm = FALSE, na_type = "", ...) 
{ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else { - if (missing(weights) || is.null(weights)) return(sum(x, na.rm = na.rm)) - else return(sum(x * weights, na.rm = na.rm)) - } -} - - -summary_count_all <- function(x, ...) { - return(length(x)) -} - - -summary_count_miss <- function(x, ...) { - return(sum(is.na(x))) -} - -summary_count <- function(x, ...) { - return(sum(!is.na(x))) -} - -summary_sd <- function(x, na.rm = FALSE, weights = NULL, na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (missing(weights) || is.null(weights)) { - return(sd(x, na.rm = na.rm)) - } else { - return(sqrt(Hmisc::wtd.var(x, weights = weights, na.rm = na.rm))) - } - } -} - -summary_var <- function(x, na.rm = FALSE, weights = NULL, na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (missing(weights) || is.null(weights)) { - return(var(x,na.rm = na.rm)) - } - else { - return(Hmisc::wtd.var(x, weights = weights, na.rm = na.rm)) - } - } -} - -summary_max <- function (x, na.rm = FALSE, na_type = "", ...) { - #TODO This prevents warning and -Inf being retured. Is this desirable? - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - return(max(x, na.rm = na.rm)) - } -} - -summary_min <- function (x, na.rm = FALSE, na_type = "", ...) { - #TODO This prevents warning and Inf being retured. Is this desirable? - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - return(min(x, na.rm = na.rm)) - } -} - -summary_which_max <- function (x, na.rm = TRUE, na_type = "", ...) 
{ - if(length(x)==0 || (na.rm && length(x[!is.na(x)])==0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - # Get the minimum value - max_value <- max(x, na.rm = na.rm) - # Return all indices where x is equal to the minimum value - return(which(x == max_value)) - } -} - -summary_which_min <- function(x, na.rm = TRUE, na_type = "", ...) { - if(length(x) == 0 || (na.rm && length(x[!is.na(x)]) == 0)) return(NA) - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else { - # Get the minimum value - min_value <- min(x, na.rm = na.rm) - # Return all indices where x is equal to the minimum value - return(which(x == min_value)) - } -} - -summary_where_max <- function(x, summary_where_y=NULL, na.rm = TRUE, na_type = "", ...) { - # Check if vectors are empty - if (length(x) == 0 || length(summary_where_y) == 0) { - return(NA) - } - - # Handle NA values - if (na.rm) { - valid_indices <- !is.na(x) & !is.na(summary_where_y) - x <- x[valid_indices] - summary_where_y <- summary_where_y[valid_indices] - } - - # Find the index of the maximum value in x - max_index <- which.max(x) - - # Return the corresponding value in summary_where_y - return(summary_where_y[max_index]) -} - -summary_where_min <- function(x, summary_where_y=NULL, na.rm = TRUE, na_type = "", ...) { - # Check if vectors are empty - if (length(x) == 0 || length(summary_where_y) == 0) { - return(NA) - } - - # Handle NA values - if (na.rm) { - valid_indices <- !is.na(x) & !is.na(summary_where_y) - x <- x[valid_indices] - summary_where_y <- summary_where_y[valid_indices] - } - - # Find the index of the minimum value in x - min_index <- summary_which_min(x, na.rm = na.rm, na_type = na_type, ...) - - # Return the corresponding value in summary_where_y - return(summary_where_y[min_index]) -} - -# get the range of the data -summary_range <- function(x, na.rm = FALSE, na_type = "", ...) 
{ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - return(max(x, na.rm = na.rm) - min(x, na.rm = na.rm)) - } -} - -# median function -summary_median <- function(x, na.rm = FALSE, weights = NULL, na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(missing(weights) || is.null(weights)) { - if (stringr::str_detect(class(x), pattern = "ordered") || stringr::str_detect(class(x), pattern = "Date")) { - return(quantile(x, na.rm = na.rm, probs = 0.5, type = 1)[[1]]) - } else { - return(median(x, na.rm = na.rm)) - } - } else { - return(Hmisc::wtd.quantile(x, weights = weights, probs = 0.5, na.rm = na.rm)) - } - } -} - -# quantile function -summary_quantile <- function(x, na.rm = FALSE, weights = NULL, probs, na_type = "", ...) { - if(!na.rm && anyNA(x)) return(NA) - # This prevents multiple values being returned - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else { - if(missing(weights) || is.null(weights)) { - if (stringr::str_detect(class(x), pattern = "ordered") || stringr::str_detect(class(x), pattern = "Date")) { - return(quantile(x, na.rm = na.rm, probs = probs, type = 1)[[1]]) - } else { - return(quantile(x, na.rm = na.rm, probs = probs)[[1]]) - } - } - else { - return(Hmisc::wtd.quantile(x, weights = weights, probs = probs, na.rm = na.rm)) - } - } -} - -# p10 function -p10 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.1, na_max_prop = na_max_prop, ...) -} - -# p20 function -p20 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL,...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.2, na_max_prop = na_max_prop, ...) -} - -# p25 function -p25 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) 
{ - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.25, na_max_prop = na_max_prop, ...) -} - -# p30 function -p30 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.3, na_max_prop = na_max_prop, ...) -} - -# p33 function -p33 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.33, na_max_prop = na_max_prop, ...) -} - -# p40 function -p40 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.4, na_max_prop = na_max_prop, ...) -} - -# p60 function -p60 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.6, na_max_prop = na_max_prop, ...) -} - -# p67 function -p67 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.67, na_max_prop = na_max_prop, ...) -} - -# p70 function -p70 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.7, na_max_prop = na_max_prop, ...) -} - -# p75 function -p75 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.75, na_max_prop = na_max_prop, ...) -} - -# p80 function -p80 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) 
{ - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.8, na_max_prop = na_max_prop, ...) -} - -# p90 function -p90 <- function(x, na.rm = FALSE, na_type = "", weights = NULL, na_max_prop = NULL, ...) { - summary_quantile(x = x, na.rm = na.rm, na_type = na_type, weights = weights, probs = 0.9, na_max_prop = na_max_prop, ...) -} - -# Skewness e1071 function -summary_skewness <- function(x, weights = NULL, na.rm = FALSE, type = 2, na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (missing(weights) || is.null(weights)) { - return(e1071::skewness(x, na.rm = na.rm, type = type)) - } - if (length(weights) != length(x)) stop("'x' and 'weights' must have the same length") - if (na.rm) { - i <- !is.na(x) && !is.na(weights) - weights <- weights[i] - x <- x[i] - } - ( sum( weights * (x - Weighted.Desc.Stat::w.mean(x, weights))^3 ) / sum(weights)) / Weighted.Desc.Stat::w.sd(x, weights)^3 - } -} - -# skewness mc function -summary_skewness_mc <- function(x, na.rm = FALSE, na_type = "", ...) 
{ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - return(robustbase::mc(x, na.rm = na.rm)) - } -} - -# skewness outlier limit function -summary_outlier_limit <- function(x, coef = 1.5, bupperlimit = TRUE, bskewedcalc = FALSE, skewnessweight = 4, na.rm = TRUE, na_type = "", omit = FALSE, value = 0, ...){ - if(omit){ - #This is needed when we need rainy days defined(Rain>=0.85) - #if(value!=0){ - # x <- x[x>=value] - #}else{ - x <- x[x>value] - #} - } - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - quart <- quantile(x, na.rm = na.rm) - Q1 <- quart[[2]] - Q3 <- quart[[4]] - IQR <- Q3 - Q1 - MC <- 0 - if(bskewedcalc){ - MC <- robustbase::mc(x, na.rm = na.rm) - } - if(bupperlimit){ - Q3 + coef*exp(skewnessweight*MC)*IQR - } else { - Q1 - coef*exp(-skewnessweight*MC)*IQR - } - } -} - -# kurtosis function -summary_kurtosis <- function(x, na.rm = FALSE, weights = NULL, type = 2, na_type = "", ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (missing(weights) || is.null(weights)) { - return(e1071::kurtosis(x, na.rm = na.rm, type = type)) - } - if (length(weights) != length(x)) - stop("'x' and 'weights' must have the same length") - if (na.rm) { - i <- !is.na(x) && !is.na(weights) - weights <- weights[i] - x <- x[i] - } - ((sum(weights * (x - Weighted.Desc.Stat::w.mean(x, weights))^4)/sum(weights))/Weighted.Desc.Stat::w.sd(x, weights)^4) - 3 - } -} - -# Coefficient of Variation function -summary_coef_var <- function(x, na.rm = FALSE, weights = NULL, na_type = "", ...) 
{ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (missing(weights) || is.null(weights)) { - return(summary_sd(x) / summary_mean(x)) - } - if (length(weights) != length(x)) - stop("'x' and 'weights' must have the same length") - if (na.rm) { - i <- !is.na(x) && !is.na(weights) - weights <- weights[i] - x <- x[i] - } - Weighted.Desc.Stat::w.cv(x = x, mu = weights) - } -} - -# median absolute deviation function -summary_median_absolute_deviation <- function(x, constant = 1.4826, na.rm = FALSE, na_type = "", weights = NULL, low = FALSE, high = FALSE, ...) { - if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else { - if (missing(weights) || is.null(weights)) { - return(stats::mad(x, constant = constant, na.rm = na.rm, low = low, high = high)) - } - else { - Weighted.Desc.Stat::w.ad(x = x, mu = weights) - } - } -} - -# Qn function -summary_Qn <- function(x, constant = 2.21914, finite.corr = missing(constant), na.rm = FALSE, na_type = "", ...) { - if(!na.rm && anyNA(x)) return(NA) - else { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - x <- x[!is.na(x)] - return(robustbase::Qn(x, constant = constant, finite.corr = finite.corr)) - } - } -} - -# Sn function -summary_Sn <- function(x, constant = 1.1926, finite.corr = missing(constant), na.rm = FALSE, na_type = "", ...) { - if(!na.rm && anyNA(x)) return(NA) - else { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - x <- x[!is.na(x)] - return(robustbase::Qn(x, constant = constant, finite.corr = finite.corr)) - } - } -} - -# cor function -summary_cor <- function(x, y, na.rm = FALSE, na_type = "", weights = NULL, method = c("pearson", "kendall", "spearman"), cor_use = c("everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs"), ...) 
{ - cor_use <- match.arg(cor_use) - if (na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else { - if (missing(weights) || is.null(weights)) { - return(cor(x = x, y = y, use = cor_use, method = method)) - } - else { - weights::wtd.cor(x = x, y = y, weight = weights)[1] - } - } -} - -# cov function -summary_cov <- function(x, y, na.rm = FALSE, weights = NULL, na_type = "", method = c("pearson", "kendall", "spearman"), use = c( "everything", "all.obs", "complete.obs", "na.or.complete", "pairwise.complete.obs"), ...) { - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (missing(weights) || is.null(weights)) { - return(cov(x = x, y = y, use = use, method = method)) - } - if (length(weights) != length(x)) - stop("'x' and 'weights' must have the same length") - if (na.rm) { - i <- !is.na(x) && !is.na(weights) - weights <- weights[i] - x <- x[i] - } - (sum(weights * x * y)/sum(weights)) - (Weighted.Desc.Stat::w.mean(x = x, mu = weights) * Weighted.Desc.Stat::w.mean(x = y, mu = weights)) - } -} - -# first function -summary_first <- function(x, order_by = NULL, ...) { - return(dplyr::first(x = x, order_by = order_by)) -} - -# last function -summary_last <- function(x, order_by = NULL, ...) { - return(dplyr::last(x = x, order_by = order_by)) -} - -# nth function -summary_nth <- function(x, nth_value, order_by = NULL, ...) { - return(dplyr::nth(x = x, n = nth_value, order_by = order_by)) -} - -# n_distinct function -summary_n_distinct<- function(x, na.rm = FALSE, ...) { - return(dplyr::n_distinct(x, na.rm = na.rm)) -} - -# sample function -summary_sample <- function(x, replace = FALSE, seed, ...){ - if(!missing(seed)) set.seed(seed = seed) - return(sample(x = x, size = 1, replace = replace)) -} - -#Proportions functions -proportion_calc <- function(x, prop_test = "==", prop_value, As_percentage = FALSE, na.rm = FALSE, na_type = "", ... 
){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(!na.rm){ - if(sum(is.na(x)) > 0) return(NA) - y <- x[eval(parse(text = paste("x", prop_value, sep = prop_test)))] - if(!As_percentage){ - return(round(length(y)/length(x),digits = 2)) - } - else { - return(round((length(y)/length(x)*100),digits = 2 )) - } - } - else { - remove.na <- na.omit(x) - y <- remove.na[eval(parse(text = paste("remove.na", prop_value, sep = prop_test)))] - if (!As_percentage){ - return(round(length(y)/length(remove.na), digits = 2)) - } - else{ - return(round(length(y)/length(remove.na)*100, digits = 2 )) - } - } - } -} - -#count function -count_calc <- function(x, count_test = "==", count_value, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (!na.rm){ - if (sum(is.na(x)) > 0) return(NA) - return(length(x[eval(parse(text = paste("x", count_value, sep = count_test)))])) - } - else{ - y <- na.omit(x) - return(length(y[eval(parse(text = paste("y", count_value, sep = count_test)))])) - } - } -} - -#standard error of mean function -standard_error_mean <- function(x, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if (!na.rm){ - if(sum(is.na(x) > 0)) return(NA) - return(sd(x)/sqrt(length(x))) - } - else{ - y <- na.omit(x) - return(sd(y)/sqrt(length(y))) - } - } -} - -#Verification functions -#HydroGOF Package - -#Mean error -me <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::me(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Mean absolute error -mae <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - 
if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::mae(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Root mean square error -rmse <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::rmse(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Normalised mean square error -nrmse <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::nrmse(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Percent bias -PBIAS <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::pbias(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Nash-Sutcliffe efficiency -NSE <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::NSeff(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Modified Nash-Sutcliffe efficiency -mNSE <- function(x, y, j = 1, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::mNSE(sim = y, obs = x, j = j, na.rm = na.rm)) - } -} - -#Relative Nash-Sutcliffe efficiency -rNSE <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - 
if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::rNSeff(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Index of agreement -d <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::d(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Modified index of agreement -md <- function(x, y, j = 1, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::md(sim = y, obs = x, j = j, na.rm = na.rm)) - } -} - - -#Relative index of agreement -rd <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::rd(sim = y, obs = x, na.rm = na.rm)) - } -} - - -#Coefficient of determination -R2 <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::br2(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Coefficient of persistence -cp <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(unique(y))==1||length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::cp(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Kling-Gupta efficiency -KGE <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - 
if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::KGE(sim = y, obs = x, na.rm = na.rm)) - } -} - -#mean squared error -mse <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::mse(sim = y, obs = x, na.rm = na.rm)) - } -} - - -#Ratio of standard deviations -rSD <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::rSD(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Ratio of RMSE -rsr <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::rsr(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Sum of squared residuals -ssq <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::ssq(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Volumetric efficiency -VE <- function(x, y, na.rm = FALSE, na_type = "", ...){ - if(na.rm && na_type != "" && !na_check(x, na_type = na_type, ...)) return(NA) - else{ - if(length(x[is.na(x)])==length(x)||length(y[is.na(y)])==length(y)) return(NA) - return(hydroGOF::VE(sim = y, obs = x, na.rm = na.rm)) - } -} - -#Verification package - -#Categorical/Categorical - -##Percent Correct -pc <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$pc) -} - -##Heidke Skill Score -hss <- function(x, 
y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$hss) -} - -##Pierce Skill Score -pss <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$pss) -} - -##Gerrity Score -GS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$gs) -} - -#Binary/Binary - -##Hit rate aka PODy -PODy <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$POD) -} - -##Threat score -TS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$TS) -} - -##Equitable threat score -ETS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$ETS) -} - -##False alarm ratio -FAR <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$FAR) -} - -##Heidke skill score -HSS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$HSS) -} - -##Percent correct -PC <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$PC) -} - -##Bias -BIAS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$BIAS) -} - -##Extreme dependency score -EDS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - 
return(A$eds) -} - -##Symmetric extreme dependency score -SEDS <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$seds) -} - -##Extremal dependency index -EDI <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$EDI) -} - -##Symmetric Extremal dependence index -SEDI <- function(x, y, frcst.type, obs.type, ...){ - A <- verification::verify(obs = x, pred = y, frcst.type = frcst.type, obs.type = obs.type) - return(A$SEDI) -} - -##TODO:Check if there are summaries that only apply to (Probabilistic-binary) types. - -DataBook$set("public", "summary_table", function(data_name, columns_to_summarise = NULL, summaries, factors = c(), store_table = FALSE, store_results = FALSE, drop = TRUE, na.rm = FALSE, summary_name = NA, include_margins = FALSE, margins = "outer", return_output = FALSE, treat_columns_as_factor = FALSE, page_by = NULL, signif_fig = 2, na_display = "", na_level_display = "NA", weights = NULL, caption = NULL, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, include_counts_with_percentage = FALSE, margin_name = "(All)", additional_filter, ...) { - # TODO: write in errors - if (na_level_display == "") stop("na_level_display must be a non empty string") - # removes "summary_" from beginning of summary function names so that display is nice - summaries_display <- sapply(summaries, function(x) ifelse(startsWith(x, "summary_"), substring(x, 9), x)) - - # todo: add in code to store results if store_results = TRUE on the dialog - # only give this option if there is 1 column factor. 
- if (!store_results) { - save <- 0 - } else { - save <- 2 - } - - cell_values <- self$calculate_summary(data_name = data_name, columns_to_summarise = columns_to_summarise, summaries = summaries, factors = factors, store_results = FALSE, drop = drop, na.rm = na.rm, return_output = TRUE, weights = weights, result_names = result_names, percentage_type = percentage_type, perc_total_columns = perc_total_columns, perc_total_factors = perc_total_factors, perc_total_filter = perc_total_filter, perc_decimal = perc_decimal, include_counts_with_percentage = include_counts_with_percentage, margin_name = margin_name, additional_filter = additional_filter, perc_return_all = FALSE, signif_fig = signif_fig, sep = "__", ...) - for (i in seq_along(factors)) { - levels(cell_values[[i]]) <- c(levels(cell_values[[i]]), na_level_display) - cell_values[[i]][is.na(cell_values[[i]])] <- na_level_display - } - cell_values <- cell_values %>% dplyr::mutate(dplyr::across(where(is.numeric), round, signif_fig)) - cell_values <- cell_values %>% - tidyr::pivot_longer(cols = !factors, names_to = "summary-variable", values_to = "value", values_transform = list(value = as.character)) - if (treat_columns_as_factor && !is.null(columns_to_summarise)) { - cell_values <- cell_values %>% - tidyr::separate(col = "summary-variable", into = c("summary", "variable"), sep = "__") - } - shaped_cell_values <- cell_values %>% dplyr::relocate(value, .after = last_col()) - - for (i in seq_along(factors)) { - levels(shaped_cell_values[[i]]) <- c(levels(shaped_cell_values[[i]]), margin_name) - } - - # If margins --------------------------------------------------------------------------- - if (include_margins) { - margin_tables <- list() - power_sets <- rje::powerSet(factors) - # We could need last set if only have row or column factors - power_sets_outer <- power_sets[-(c(length(power_sets)))] - if (treat_columns_as_factor && !is.null(columns_to_summarise)) { - order_names <- unique(paste(shaped_cell_values$summary, 
shaped_cell_values$variable, sep = "__")) - } else { - order_names <- unique(shaped_cell_values$summary) - } - for (facts in power_sets_outer) { - if (length(facts) == 0) facts <- c() - margin_tables[[length(margin_tables) + 1]] <- self$calculate_summary(data_name = data_name, columns_to_summarise = columns_to_summarise, summaries = summaries, factors = facts, store_results = FALSE, drop = drop, na.rm = na.rm, return_output = TRUE, weights = weights, result_names = result_names, percentage_type = percentage_type, perc_total_columns = perc_total_columns, perc_total_factors = perc_total_factors, perc_total_filter = perc_total_filter, perc_decimal = perc_decimal, include_counts_with_percentage = include_counts_with_percentage, margin_name = margin_name, additional_filter = additional_filter, perc_return_all = FALSE, signif_fig = signif_fig, sep = "__", ...) - } - # for outer margins - margin_item <- length(summaries) * length(columns_to_summarise) - - if (("outer" %in% margins) && (length(factors) > 0)) { - # to prevent changing all variables to dates/converting dates to numeric - for (i in 1:length(margin_tables)){ - margin_tables[[i]] <- margin_tables[[i]] %>% dplyr::mutate(dplyr::across(where(is.numeric), round, signif_fig)) - margin_tables[[i]] <- margin_tables[[i]] %>% purrr::modify_if(lubridate::is.Date, as.character) - } - outer_margins <- plyr::ldply(margin_tables) - # Change shape - if (length(margin_tables) == 1) { - outer_margins <- plyr::ldply(margin_tables[[1]]) - names(outer_margins) <- c("summary-variable", "value") - } else { - outer_margins <- outer_margins %>% - tidyr::pivot_longer(cols = 1:margin_item, values_to = "value", names_to = "summary-variable", values_transform = list(value = as.character)) - } - if (treat_columns_as_factor && !is.null(columns_to_summarise)) { - outer_margins <- outer_margins %>% - tidyr::separate(col = "summary-variable", into = c("summary", "variable"), sep = "__") - } - } else { - outer_margins <- NULL - } - if 
("summary" %in% margins || ("outer" %in% margins && length(factors) == 0)) { - summary_margins <- NULL - if (is.null(columns_to_summarise)){ - power_sets_summary <- power_sets[-(length(power_sets))] - } else { - if ("outer" %in% margins) { - power_sets_summary <- power_sets - } else { - power_sets_summary <- power_sets[(c(length(power_sets)))] - } - } - - for (facts in power_sets_summary) { - if (length(facts) == 0) facts <- c() - if (is.null(columns_to_summarise)){ - summary_margins_df <- data_book$get_data_frame(data_name = data_name) %>% - dplyr::select(c(tidyselect::all_of(factors))) - data_book$import_data(data_tables = list(summary_margins_df = summary_margins_df)) - summary_margins[[length(summary_margins) + 1]] <- data_book$calculate_summary(data_name = "summary_margins_df", columns_to_summarise = NULL, summaries = summaries, factors = facts, store_results = FALSE, drop = drop, na.rm = na.rm, return_output = TRUE, weights = weights, result_names = result_names, percentage_type = percentage_type, perc_total_columns = perc_total_columns, perc_total_factors = perc_total_factors, perc_total_filter = perc_total_filter, perc_decimal = perc_decimal, include_counts_with_percentage = include_counts_with_percentage, margin_name = margin_name, additional_filter = additional_filter, perc_return_all = FALSE, signif_fig = signif_fig, ...) 
- } else { - summary_margins_df <- data_book$get_data_frame(data_name = data_name) %>% - dplyr::select(c(tidyselect::all_of(factors), tidyselect::all_of(columns_to_summarise))) %>% - tidyr::pivot_longer(cols = columns_to_summarise, values_transform = list(value = as.character)) - data_book$import_data(data_tables = list(summary_margins_df = summary_margins_df)) - summary_margins[[length(summary_margins) + 1]] <- data_book$calculate_summary(data_name = "summary_margins_df", columns_to_summarise = "value", summaries = summaries, factors = facts, store_results = FALSE, drop = drop, na.rm = na.rm, return_output = TRUE, weights = weights, result_names = result_names, percentage_type = percentage_type, perc_total_columns = perc_total_columns, perc_total_factors = perc_total_factors, perc_total_filter = perc_total_filter, perc_decimal = perc_decimal, include_counts_with_percentage = include_counts_with_percentage, margin_name = margin_name, additional_filter = additional_filter, perc_return_all = FALSE, signif_fig = signif_fig, ...) 
- - } - data_book$delete_dataframes(data_names = "summary_margins_df") - } - summary_margins <- plyr::ldply(summary_margins) - if (treat_columns_as_factor && !is.null(columns_to_summarise)) { - # remove "_value" in them - for (col in 1:ncol(summary_margins)) { - colnames(summary_margins)[col] <- sub("_value", "", colnames(summary_margins)[col]) - } - summary_margins <- summary_margins %>% - tidyr::pivot_longer(cols = !factors, names_to = "summary", values_to = "value", values_transform = list(value = as.character)) - } else { - if (length(summary_margins) == 1) { - summary_margins <- data.frame(summary_margins, `summary-variable` = "count", factors = NA) - names(summary_margins) <- c("value", "summary-variable", factors) - }else { - for (col in 1:ncol(summary_margins)) { - # TODO: if the colname is the same as a factor, then do nothing - colnames(summary_margins)[col] <- sub("_value", "_all", colnames(summary_margins)[col]) - } - summary_margins <- summary_margins %>% dplyr::mutate(dplyr::across(where(is.numeric), round, signif_fig)) - summary_margins <- summary_margins %>% - tidyr::pivot_longer(cols = !factors, names_to = "summary-variable", values_to = "value", values_transform = list(value = as.character)) - } - } - } else { - summary_margins <- NULL - } - if (!is.null(summary_margins) || !is.null(outer_margins)) { - margin_tables_all <- (dplyr::bind_rows(summary_margins, outer_margins)) - margin_tables_all <- margin_tables_all %>% - dplyr::mutate_at(vars(-value), ~ replace(., is.na(.), margin_name)) %>% - dplyr::mutate(value = as.character(value)) - - # if there is one factor, then we do not yet have the factor name in the df - # (this will be added in by dplyr::bind_rows(s_c_v, m_t_a)) - # by introducing it in the outer_margins bit, we have to add it in "manually" - # this then loses the class of it, creating issues for ordered vs non-ordered factors - # so we do these changes here. 
- if (length(factors) > 1){ - for (i in factors){ - shaped_cell_values_levels <- levels(shaped_cell_values[[i]]) - margin_tables_all <- margin_tables_all %>% - dplyr::mutate_at(i, ~ forcats::fct_expand(., shaped_cell_values_levels), - i, ~ forcats::fct_relevel(., shaped_cell_values_levels)) - } - } - shaped_cell_values <- dplyr::bind_rows(shaped_cell_values, margin_tables_all) %>% - dplyr::mutate_at(vars(-c(value)), tidyr::replace_na, margin_name) %>% - dplyr::mutate_at(vars(-c(value)), ~forcats::as_factor(forcats::fct_relevel(.x, margin_name, after = Inf))) - } - } - # To all data -------------------------------------------------------------------------- - # Used to make all values numeric, but stopped because of issues with ordered factors/dates. - # I don't think this line is needed anymore, but will keep it commented for now in case it becomes more apparent in the future - #if (percentage_type == "none" || include_counts_with_percentage == FALSE){ - # shaped_cell_values <- shaped_cell_values %>% dplyr::mutate(value = as.numeric(as.character(value)), - # value = round(value, signif_fig)) - #} - if (treat_columns_as_factor && !is.null(columns_to_summarise)){ - shaped_cell_values <- shaped_cell_values %>% - dplyr::mutate(summary = as.factor(summary)) %>% dplyr::mutate(summary = forcats::fct_relevel(summary, summaries_display)) %>% - dplyr::mutate(variable = as.factor(variable)) %>% dplyr::mutate(variable= forcats::fct_relevel(variable, columns_to_summarise)) - } - if (!treat_columns_as_factor && !is.null(columns_to_summarise)){ - shaped_cell_values <- shaped_cell_values %>% - dplyr::mutate(`summary-variable` = forcats::as_factor(`summary-variable`)) - } - if (store_table) { - data_book$import_data(data_tables = list(shaped_cell_values = shaped_cell_values)) - } - return(tibble::as_tibble(shaped_cell_values)) -}) +# removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/Rsetup.R b/instat/static/InstatObject/R/Rsetup.R index 
01f8592fbc1..1157c528d89 100644 --- a/instat/static/InstatObject/R/Rsetup.R +++ b/instat/static/InstatObject/R/Rsetup.R @@ -193,11 +193,11 @@ packages_not_installed <- function() { load_R_Instat_packages <- function() { # ggthemes temp added because themes list doesn't contain package names - # sp needed for plot.region() function which requires sp loaded but gives errors through R-Instat + # sp needed for plot.region() function which requires sp loaded but gives errors through R-Instat ## note that this function is now in instatClimatic. # plyr and dplyr loaded in order to avoid conflicts # ggplot2 loaded for convenience # svglite and ggfortify needed for View Graph dialog - # PCICt needed to access PCICt class when importing NETcdf files + # PCICt needed to access PCICt class when importing NETcdf files # this is loaded in with instatClimatic now I think. # ggmosaic because geom_mosaic aes only work when ggmosaic is loaded # wakefield because many functions do not work without loading (https://github.com/trinker/wakefield/issues/11) # latticeExtra because conditionalQuantile key positions work well when latticeExtra is loaded @@ -205,7 +205,7 @@ load_R_Instat_packages <- function() { # tidyr loaded because unite() function is required by mmtable() function from mmtable2 package # purrr loaded because map_int() is required by function(s) such as header_top_left() from mmtable2 package # mc2d loaded because of triangular and continuous empirical distributions - packs_to_load <- c("purrr", "plyr", "tidyr", "dplyr", "ggplot2", "ggthemes", "svglite", "ggfortify", "PCICt", "sp", "ggmosaic", "wakefield", "circular", "latticeExtra", "texmex", "mc2d") + packs_to_load <- c("purrr", "plyr", "tidyr", "dplyr", "ggplot2", "ggthemes", "svglite", "ggfortify", "PCICt", "sp", "ggmosaic", "wakefield", "circular", "latticeExtra", "texmex", "mc2d", "databook", "instatCalculations") for(pack in packs_to_load) { try(library(pack, character.only = TRUE)) } @@ -225,8 +225,8 @@ 
packages_not_loaded <- function() { load_R_Instat_packages() setwd(dirname(parent.frame(2)$ofile)) -source("instat_object_R6.R") -source("data_object_R6.R") +#source("instat_object_R6.R") +#source("data_object_R6.R") source("labels_and_defaults.R") source("stand_alone_functions.R") files <- sort(dir(file.path(getwd(), 'Backend_Components/'), pattern=".R$", full.names = TRUE, recursive = TRUE)) diff --git a/instat/static/InstatObject/R/data_object_R6.R b/instat/static/InstatObject/R/data_object_R6.R index c188e3d8f03..50aa506deb1 100644 --- a/instat/static/InstatObject/R/data_object_R6.R +++ b/instat/static/InstatObject/R/data_object_R6.R @@ -1,4824 +1 @@ -DataSheet <- R6::R6Class("DataSheet", - public = list( - initialize = function(data = data.frame(), data_name = "", - variables_metadata = data.frame(), metadata = list(), - imported_from = "", - messages = TRUE, convert=TRUE, create = TRUE, - start_point=1, filters = list(), column_selections = list(), objects = list(), - calculations = list(), scalars = list(), keys = list(), comments = list(), keep_attributes = TRUE, undo_history = list(), redo_undo_history = list(), disable_undo = FALSE) -{ - # Set up the data object - self$set_data(data, messages) - self$set_changes(list()) - #removed until this can be fixed. - #self$set_variables_metadata(variables_metadata) - - # Set first so that "no_filter" is added - self$set_filters(filters) - self$set_column_selections(column_selections) - if(keep_attributes) { - self$set_meta(c(attributes(private$data), metadata)) - } - else { - self$set_meta(metadata) - self$clear_variables_metadata() - } - self$add_defaults_meta() - self$add_defaults_variables_metadata(self$get_column_names()) - #self$update_variables_metadata() - self$set_objects(objects) - self$set_calculations(calculations) - self$set_scalars(scalars) - self$set_keys(keys) - self$set_comments(comments) - - # If no name for the data.frame has been given in the list we create a default one. 
- # Decide how to choose default name index - if ( !(is.null(data_name) || data_name == "" || missing(data_name))) { - if(data_name != make.names(iconv(data_name, to = "ASCII//TRANSLIT", sub = "."))) { - message("data_name is invalid. It will be made valid automatically.") - data_name <- make.names(iconv(data_name, to = "ASCII//TRANSLIT", sub = ".")) - } - self$append_to_metadata(data_name_label, data_name) - } - else if (!self$is_metadata(data_name_label)) { - if (( is.null(data_name) || data_name == "" || missing(data_name))) { - self$append_to_metadata(data_name_label,paste0("data_set_",sprintf("%03d", start_point))) - if (messages) { - message(paste0("No name specified in data_tables list for data frame ", start_point, ". - Data frame will have default name: ", "data_set_",sprintf("%03d", start_point))) - } - } - else self$append_to_metadata(data_name_label, data_name) - } -} -), - private = list( - data = data.frame(), - filters = list(), - column_selections = list(), - objects = list(), - keys = list(), - undo_history = list(), - redo_undo_history = list(), - comments = list(), - calculations = list(), - scalars = list(), - changes = list(), - disable_undo = FALSE, - .current_filter = list(), - .current_column_selection = list(), - .data_changed = FALSE, - .metadata_changed = FALSE, - .variables_metadata_changed = FALSE, - .last_graph = NULL - ), - active = list( - data_changed = function(new_value) { - if(missing(new_value)) return(private$.data_changed) - else { - if(new_value != TRUE && new_value != FALSE) stop("new_val must be TRUE or FALSE") - private$.data_changed <- new_value - self$append_to_changes(list(Set_property, "data_changed")) - } - }, - metadata_changed = function(new_value) { - if(missing(new_value)) return(private$.metadata_changed) - else { - if(new_value != TRUE && new_value != FALSE) stop("new_val must be TRUE or FALSE") - private$.metadata_changed <- new_value - self$append_to_changes(list(Set_property, "metadata_changed")) - } - }, - 
variables_metadata_changed = function(new_value) { - if(missing(new_value)) return(private$.variables_metadata_changed) - else { - if(new_value != TRUE && new_value != FALSE) stop("new_val must be TRUE or FALSE") - private$.variables_metadata_changed <- new_value - self$append_to_changes(list(Set_property, "variable_data_changed")) - } - }, - current_filter = function(filter) { - if(missing(filter)) { - return(self$get_filter_as_logical(private$.current_filter$name)) - } - else { - private$.current_filter <- filter - self$data_changed <- TRUE - self$append_to_changes(list(Set_property, "current_filter")) - } - }, - current_column_selection = function(column_selection) { - if(missing(column_selection)) { - if (!is.null(private$.current_column_selection)) { - return(self$get_column_selection_column_names(private$.current_column_selection$name)) - } else return(names(private$data)) - } - else { - private$.current_column_selection <- column_selection - self$data_changed <- TRUE - self$append_to_changes(list(Set_property, "current_column_selection")) - } - } - ) -) - -DataSheet$set("public", "set_data", function(new_data, messages=TRUE, check_names = TRUE) { - if(is.matrix(new_data)) new_data <- as.data.frame(new_data) - #This case could happen when removing rows - #as.data.frame preserves column and data frame attributes so no issue with this - else if(tibble::is_tibble(new_data) || data.table::is.data.table(new_data)) new_data <- as.data.frame(new_data) - #TODO convert ts objects correctly - else if(is.ts(new_data)) { - ind <- zoo::index(new_data) - new_data <- data.frame(index = ind, value = new_data) - } - else if(is.array(new_data)) { - new_data <- as.data.frame(new_data) - } - else if(is.vector(new_data) && !is.list(new_data)) { - new_data <- as.data.frame(new_data) - } - - if(!is.data.frame(new_data)) { - stop("Data set must be of type: data.frame") - } - else { - if(length(new_data) == 0 && messages) { - message("data is empty. 
Data will be an empty data frame.") - } - if(check_names) { - # "T" should be avoided as a column name but is not checked by make.names() - if("T" %in% names(new_data)) names(new_data)[names(new_data) == "T"] <- ".T" - valid_names <- make.names(iconv(names(new_data), to = "ASCII//TRANSLIT", sub = "."), unique = TRUE) - if(!all(names(new_data) == valid_names)) { - warning("Not all column names are syntactically valid or unique. make.names() and iconv() will be used to force them to be valid and unique.") - names(new_data) <- valid_names - } - } - private$data <- new_data - self$append_to_changes(list(Set_property, "data")) - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE - } -} -) -DataSheet$set("public", "set_enable_disable_undo", function(disable_undo) { - private$disable_undo <- disable_undo - if(disable_undo) { - private$undo_history <- list() - gc() - } -}) - -DataSheet$set("public", "save_state_to_undo_history", function() { - self$set_undo_history(private$data, attributes(private$data)) -}) - -DataSheet$set("public", "is_undo", function() { - return(private$disable_undo) -}) - -DataSheet$set("public", "set_meta", function(new_meta) { - meta_data_copy <- new_meta - self$clear_metadata() - if(!is.list(meta_data_copy)) stop("new_meta must be of type: list") - for(name in names(meta_data_copy)) { - self$append_to_metadata(name, meta_data_copy[[name]]) - } - self$metadata_changed <- TRUE - self$append_to_changes(list(Set_property, "meta data")) -} -) - -#Dangerous to call directly as could remove properties needed by InstatObject -DataSheet$set("public", "clear_metadata", function() { - for(name in names(attributes(private$data))) { - if(!name %in% c(data_type_label, data_name_label, "row.names", "names")) attr(private$data, name) <- NULL - } - self$add_defaults_meta() - self$metadata_changed <- TRUE - self$append_to_changes(list(Set_property, "meta data")) -} -) - -DataSheet$set("public", "has_undo_history", function() { - 
return(length(private$undo_history) > 0) -} -) - -DataSheet$set("public", "undo_last_action", function() { - - # Check if there's any action to undo - if (length(private$undo_history) > 0) { - # Get the last state from the undo history - previous_state <- private$undo_history[[length(private$undo_history)]] - - # Restore the data and its attributes - restored_data <- previous_state$data # Extract the dataframe - restored_attributes <- previous_state$attributes # Extract the attributes - - # Set the dataframe in the DataSheet - self$set_data(as.data.frame(restored_data)) - - # Restore attributes - restored_attributes <- previous_state$attributes # Extract the attributes - for (property in names(restored_attributes)) { - self$append_to_metadata(property, restored_attributes[[property]]) - } - # Remove the latest state from the undo history - private$undo_history <- private$undo_history[-length(private$undo_history)] - - # Trigger garbage collection to free memory - gc() - } else { - message("No more actions to undo.") - } -}) - - -# Redo function -DataSheet$set("public", "redo_last_action", function() { - if (length(private$redo_undo_history) > 0) { - # Get the last undone state from redo undo_history - next_state <- private$redo_undo_history[[length(private$redo_undo_history)]] - - # Restore the next state - self$set_data(as.data.frame(next_state)) - - # Move the state back to the undo_history - private$undo_history <- append(private$undo_history, list(next_state)) - - # Remove the state from redo undo_history - private$redo_undo_history <- private$redo_undo_history[-length(private$redo_undo_history)] - } else { - message("No more actions to redo.") - } -}) - -#Removed until can be fixed with attributes -# DataSheet$set("public", "set_variables_metadata", function(new_meta) { -# if(!is.data.frame(new_meta)) stop("variable metadata must be of type: data.frame") -# -# private$variables_metadata <- new_meta -# self$append_to_changes(list(Set_property, "variable 
metadata")) -# } -# ) - -DataSheet$set("public", "set_changes", function(new_changes) { - if(!is.list(new_changes)) stop("Changes must be of type: list") - private$changes <- new_changes - self$append_to_changes(list(Set_property, "changes")) -} -) - -DataSheet$set("public", "set_filters", function(new_filters) { - if(!is.list(new_filters)) stop("Filters must be of type: list") - self$append_to_changes(list(Set_property, "filters")) - private$filters <- new_filters - if(!"no_filter" %in% names(private$filters)) { - self$add_filter(filter = list(), filter_name = "no_filter", replace = TRUE, set_as_current = TRUE, na.rm = FALSE, is_no_filter = TRUE) - } -} -) - -DataSheet$set("public", "set_column_selections", function(new_column_selections) { - stopifnot(is.list(new_column_selections)) - self$append_to_changes(list(Set_property, "column selections")) - private$column_selections <- new_column_selections - if(!".everything" %in% names(private$column_selections)) { - self$add_column_selection(column_selection = list(), name = ".everything", replace = TRUE, set_as_current = TRUE, is_everything = TRUE) - } -} -) - -DataSheet$set("public", "set_objects", function(new_objects) { - if(!is.list(new_objects)) stop("new_objects must be of type: list") - self$append_to_changes(list(Set_property, "objects")) - private$objects <- new_objects -} -) - -DataSheet$set("public", "set_calculations", function(new_calculations) { - if(!is.list(new_calculations)) stop("new_calculations must be of type: list") - self$append_to_changes(list(Set_property, "calculations")) - private$calculations <- new_calculations -} -) - -DataSheet$set("public", "set_scalars", function(new_scalars) { - if(!is.list(new_scalars)) stop("scalars must be of type: list") - self$append_to_changes(list(Set_property, "scalars")) - private$scalars <- new_scalars -} -) - -# Set undo_history with memory management -DataSheet$set("public", "set_undo_history", function(new_data, attributes = list()) { - if 
(!is.data.frame(new_data)) stop("new_data must be of type: data.frame") - - if (!private$disable_undo) { - # Define memory and undo_history limits - MAX_undo_history_SIZE <- 10 # Limit to last 10 undo_history states - MAX_MEMORY_LIMIT_MB <- 1024 # Limit the memory usage for undo_history - - # Check current memory usage - current_memory <- monitor_memory() - - # If memory exceeds limit, remove the oldest entry - if (current_memory > MAX_MEMORY_LIMIT_MB) { - message(paste("Memory limit exceeded:", round(current_memory, 2), "MB. Removing oldest entry.")) - private$undo_history <- private$undo_history[-1] # Remove the oldest entry - gc() # Trigger garbage collection to free memory - } - - # Limit undo_history size - if (length(private$undo_history) >= MAX_undo_history_SIZE) { - private$undo_history <- private$undo_history[-1] # Remove the oldest entry - gc() # Trigger garbage collection to free memory - } - - # Package the new data and attributes into a list - new_undo_entry <- list(data = new_data, attributes = attributes) - - # Append the new entry to the undo history - private$undo_history <- append(private$undo_history, list(new_undo_entry)) - } -}) - - -DataSheet$set("public", "set_keys", function(new_keys) { - if(!is.list(new_keys)) stop("new_keys must be of type: list") - self$append_to_changes(list(Set_property, "keys")) - private$keys <- new_keys -} -) - -DataSheet$set("public", "set_comments", function(new_comments) { - if(!is.list(new_comments)) stop("new_comments must be of type: list") - self$append_to_changes(list(Set_property, "comments")) - private$comments <- new_comments -} -) - -# DataSheet$set("public", "update_variables_metadata", function() { -# #Not needed now using attributes -# #if(ncol(private$data) != nrow(private$variables_metadata) || !all(colnames(private$data)==rownames(private$variables_metadata))) { -# # if(all(colnames(private$data) %in% rownames(private$variables_metadata))) { -# 
#self$set_variables_metadata(private$variables_metadata[colnames(private$data),]) -# # } -# # else { -# # } -# #} -# for(col in colnames(self$get_data_frame())) { -# if(!self$is_variables_metadata(signif_figures_label, col)) self$append_to_variables_metadata(col, signif_figures_label, get_default_significant_figures(self$get_columns_from_data(col, use_current_filter = FALSE))) -# #self$append_to_variables_metadata(col, data_type_label, class(private$data[[col]])) -# self$append_to_variables_metadata(col, name_label, col) -# } -# } -# ) - -DataSheet$set("public", "set_data_changed", function(new_val) { - self$data_changed <- new_val -} -) - -DataSheet$set("public", "set_variables_metadata_changed", function(new_val) { - self$variables_metadata_changed <- new_val -} -) - -DataSheet$set("public", "set_metadata_changed", function(new_val) { - self$metadata_changed <- new_val -} -) - -DataSheet$set("public", "get_data_frame", function(convert_to_character = FALSE, include_hidden_columns = TRUE, use_current_filter = TRUE, use_column_selection = TRUE, filter_name = "", column_selection_name = "", stack_data = FALSE, remove_attr = FALSE, retain_attr = FALSE, max_cols, max_rows, drop_unused_filter_levels = FALSE, start_row, start_col, ...) { - if(!stack_data) { - if(!include_hidden_columns && self$is_variables_metadata(is_hidden_label)) { - hidden <- self$get_variables_metadata(property = is_hidden_label) - hidden[is.na(hidden)] <- FALSE - out <- private$data[!hidden] - } - else out <- private$data - nam <- names(out) - if(use_current_filter && self$filter_applied()) { - if(filter_name != "") { - out <- out[self$current_filter & self$get_filter_as_logical(filter_name = filter_name), ] - } - else { - out <- out[self$current_filter, ] - #TODO This needs to be done for all cases! 
- if(drop_unused_filter_levels) out <- drop_unused_levels(out, self$get_current_filter_column_names()) - } - } - else { - if(filter_name != "") { - out <- out[self$get_filter_as_logical(filter_name = filter_name), ] - } - } - if (column_selection_name != "") { - selected_columns <- - self$get_column_selection_column_names(column_selection_name) - missing_columns <- - selected_columns[!selected_columns %in% names(private$data)] - - if (!length(missing_columns) > 0) { - out <- out[, selected_columns, drop = FALSE] - } - } - #TODO: consider removing include_hidden_columns argument from this function - if(use_column_selection && self$column_selection_applied()) { - old_metadata <- attributes(private$data) - selected_columns <- self$get_column_names() - - missing_columns <- selected_columns[!selected_columns %in% names(private$data)] - - if (!length(missing_columns) > 0) { - out <- out[, selected_columns, drop = FALSE] - for (name in names(old_metadata)) { - if (!(name %in% c("names", "class", "row.names"))) { - attr(out, name) <- old_metadata[[name]] - } - } - all_columns <- - self$get_column_names(use_current_column_selection = FALSE) - hidden_cols <- - all_columns[!(all_columns %in% selected_columns)] - self$append_to_variables_metadata(hidden_cols, is_hidden_label, TRUE) - private$.variables_metadata_changed <- TRUE - } - } - if(!is.data.frame(out)) { - out <- data.frame(out) - if(length(nam) == length(out)) names(out) <- nam - } - # This is needed as some R function misinterpret the class of a column - # when there are extra attributes on columns - if(remove_attr) { - for(i in seq_along(out)) { - attributes(out[[i]])[!names(attributes(out[[i]])) %in% c("class", "levels")] <- NULL - } - } - - # If a filter has been done, some column attributes are lost. - # This ensures they are present in the returned data. 
- if (retain_attr) { - for (col_name in names(out)) { - private_attr_names <- names(attributes(private$data[[col_name]])) - for (attr_name in private_attr_names) { - if (!attr_name %in% c("class", "names")) { - private_attr <- attr(private$data[[col_name]], attr_name) - if (!is.null(private_attr)) { - attr(out[[col_name]], attr_name) <- private_attr - } - } - } - } - } - - # If there is a start column, return columns from start onwards - if (!missing(start_col) && start_col <= ncol(out)) #out <- out[start_col:max_cols] - { - if (!missing(max_cols)) - { - # If maximum columns to be displayed is greater than number of columns return all columns - if (max_cols + start_col > ncol(out)){ - out <- out[start_col:ncol(out)] - } - # -1 after columns as start column is added and is not 0 based - else{ - out <- out[start_col:(start_col + max_cols - 1)] - } - } - else{ - out <- out[start_col:ncol(out)] - } - } - # If there is a maximum number of columns return up to that maximum - else if(!missing(max_cols) && max_cols < ncol(out)) out <- out[1:max_cols] - # If there is a start row, return rows from start onwards - if (!missing(start_row) && start_row <= nrow(out)) { - if (!missing(max_rows)){ - # If maximum rows to be displayed is greater than number of rows return all rows - if (max_rows + start_row > nrow(out)){ - if(ncol(out) == 1){ - #for data frames with 1 col use slice because out[1:max_rows, ] will return a vector - rnames <- row.names(out)[start_row:nrow(out)] - out <- as.data.frame(dplyr::slice(out,start_row:nrow(out))) - row.names(out) <- rnames - } - else { - out <- out[start_row:nrow(out), ] - } - } - else{ - #for data frames with 1 col use slice because out[1:max_rows, ] will return a vector - if(ncol(out) == 1){ - rnames <- row.names(out)[start_row:(start_row + max_rows - 1)] - out <- as.data.frame(dplyr::slice(out,start_row:(start_row + max_rows - 1))) - row.names(out) <- rnames - } - # -1 after rows as start row is added and is not 0 based - else { - out <- 
out[start_row:(start_row + max_rows - 1), ] - } - } - } - else{ - #for data frames with 1 col use slice because out[1:max_rows, ] will return a vector - if(ncol(out) == 1){ - rnames <- row.names(out)[start_row:nrow(out)] - out <- as.data.frame(dplyr::slice(out,start_row:nrow(out))) - row.names(out) <- rnames - } - else { - out <- out[start_row:nrow(out), ] - } - } - } - else if(!missing(max_rows) && max_rows < nrow(out)) { - #for data frames with 1 col use slice because out[1:max_rows, ] will return a vector - if(ncol(out) == 1){ - rnames <- row.names(out)[1:max_rows] - out <- as.data.frame(dplyr::slice(out,1:max_rows)) - row.names(out) <- rnames - } else { - out <- out[1:max_rows, ] - } - } - - if(convert_to_character) { - decimal_places = self$get_variables_metadata(property = signif_figures_label, column = names(out), error_if_no_property = FALSE, use_column_selection = use_column_selection) - scientific_notation = self$get_variables_metadata(property = scientific_label, column = names(out), error_if_no_property = FALSE) - return(convert_to_character_matrix(data = out, format_decimal_places = TRUE, decimal_places = decimal_places, is_scientific = scientific_notation)) - } - else return(out) - } - else { - return(reshape2::melt(self$get_data_frame(include_hidden_columns = include_hidden_columns, use_current_filter = use_current_filter, filter_name = filter_name), ...)) - } -} -) - -# As a temp fix to rlink crashing here we access private$data directly -DataSheet$set("public", "get_variables_metadata", function(data_type = "all", convert_to_character = FALSE, property, column, error_if_no_property = TRUE, direct_from_attributes = FALSE, use_column_selection = TRUE) { - #if(update) self$update_variables_metadata() - if(direct_from_attributes) { - #if(missing(property)) return(attributes(self$get_columns_from_data(column, use_current_filter = FALSE))) - if(missing(property)) return(attributes(private$data[[column]])) - #else 
return(attr(self$get_columns_from_data(column, use_current_filter = FALSE), property)) - else return(attr(private$data[[column]], property)) - } - # special case of getting "class" property which isn't always stored in attributes - else if(!missing(property) && length(property == 1) && property == data_type_label) { - if(missing(column)) column <- names(private$data) - #if(missing(column)) column <- self$get_column_names() - out <- sapply(private$data[column], class) - out <- sapply(out, function(x) paste(unlist(x), collapse = ",")) - return(as.vector(out)) - } - else { - out <- list() - #curr_data <- self$get_data_frame(use_current_filter = FALSE) - if(missing(column)) { - curr_data <- private$data - cols <- names(curr_data) - if(self$column_selection_applied()) cols <- self$current_column_selection - } - else { - cols <- column - if(self$column_selection_applied()) cols <- self$current_column_selection - curr_data <- private$data[column] - } - for (i in seq_along(cols)) { - col <- curr_data[[cols[i]]] - ind <- which(names(attributes(col)) == "levels") - if(length(ind) > 0) col_attributes <- attributes(col)[-ind] - else col_attributes <- attributes(col) - if(is.null(col_attributes)) col_attributes <- list() - col_attributes[[data_type_label]] <- class(col) - for(j in seq_along(col_attributes)) { - att_name <- names(col_attributes)[j] - if(att_name == labels_label) { - num_labels <- length(col_attributes[[att_name]]) - max_labels <- min(max_labels_display, num_labels) - col_attributes[[att_name]] <- paste(names(col_attributes[[att_name]])[1:max_labels], "=", col_attributes[[att_name]][1:max_labels], collapse = ", ") - if(num_labels > max_labels) col_attributes[[att_name]] <- paste0(col_attributes[[att_name]], "...") - } - else if(is.list(col_attributes[[att_name]]) || length(col_attributes[[att_name]]) > 1) col_attributes[[att_name]] <- paste(unlist(col_attributes[[att_name]]), collapse = ",") - # TODO Possible alternative to include names of list - # TODO See how 
to have data frame properly containing lists - #if(is.list(col_attributes[[att_name]]) || length(col_attributes[[att_name]]) > 1) col_attributes[[att_name]] <- paste(names(unlist(col_attributes[[att_name]])), unlist(col_attributes[[att_name]]), collapse = ",") - } - #if(is.null(col_attributes)) { - # col_attributes <- data.frame(class = NA) - #} - col_attributes <- data.frame(col_attributes, stringsAsFactors = FALSE) - out[[i]] <- col_attributes - } - #RLink crashes with bind_rows for some data frames with ~50+ columns - #rbind.fill safer alternative currently - out <- plyr::rbind.fill(out) - out <- as.data.frame(out) - if(all(c(name_label, label_label) %in% names(out))) out <- out[c(c(name_label, label_label), sort(setdiff(names(out), c(name_label, label_label))))] - else if(name_label %in% names(out)) out <- out[c(name_label, sort(setdiff(names(out), name_label)))] - #row.names(out) <- self$get_column_names() - row.names(out) <- cols - if(data_type != "all") { - if(data_type == "numeric") { - out <- out[out[[data_type_label]] %in% c("numeric", "integer"), ] - } - else { - out <- out[out[[data_type_label]] == data_type, ] - } - } - not_found <- FALSE - if(!missing(property)) { - if(!property %in% names(out)) { - if(error_if_no_property) stop(property, " not found in variables metadata") - not_found <- TRUE - } - if(!missing(column)) { - #if(!all(column %in% self$get_column_names())) stop(column, " not found in data") - if(!all(column %in% names(private$data))) stop(column, " not found in data") - if(not_found) out <- rep(NA, length(column)) - else out <- out[column, property] - } - else { - #if(not_found) out <- rep(NA, length(self$get_column_names())) - if(not_found) out <- rep(NA, ncol(private$data)) - else out <- out[, property] - } - } - if(is.data.frame(out)) row.names(out) <- NULL - #TODO get convert_to_character_matrix to work on vectors - if(convert_to_character && missing(property)) return(convert_to_character_matrix(out, FALSE)) - else return(out) - } -} 
-) - -DataSheet$set("public", "get_column_data_types", function(columns) { - if(missing(columns)) return(as.vector(sapply(private$data, function(x) paste(class(x), collapse = ",")))) - else return(as.vector(sapply(private$data[columns], function(x) paste(class(x), collapse = ","), USE.NAMES = FALSE))) -} -) - -DataSheet$set("public", "get_column_labels", function(columns) { - if(missing(columns)) return(as.vector(sapply(private$data, function(x) paste(attr(x, "label"), collapse = ",")))) - else return(as.vector(sapply(private$data[columns], function(x) paste(attr(x, "label"), collapse = ","), USE.NAMES = FALSE))) -} -) - -DataSheet$set("public", "get_data_frame_label", function(use_current_filter = FALSE) { - return(attr(self$get_data_frame(use_current_filter = use_current_filter), "label")) -} -) - -DataSheet$set("public", "clear_variables_metadata", function() { - for(column in self$get_data_frame(use_current_filter = FALSE)) { - for(name in names(attributes(column))) { - if(!name %in% c(data_type_label, data_name_label)) attr(self, name) <- NULL - } - } - self$add_defaults_variables_metadata(self$get_column_names()) -} -) - -DataSheet$set("public", "get_metadata", function(label, include_calculated = TRUE, excluded_not_for_display = TRUE) { - curr_data <- self$get_data_frame(use_current_filter = FALSE) - n_row <- self$get_data_frame_length(use_current_filter = TRUE) #this is to avoid an eventual bug if we consider using curr_data <- self$get_data_frame(use_current_filter = TRUE) - if(missing(label)) { - if(include_calculated) { - #Must be private$data because assigning attribute to data field - attr(curr_data, row_count_label) <- n_row - attr(curr_data, column_count_label) <- ncol(curr_data) - } - if(excluded_not_for_display) { - ind <- which(names(attributes(curr_data)) %in% c("names", "row.names")) - if(length(ind) > 0) out <- attributes(curr_data)[-ind] - } - else out <- attributes(curr_data) - return(out) - } - else { - if(label %in% 
names(attributes(curr_data))) return(attributes(curr_data)[[label]]) - else if(label == row_count_label) return(n_row) - else if(label == column_count_label) return(ncol(curr_data)) - else return("") - } -} -) - -DataSheet$set("public", "get_changes", function() { - return(private$changes) -} -) - -DataSheet$set("public", "get_calculations", function() { - return(private$calculations) -} -) - -DataSheet$set("public", "get_calculation_names", function(as_list = FALSE, excluded_items = c()) { - out = names(private$calculations) - if(length(excluded_items) > 0) { - ex_ind = which(out %in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the list of calculations") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(!as_list) { - return(out) - } - lst = list() - lst[[self$get_metadata(data_name_label)]] <- out - return(lst) -} -) - -DataSheet$set("public", "get_scalars", function() { - out <- - private$scalars[self$get_scalar_names()] - return(out) -} -) - -DataSheet$set("public", "get_scalar_names", function(as_list = FALSE, excluded_items = c(),...) { - out <- get_data_book_scalar_names(scalar_list = private$scalars, - as_list = as_list, - list_label= self$get_metadata(data_name_label) ) - return(out) -} -) - -DataSheet$set("public", "get_scalar_value", function(scalar_name) { - if(missing(scalar_name)) stop(stop("scalar_name must be specified.")) - return(private$scalars[[scalar_name]]) -} -) - -DataSheet$set("public", "add_scalar", function(scalar_name = "", scalar_value) { - if(missing(scalar_name)) scalar_name <- next_default_item("scalar", names(private$scalars)) - if(scalar_name %in% names(private$scalars)) warning("A scalar called", scalar_name, "already exists. 
It will be replaced.") - private$scalars[[scalar_name]] <- scalar_value - self$append_to_metadata(scalar, private$scalars) - self$append_to_changes(list(Added_scalar, scalar_name)) - cat(paste("Scalar name: ", scalar_name), - paste("Value: ", private$scalars[[scalar_name]]), - sep = "\n") -} -) - -DataSheet$set("public", "add_columns_to_data", function(col_name = "", col_data, use_col_name_as_prefix = FALSE, hidden = FALSE, before, adjacent_column = "", num_cols, require_correct_length = TRUE, keep_existing_position = TRUE) { - # Save the current state to undo_history before making modifications - self$save_state_to_undo_history() - - # Column name must be character - if(!is.character(col_name)) stop("Column name must be of type: character") - if(missing(num_cols)) { - if(missing(col_data)) stop("One of num_cols or col_data must be specified.") - if(!missing(col_data) && (is.matrix(col_data) || is.data.frame(col_data))) { - num_cols = ncol(col_data) - } - else num_cols = 1 - if(tibble::is_tibble(col_data)) col_data <- data.frame(col_data) - } - else { - if(missing(col_data)) col_data = replicate(num_cols, rep(NA, self$get_data_frame_length())) - else { - if(length(col_data) != 1) stop("col_data must be a vector/matrix/data.frame of correct length or a single value to be repeated.") - col_data = replicate(num_cols, rep(col_data, self$get_data_frame_length())) - } - } - if( col_name != "" && (length(col_name) != 1) && (length(col_name) != num_cols) ) stop("col_name must be a character or character vector with the same length as the number of new columns") - if(col_name == "") { - if(!is.null(colnames(col_data)) && length(colnames(col_data)) == num_cols) { - col_name = colnames(col_data) - } - else { - col_name = "X" - use_col_name_as_prefix = TRUE - } - } - - if(length(col_name) != num_cols && (num_cols == 1 || length(col_name) == 1)) { - use_col_name_as_prefix = TRUE - } else use_col_name_as_prefix = FALSE - - replaced <- FALSE - previous_length = 
self$get_column_count() - if(adjacent_column != "" && !adjacent_column %in% self$get_column_names()) stop(adjacent_column, "not found in the data") - - new_col_names <- c() - for(i in 1:num_cols) { - if(num_cols == 1) { - curr_col = col_data - } - else curr_col = col_data[,i] - if(is.matrix(curr_col) || is.data.frame(curr_col)) curr_col = curr_col[,1] - if(self$get_data_frame_length() %% length(curr_col) != 0) { - if(require_correct_length) stop("Length of new column must be divisible by the length of the data frame") - else curr_col <- rep(curr_col, length.out = self$get_data_frame_length()) - } -print(use_col_name_as_prefix) - if(use_col_name_as_prefix) curr_col_name = self$get_next_default_column_name(col_name) - else curr_col_name = col_name[i] - - curr_col_name <- make.names(iconv(curr_col_name, to = "ASCII//TRANSLIT", sub = ".")) - new_col_names <- c(new_col_names, curr_col_name) - if(curr_col_name %in% self$get_column_names()) { - message(paste("A column named", curr_col_name, "already exists. The column will be replaced in the data")) - self$append_to_changes(list(Replaced_col, curr_col_name)) - replaced <- TRUE - } - else self$append_to_changes(list(Added_col, curr_col_name)) - private$data[[curr_col_name]] <- curr_col - self$data_changed <- TRUE - } - self$add_defaults_variables_metadata(new_col_names) - - # If replacing existing columns and not repositioning them, or before and adjacent_column column positioning parameters are missing - # then do not reposition. 
- if((replaced && keep_existing_position) || (missing(before) && adjacent_column == "")) return() - - # Get the adjacent position to be used in appending the new column names - if(before) { - if(adjacent_column == "") adjacent_position <- 0 - else adjacent_position <- which(self$get_column_names(use_current_column_selection =FALSE) == adjacent_column) - 1 - } else { - if(adjacent_column == "") adjacent_position <- self$get_column_count() - else adjacent_position <- which(self$get_column_names(use_current_column_selection =FALSE) == adjacent_column) - } - # Replace existing names with empty placeholders. Maintains the indices - temp_all_col_names <- replace(self$get_column_names(use_current_column_selection = FALSE), self$get_column_names(use_current_column_selection = FALSE) %in% new_col_names, "") - # Append the newly added column names after the set position - new_col_names_order <- append(temp_all_col_names, new_col_names, adjacent_position) - # Remove all empty characters placeholders to get final reordered column names - new_col_names_order <- new_col_names_order[! new_col_names_order == ""] - # Only do reordering if the column names order differ -if(!all(self$get_column_names(use_current_column_selection = FALSE) == new_col_names_order)) self$reorder_columns_in_data(col_order=new_col_names_order) -} -) - -#A bug in sjPlot requires removing labels when a factor column already has labels, using remove_labels for this if needed. 
-DataSheet$set("public", "get_columns_from_data", function(col_names, force_as_data_frame = FALSE, use_current_filter = TRUE, use_column_selection = TRUE, remove_labels = FALSE, drop_unused_filter_levels = FALSE) { - if(missing(col_names)) stop("no col_names to return") - #if(!all(col_names %in% self$get_column_names())) stop("Not all column names were found in data") - if(!all(col_names %in% names(private$data))) stop("Not all column names were found in data") - - if(length(col_names)==1) { - if(force_as_data_frame) { - dat <- self$get_data_frame(use_current_filter = use_current_filter, use_column_selection = use_column_selection, drop_unused_filter_levels = drop_unused_filter_levels)[col_names] - if(remove_labels) { - for(i in seq_along(dat)) { - if(!is.numeric(dat[[i]])) attr(dat[[i]], "labels") <- NULL - } - } - return(dat) - } - else { - dat <- self$get_data_frame(use_current_filter = use_current_filter, use_column_selection = use_column_selection, drop_unused_filter_levels = drop_unused_filter_levels)[[col_names]] - if(remove_labels && !is.numeric(dat)) attr(dat, "labels") <- NULL - return(dat) - } - } - else { - dat <- self$get_data_frame(use_current_filter = use_current_filter, use_column_selection = use_column_selection, drop_unused_filter_levels = drop_unused_filter_levels)[col_names] - if(remove_labels) { - for(i in seq_along(dat)) { - if(!is.numeric(dat[[i]])) attr(dat[[i]], "labels") <- NULL - } - } - return(dat) - } -} -) - -DataSheet$set("public", "anova_tables", function(x_col_names, y_col_name, signif.stars = FALSE, sign_level = FALSE, means = FALSE) { - if(missing(x_col_names) || missing(y_col_name)) stop("Both x_col_names and y_col_names are required") - if(sign_level || signif.stars) message("This is no longer descriptive") - if(sign_level) end_col = 5 else end_col = 4 - for (i in seq_along(x_col_names)) { - mod <- lm(formula = as.formula(paste0("as.numeric(", as.name(y_col_name), ") ~ ", as.name(x_col_names[i]))), data = self$get_data_frame()) 
- cat("ANOVA table: ", y_col_name, " ~ ", x_col_names[i], "\n", sep = "") - print(anova(mod)[1:end_col], signif.stars = signif.stars) - cat("\n") - if(means) (print(model.tables(aov(mod), type = "means"))) - } -} -) - -DataSheet$set("public", "cor", function(x_col_names, y_col_name, use = "everything", method = c("pearson", "kendall", "spearman")) { - x <- self$get_columns_from_data(x_col_names, force_as_data_frame = TRUE) - y <- self$get_columns_from_data(y_col_name) - x <- sapply(x, as.numeric) - y <- as.numeric(y) - results <- cor(x = x, y = y, use = use, method = method) - dimnames(results)[[2]] <- y_col_name - cat("Correlations:\n") - return(t(results)) -} -) - -DataSheet$set("public", "update_selection", function(new_values, column_selection_name = NULL) { - if (missing(new_values)) stop("new_values is required") - if (missing(column_selection_name)) stop("column_selection_name is required") - - column_selection_obj <- private$column_selections[[column_selection_name]] - - if (is.null(column_selection_obj)) { - stop("No column selection found with the name: ", column_selection_name) - } - - # Update conditions in the column selection with new values - updated_conditions <- lapply(column_selection_obj$conditions, function(condition) { - # Check if the parameters exist and replace them with new values - if ("parameters" %in% names(condition)) { - condition$parameters$x <- new_values - } - return(condition) - }) - - # Update the column selection object with the new conditions - column_selection_obj$conditions <- updated_conditions - private$column_selections[[column_selection_name]] <- column_selection_obj - - # Optionally, mark data as changed - self$data_changed <- TRUE - - message("Column selection '", column_selection_name, "' updated successfully with new values.") -}) - - -DataSheet$set("public", "rename_column_in_data", function(curr_col_name = "", new_col_name = "", label = "", type = "single", .fn, .cols = everything(), new_column_names_df, 
new_labels_df, ...) { - curr_data <- self$get_data_frame(use_current_filter = FALSE, use_column_selection = FALSE) - - # Save the current state to undo_history before making modifications - self$save_state_to_undo_history() - - # Column name must be character - if (type == "single") { - if (new_col_name != curr_col_name) { - if (new_col_name %in% names(curr_data)) { - stop("Cannot rename this column. A column named: ", new_col_name, " already exists in the data.") - } - if (!is.character(curr_col_name)) { - stop("Current column name must be of type: character") - } else if (!(curr_col_name %in% names(curr_data))) { - stop(paste0("Cannot rename column: ", curr_col_name, ". Column was not found in the data.")) - } else if (!is.character(new_col_name)) { - stop("New column name must be of type: character") - } else { - if (sum(names(curr_data) == curr_col_name) > 1) { - # Should never happen since column names must be unique - warning("Multiple columns have name: '", curr_col_name, "'. All such columns will be renamed.") - } - # remove key - get_key <- self$get_variables_metadata() %>% dplyr::filter(Name == curr_col_name) - if (!is.null(get_key$Is_Key)){ - if (!is.na(get_key$Is_Key) && get_key$Is_Key){ - active_keys <- self$get_keys() - keys_to_delete <- which(grepl(curr_col_name, active_keys)) - keys_to_delete <- purrr::map_chr(.x = keys_to_delete, .f = ~names(active_keys[.x])) - purrr::map(.x = keys_to_delete, .f = ~self$remove_key(key_name = names(active_keys[.x]))) - } - } - # Need to use private$data here because changing names of data field - names(private$data)[names(curr_data) == curr_col_name] <- new_col_name - - column_names <- self$get_column_names() - - if (anyNA(column_names)) { - column_names[is.na(column_names)] <- new_col_name - } else { - column_names <- new_col_name - } - - self$update_selection(column_names, private$.current_column_selection$name) - if(any(c("sfc", "sfc_MULTIPOLYGON") %in% class(private$data[[curr_col_name]]))){ - # Update the 
geometry column reference - sf::st_geometry(private$data) <- new_col_name - } - names(private$data)[names(private$data) == curr_col_name] <- new_col_name - - self$append_to_variables_metadata(new_col_name, name_label, new_col_name) - # TODO decide if we need to do these 2 lines - self$append_to_changes(list(Renamed_col, curr_col_name, new_col_name)) - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE - } - } - if (label != "") { - self$append_to_variables_metadata(col_name = new_col_name, property = "label", new_val = label) - self$variables_metadata_changed <- TRUE - } - } else if (type == "multiple") { - if (!missing(new_column_names_df)) { - new_col_names <- new_column_names_df[, 1] - cols_changed_index <- which(names(private$data) %in% new_column_names_df[, 2]) - curr_col_names <- names(private$data) - curr_col_names[cols_changed_index] <- new_col_names - if(any(duplicated(curr_col_names))) stop("Cannot rename columns. Column names must be unique.") - names(private$data)[cols_changed_index] <- new_col_names - - column_names <- self$get_column_names() - - if (anyNA(column_names)) { - column_names[is.na(column_names)] <- new_col_names - } else { - column_names <- new_col_names - } - - self$update_selection(column_names, private$.current_column_selection$name) - - if(any(c("sfc", "sfc_MULTIPOLYGON") %in% class(private$dataprivate$data)[cols_changed_index])){ - # Update the geometry column reference - sf::st_geometry(private$data) <- new_col_names - } - names(private$data)[cols_changed_index] <- new_col_names - - for (i in seq_along(cols_changed_index)) { - self$append_to_variables_metadata(new_col_names[i], name_label, new_col_names[i]) - } - } - if (!missing(new_labels_df)) { - new_labels <- new_labels_df[, 1] - new_labels_index <- new_labels_df[, 2] - for (i in seq_along(new_labels)) { - if (isTRUE(new_labels[i] != "")) { - self$append_to_variables_metadata(col_name = names(private$data)[new_labels_index[i]], property = "label", new_val = 
new_labels[i]) - } - } - } - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE - } else if (type == "rename_with") { - if (missing(.fn)) stop(.fn, "is missing with no default.") - curr_col_names <- names(curr_data) - column_names <- self$get_column_names() - private$data <- curr_data |> - dplyr::rename_with( - .fn = .fn, - .cols = {{ .cols }}, ... - ) - - new_col_names <- names(private$data) - if (!all(new_col_names %in% curr_col_names)) { - new_col_names <- new_col_names[!(new_col_names %in% curr_col_names)] - for (i in seq_along(new_col_names)) { - self$append_to_variables_metadata(new_col_names[i], name_label, new_col_names[i]) - } - - column_names <- self$get_column_names() - if (anyNA(column_names)) { - column_names[is.na(column_names)] <- new_col_names - } else { - column_names <- new_col_names - } - - self$update_selection(column_names, private$.current_column_selection$name) - - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE - } - } else if (type == "rename_labels"){ - # to rename column labels. Here, instead of renaming a column name, we're giving new values in a column. - curr_metadata <- self$get_variables_metadata() - curr_col_names <- names(curr_data %>% dplyr::select(.cols)) - - # create a new data frame containing the changes - but only apply to those that we actually plan to change for efficiency. - new_metadata <- curr_metadata |> - dplyr::filter(Name %in% curr_col_names) %>% - dplyr::mutate( - dplyr::across( - label, - ~ .fn(., ...) 
- ) - ) - - if(self$column_selection_applied()) self$remove_current_column_selection() - # apply the changes - new_label_names <- new_metadata[!("Name" %in% curr_col_names)]$label - for (i in seq_along(new_label_names)) { - self$append_to_variables_metadata(curr_col_names[i], property = "label", new_val = new_label_names[i]) - } - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE - } -}) - - - -DataSheet$set("public", "remove_columns_in_data", function(cols=c(), allow_delete_all = FALSE) { - # Save the current state to undo_history before making modifications - self$save_state_to_undo_history() - - if(length(cols) == self$get_column_count()) { - if(allow_delete_all) { - warning("You are deleting all columns in the data frame.") - } else { - stop("Cannot delete all columns through this function. Use delete_dataframe to delete the data.") - } - } - for(col_name in cols) { - # Column name must be character - if(!is.character(col_name)) { - stop("Column name must be of type: character") - } else if (!(col_name %in% self$get_column_names())) { - stop(paste0("Column :'", col_name, " was not found in the data.")) - } else { - get_key <- self$get_variables_metadata() %>% dplyr::filter(Name == col_name) - if (!is.null(get_key$Is_Key)){ - if (!is.na(get_key$Is_Key) && get_key$Is_Key){ - active_keys <- self$get_keys() - keys_to_delete <- which(grepl(col_name, active_keys)) - keys_to_delete <- purrr::map_chr(.x = keys_to_delete, .f = ~names(active_keys[.x])) - purrr::map(.x = keys_to_delete, .f = ~self$remove_key(key_name = names(active_keys[.x]))) - } - } - private$data[[col_name]] <- NULL - } - self$append_to_changes(list(Removed_col, cols)) - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE -} -} -) - -DataSheet$set("public", "replace_value_in_data", function(col_names, rows, old_value, old_is_missing = FALSE, start_value = NA, end_value = NA, new_value, new_is_missing = FALSE, closed_start_value = TRUE, closed_end_value = TRUE, locf = 
FALSE, from_last = FALSE) { - curr_data <- self$get_data_frame(use_current_filter = FALSE) - self$save_state_to_undo_history() - # Column name must be character - if(!all(is.character(col_names))) stop("Column name must be of type: character") - if (!all(col_names %in% names(curr_data))) stop("Cannot find all columns in the data.") - if(!missing(rows) && !all(rows %in% row.names(curr_data))) stop("Not all rows found in the data.") - if(!is.na(start_value) && !is.numeric(start_value)) stop("start_value must be numeric") - if(!is.na(end_value) && !is.numeric(end_value)) stop("start_value must be numeric") - if(old_is_missing) { - if(!missing(old_value)) stop("Specify only one of old_value and old_is_missing") - old_value <- NA - } - if(new_is_missing) { - if(!missing(new_value)) stop("Specify only one of new_value and new_is_missing") - new_value <- NA - } - data_row_names <- row.names(curr_data) - filter_applied <- self$filter_applied() - if(filter_applied) curr_filter <- self$current_filter - for(col_name in col_names) { - done = FALSE - str_data_type <- self$get_variables_metadata(property = data_type_label, column = col_name) - curr_column <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - if(locf){ - my_data <- zoo::na.locf(curr_column, fromLast = from_last, na.rm = FALSE) - } - else{ - if("factor" %in% str_data_type) { - if(!missing(rows)) { - if(!is.na(new_value) && !new_value %in% levels(self$get_columns_from_data(col_name, use_current_filter = FALSE))) { - stop("new_value must be an existing level of the factor column.") - } - replace_rows <- (data_row_names %in% rows) - } - else { - if(filter_applied) stop("Cannot replace values in a factor column when a filter is applied. Remove the filter to do this replacement.") - if(is.na(old_value)) { - if(!is.na(new_value) && !new_value %in% levels(self$get_columns_from_data(col_name, use_current_filter = FALSE))) stop(new_value, " is not a level of this factor. 
Add this as a level of the factor before using replace.") - replace_rows <- (is.na(curr_column)) - } - else { - self$edit_factor_level(col_name = col_name, old_level = old_value, new_level = new_value) - done = TRUE - } - } - } - else if(str_data_type == "integer" || str_data_type == "numeric") { - if(!is.na(new_value)) { - if(!is.numeric(new_value)) stop(col_name, " is a numeric/integer column. new_value must be of the same type") - if(str_data_type == "integer" && !(new_value %% 1 == 0)) stop(col_name, " is an integer column. new_value must be an integer") - } - if(!missing(rows)) { - replace_rows <- (data_row_names %in% rows) - if(!missing(old_value) || !is.na(start_value) || !is.na(end_value)) warning("old_value, start_value and end_value will be ignored because rows has been specified.") - } - else { - if(!is.na(start_value) || !is.na(end_value)) { - if(!missing(old_value)) warning("old_value will be ignored because start_value or end_value has been specified.") - if(closed_start_value) start_value_ineq = match.fun(">=") - else start_value_ineq = match.fun(">") - if(closed_end_value) end_value_ineq = match.fun("<=") - else end_value_ineq = match.fun("<") - - if(!is.na(start_value) && is.na(end_value)) { - replace_rows <- start_value_ineq(curr_column, start_value) - } - else if(is.na(start_value) && !is.na(end_value)) { - replace_rows <- end_value_ineq(curr_column, end_value) - } - else if(!is.na(start_value) && !is.na(end_value)) { - replace_rows <- (start_value_ineq(curr_column,start_value) & end_value_ineq(curr_column, end_value)) - } - } - else { - if(is.na(old_value)) replace_rows <- (is.na(curr_column)) - else replace_rows <- (curr_column == old_value) - } - } - } - else if(str_data_type == "character") { - if(!missing(rows)) { - replace_rows <- (data_row_names %in% rows) - if(!missing(old_value)) warning("old_value will be ignored because rows has been specified.") - } - else { - if(is.na(old_value)) replace_rows <- (is.na(curr_column)) - else 
replace_rows <- (curr_column == old_value) - } - new_value <- as.character(new_value) - } - else if(str_data_type == "logical") { - #Removed because new columns are logical and we need to be able to type in new values - #if(!is.logical(new_value)) stop(col_name, " is a logical column. new_value must be a logical value") - if(!missing(rows)) { - replace_rows <- (data_row_names %in% rows) - if(!missing(old_value)) warning("old_value will be ignored because rows has been specified.") - } - else { - if(is.na(old_value)) replace_rows <- (is.na(curr_column)) - else replace_rows <- (curr_column == old_value) - } - } - #TODO add other data type cases - else { - if(!missing(rows)) { - replace_rows <- (data_row_names %in% rows) - if(!missing(old_value)) warning("old_value will be ignored because rows has been specified.") - } - else { - if(is.na(old_value)) replace_rows <- (is.na(curr_column)) - else replace_rows <- (curr_column == old_value) - } - } - - } - if(!done) { - if(locf){ - private$data[[col_name]] <- my_data - } - else{ - replace_rows[is.na(replace_rows)] <- FALSE - if(sum(replace_rows) > 0) { - if(filter_applied) { - replace_rows <- replace_rows & curr_filter - } - # Need private$data here as replacing values in data - - if(sum(replace_rows) > 0) private$data[[col_name]][replace_rows] <- new_value - else message("No values to replace in ", col_name) - } - else message("No values to replace in ", col_name) - } - - } - } - #TODO need to think what to add to changes - self$append_to_changes(list(Replaced_value, col_names)) - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE -} -) - -#reads passed clipboard data and saves it to selected data frame -DataSheet$set("public", "paste_from_clipboard", function(col_names, start_row_pos = 1, first_clip_row_is_header = FALSE, clip_board_text) { - - #get the clipboard text contents as a data frame - clip_tbl <- clipr::read_clip_tbl(x = clip_board_text, header = first_clip_row_is_header) - - #get the selected 
data frame - current_tbl <- self$get_data_frame(use_current_filter = FALSE) - - #check if copied data rows are more than current data rows - if( nrow(clip_tbl) > nrow(current_tbl) ){ - stop(paste("rows copied cannot be more than number of rows in the data frame.", - "Current data frame rows:", nrow(current_tbl), ". Copied rows:", nrow(clip_tbl)) ) - } - - - #if column names are missing then just add the clip data as new columns and quit function - if( missing(col_names) ){ - #append missing values if rows are less than the selected data frame. - #new column rows should be equal to existing column rows - if( nrow(clip_tbl) < nrow(current_tbl) ){ - empty_values_df <- data.frame(data = matrix(data = NA, nrow = ( nrow(current_tbl) - nrow(clip_tbl) ), ncol = ncol(clip_tbl) )) - names(empty_values_df) <- names(clip_tbl) - clip_tbl <- rbind(clip_tbl, empty_values_df) - } - new_col_names <- colnames(clip_tbl) - for(index in seq_along(new_col_names)){ - self$add_columns_to_data(col_name = new_col_names[index], col_data = clip_tbl[, index]) - } - return() - } - - #for existing column names - #check if number of copied columns and selected columns are equal - if(ncol(clip_tbl) != length(col_names)){ - stop(paste("number of columns are not the same.", - "Selected columns:", length(col_names), ". Copied columns:", ncol(clip_tbl)) ) - } - - - #check copied data integrity - for(index in seq_along(col_names)){ - col_data <- current_tbl[, col_names[index]] - #get column type of column from the current table using column name - col_type <- class(col_data) - #check copied data integrity based on the data type expected - if (is.factor(col_data)) { - #get all the factor levels of the selected column in the current data frame - expected_factor_levels <- levels(col_data) - #check if all copied data values are contained in the factor levels - #if any invalid is found. 
exit function - for(val in clip_tbl[,index]){ - if(!is.na(val) && !is.element(val,expected_factor_levels)){ - stop("Invalid column values. Level not found in factor") - } - }#end inner for loop - } else if( !(is.numeric(col_data) || is.logical(col_data) || is.character(col_data)) ) { - #clipr support above column types only. So pasting to a column not recognised by clipr may result to unpredictible results - #if not in any of above column types then exit function - stop( paste("Cannot paste into columns of type:", col_type) ) - }#end if - }#end outer for loop - - #replace values in the selected columns - for(index in seq_along(col_names)){ - #set the row positions and the values - rows_to_replace <- c(start_row_pos : (start_row_pos + nrow(clip_tbl) - 1 )) - new_values <- clip_tbl[,index] - - # Replace the old values with new values - for (i in seq_along(new_values)) { - # Replace each value one by one - self$replace_value_in_data(col_names = col_names[index], rows = rows_to_replace[i], new_value = new_values[i]) - } - - #rename header if first row of clip data is header. - if(first_clip_row_is_header){ - self$rename_column_in_data(curr_col_name = col_names[index], new_col_name = colnames(clip_tbl)[index]) - } - }#end for loop -} -)#end function - -DataSheet$set("public", "append_to_metadata", function(property, new_value = "") { - if(missing(property)) stop("property must be specified.") - - if (!is.character(property)) stop("property must be of type: character") - - attr(private$data, property) <- new_value - self$append_to_changes(list(Added_metadata, property, new_value)) - self$metadata_changed <- TRUE - # Not sure this is correct way to ensure unhidden data frames appear. 
- # Possibly better to modify the Grid Link - if(property == is_hidden_label) self$data_changed <- TRUE -} -) - -DataSheet$set("public", "append_to_variables_metadata", function(col_names, property, new_val = "") { - if (missing(property)) stop("property must be specified.") - if (!is.character(property)) stop("property must be a character") - if (!missing(col_names)) { - # if(!all(col_names %in% self$get_column_names())) stop("Not all of ", paste(col_names, collapse = ","), " found in data.") - if (!all(col_names %in% names(private$data))) stop("Not all of ", paste(col_names, collapse = ","), " found in data.") - for (curr_col in col_names) { - #see comments in PR #7247 to understand why ' property == labels_label && new_val == "" ' check was added - #see comments in issue #7337 to understand why the !is.null(new_val) check was added. - if (((property == labels_label && any(new_val == "")) || (property == colour_label && new_val == -1)) && !is.null(new_val)) { - #reset the column labels or colour property - attr(private$data[[curr_col]], property) <- NULL - } else { - attr(private$data[[curr_col]], property) <- new_val - } - self$append_to_changes(list(Added_variables_metadata, curr_col, property)) - } - } else { - for (col_name in self$get_column_names()) { - #see comments in PR #7247 to understand why ' property == labels_label && new_val == "" ' check was added - #see comments in issue #7337 to understand why the !is.null(new_val) check was added. 
- if (((property == labels_label && any(new_val == "")) || (property == colour_label && new_val == -1)) && !is.null(new_val)) { - #reset the column labels or colour property - attr(private$data[[col_name]], property) <- NULL - } else { - attr(private$data[[col_name]], property) <- new_val - } - } - self$append_to_changes(list(Added_variables_metadata, property, new_val)) - } - self$variables_metadata_changed <- TRUE - self$data_changed <- TRUE -}) - -DataSheet$set("public", "append_to_changes", function(value) { - - #functionality disabled temporarily - #see PR #8465 and issue #7161 comments - - #if(missing(value)) { - # stop("value arguements must be specified.") - #}else { - #see comments in issue #7161 that explain more about why list() was used - #primary reason was because of performance when it comes to wide data sets - #private$changes[[length(private$changes)+1]] <- value - #private$changes<-list(private$changes, value) - #} -} -) - -DataSheet$set("public", "is_metadata", function(str) { - return(str %in% names(attributes(private$data))) -} -) - -DataSheet$set("public", "is_variables_metadata", function(str, col, return_vector = FALSE) { - if(str == data_type_label) return(TRUE) - if(missing(col)) { - dat <- self$get_data_frame(use_current_filter = FALSE) - return(any(sapply(dat, function(x) str %in% names(attributes(x))), na.rm = TRUE)) - } - else { - out <- sapply(col, function(x) str %in% names(attributes(self$get_columns_from_data(x, use_current_filter = FALSE)))) - if(return_vector) return(out) - else return(all(out)) - } -} -) - -DataSheet$set("public", "add_defaults_meta", function() { - if(!self$is_metadata(is_calculated_label)) self$append_to_metadata(is_calculated_label, FALSE) - if(!self$is_metadata(is_hidden_label)) self$append_to_metadata(is_hidden_label, FALSE) - if(!self$is_metadata(label_label)) self$append_to_metadata(label_label, "") -} -) - -DataSheet$set("public", "add_defaults_variables_metadata", function(column_names) { - for(column 
in column_names) { - self$append_to_variables_metadata(column, name_label, column) - if(!self$is_variables_metadata(is_hidden_label, column)) { - self$append_to_variables_metadata(column, property = is_hidden_label, new_val = FALSE) - } - if(!self$is_variables_metadata(label_label, column)) { - self$append_to_variables_metadata(column, label_label, "") - } - if(!self$is_variables_metadata(scientific_label, column)) { - self$append_to_variables_metadata(column, scientific_label, FALSE) - } - if(!self$is_variables_metadata(signif_figures_label, column) || is.na(self$get_variables_metadata(property = signif_figures_label, column = column))) { - self$append_to_variables_metadata(column, signif_figures_label, get_default_significant_figures(self$get_columns_from_data(column, use_current_filter = FALSE, use_column_selection = FALSE))) - } - if(self$is_variables_metadata(labels_label, column)) { - curr_labels <- self$get_variables_metadata(property = labels_label, column = column, direct_from_attributes = TRUE) - if(!is.numeric(curr_labels)) { - numeric_labs <- as.numeric(curr_labels) - if(any(is.na(numeric_labs))) { - warning("labels attribute of non numeric values is not currently supported. labels will be removed from column: ", column, " to prevent compatibility issues. removed labels: ", curr_labels) - self$append_to_variables_metadata(column, labels_label, NULL) - } - else { - adjusted_labels <- numeric_labs - names(adjusted_labels) <- names(curr_labels) - self$append_to_variables_metadata(column, labels_label, adjusted_labels) - } - } - } - } -} -) - -DataSheet$set("public", "remove_rows_in_data", function(row_names) { - curr_data <- self$get_data_frame(use_current_filter = FALSE) - self$save_state_to_undo_history() - - if(!all(row_names %in% rownames(curr_data))) stop("Some of the row_names not found in data") - rows_to_remove <- which(rownames(curr_data) %in% row_names) - #Prefer not to use dplyr::slice as it produces a tibble - #tibbles remove row names e.g. 
for filtering - #but cannot use standard curr_data[-rows_to_remove, ] - #since it removes column attributes - - self$set_data(dplyr::slice(curr_data, -rows_to_remove, .preserve = TRUE)) - self$append_to_changes(list(Removed_row, row_names)) - #Added this line to fix the bug of having the variable names in the metadata changinng to NA - # This affects factor columns only - we need to find out why and how to solve it best - self$add_defaults_variables_metadata(self$get_column_names()) - self$data_changed <- TRUE -} -) - -DataSheet$set("public", "get_next_default_column_name", function(prefix) { - return(next_default_item(prefix = prefix, existing_names = self$get_column_names(use_current_column_selection = FALSE))) -} -) - -DataSheet$set("public", "reorder_columns_in_data", function(col_order) { - if (ncol(self$get_data_frame(use_current_filter = FALSE, use_column_selection = FALSE)) != length(col_order)) stop("Columns to order should be same as columns in the data.") - - # Save the current state to undo_history before making modifications - self$save_state_to_undo_history() - - if(is.numeric(col_order)) { - if(!(identical(sort(col_order), sort(as.numeric(1:ncol(data)))))) { - stop("Invalid column order") - } - } - else if(is.character(col_order)) { - if(!(dplyr::setequal(col_order,names(private$data)))) stop("Invalid column order") - } - else stop("column order must be a numeric or character vector") - old_metadata <- attributes(private$data) - self$set_data(private$data[ ,col_order]) - for(name in names(old_metadata)) { - if(!name %in% c("names", "class", "row.names")) { - self$append_to_metadata(name, old_metadata[[name]]) - } - } - self$append_to_changes(list(Col_order, col_order)) -} -) - -DataSheet$set("public", "insert_row_in_data", function(start_row, row_data = c(), number_rows = 1, before = FALSE) { - curr_data <- self$get_data_frame(use_current_filter = FALSE) - self$save_state_to_undo_history() - curr_row_names <- rownames(curr_data) - if (!start_row %in% 
curr_row_names) { - stop(paste(start_row, " not found in rows")) - } - row_position = which(curr_row_names == start_row) - row_data <- curr_data[0, ] - for(i in 1:number_rows) { - row_data[i, ] <- NA - } - #row_data <- data.frame(matrix(NA, nrow = number_rows, ncol = ncol(curr_data))) - #colnames(row_data) <- colnames(curr_data) - if(length(curr_row_names[!is.na(as.numeric(curr_row_names))]) > 0) { - rownames(row_data) <- max(as.numeric(curr_row_names), na.rm = TRUE) + 1:number_rows - } - else rownames(row_data) <- nrow(curr_data) + 1:(number_rows - 1) - old_attr <- attributes(private$data) - # Need to use rbind.fill (not bind_rows) because it preserves column attributes - if(before && row_position == 1) { - # This transfers attributes to new data so that they are kept after rbind.fill - # Only needed when row_data is first argument to rbind.fill - for(i in seq_along(row_data)) { - attributes(row_data[[i]]) <- attributes(curr_data[[i]]) - } - self$set_data(rbind.fill(row_data, curr_data)) - } - else if(!before && row_position == nrow(curr_data)) { - self$set_data(rbind.fill(curr_data, row_data)) - } - else { - if(before) { - self$set_data(plyr::rbind.fill(dplyr::slice(curr_data,(1:(row_position - 1))), row_data, dplyr::slice(curr_data,row_position:nrow(curr_data)))) - } - else { - self$set_data(plyr::rbind.fill(dplyr::slice(curr_data, (1:row_position)), row_data, dplyr::slice(curr_data,(row_position + 1):nrow(curr_data)))) - } - } - for(attr_name in names(old_attr)) { - if(!attr_name %in% c("names", "class", "row.names")) { - self$append_to_metadata(attr_name, old_attr[[attr_name]]) - } - } - self$append_to_changes(list(Inserted_row, number_rows)) - #Added this line to fix the bug of having the variable names in the metadata changinng to NA - # This affects factor columns only - we need to find out why and how to solve it best - self$add_defaults_variables_metadata(self$get_column_names()) - self$data_changed <- TRUE -} -) - -DataSheet$set("public", 
"get_data_frame_length", function(use_current_filter = FALSE) { - return(nrow(self$get_data_frame(use_current_filter = use_current_filter))) -} -) - -DataSheet$set("public", "get_factor_data_frame", function(col_name = "", include_levels = TRUE, include_NA_level = FALSE) { - if(!(col_name %in% self$get_column_names())) stop(col_name, " is not a column name,") - col_data <- self$get_columns_from_data(col_name, use_current_filter = TRUE) - if(!(is.factor(col_data))) stop(col_name, " is not a factor column") - - counts <- data.frame(table(col_data)) - counts <- plyr::rename(counts, replace = c("col_data" = "Label")) - counts[["Label"]] <- as.character(counts[["Label"]]) - counts[["Ord."]] <- 1:nrow(counts) - if(include_levels) { - if(self$is_variables_metadata(str = labels_label, col = col_name)) { - curr_levels <- self$get_variables_metadata(property = labels_label, column = col_name, direct_from_attributes = TRUE) - curr_levels <- data.frame(Label = names(curr_levels), Level = as.vector(curr_levels), stringsAsFactors = FALSE) - counts <- dplyr::left_join(counts, curr_levels, by = "Label") - } - else { - curr_levels <- counts[["Ord."]] - counts[["Level"]] <- curr_levels - } - counts <- counts[c("Ord.", "Label", "Level", "Freq")] - } - else counts <- counts[c("Ord.", "Label", "Freq")] - if(include_NA_level) { - missing_count <- sum(is.na(col_data)) - if(include_levels) counts[nrow(counts) + 1, ] <- c("-", "NA", "-", missing_count) - else counts[nrow(counts) + 1, ] <- c("-", "(NA)", missing_count) - } - return(counts) -} -) - -DataSheet$set("public", "get_column_factor_levels", function(col_name = "") { - if(!(col_name %in% self$get_column_names())) { - stop(col_name, " is not a column in", get_metadata(data_name_label)) - } - - if(!(is.factor(self$get_columns_from_data(col_name, use_current_filter = FALSE)))){ - stop(col_name, " is not a factor column") - } - - return(levels(self$get_columns_from_data(col_name, use_current_filter = FALSE))) -} -) - 
-DataSheet$set("public", "sort_dataframe", function(col_names = c(), decreasing = FALSE, na.last = TRUE, by_row_names = FALSE, row_names_as_numeric = TRUE) { - curr_data <- self$get_data_frame(use_current_filter = FALSE) - - # Check for missing or empty column names - if (missing(col_names) || length(col_names) == 0) { - if (by_row_names) { - row_names_sort <- if (row_names_as_numeric) as.numeric(row.names(curr_data)) else row.names(curr_data) - if (decreasing) self$set_data(arrange(curr_data, desc(row_names_sort))) - else self$set_data(arrange(curr_data, row_names_sort)) - } else { - message("No sorting to be done.") - } - } else { - if (by_row_names) warning("Cannot sort by columns and row names. Sorting will be done by given columns only.") - - if (decreasing) self$set_data(dplyr::arrange(curr_data, dplyr::across(dplyr::all_of(col_names), desc))) - else self$set_data(dplyr::arrange(curr_data, dplyr::across(dplyr::all_of(col_names)))) - } - self$data_changed <- TRUE -} -) - -DataSheet$set("public", "convert_column_to_type", function(col_names = c(), to_type, factor_values = NULL, set_digits, set_decimals = FALSE, keep_attr = TRUE, ignore_labels = FALSE, keep.labels = TRUE) { - if(!all(col_names %in% self$get_column_names())) stop("Some column names not found in the data") - - if(length(to_type) !=1 ) { - stop("to_type must be a character of length one") - } - - if(!(to_type %in% c("integer", "factor", "numeric", "character", "ordered_factor", "logical"))) { - stop(to_type, " is not a valid type to convert to") - } - - if(!is.null(factor_values) && !(factor_values %in% c("force_ordinals", "force_values"))) { - stop(factor_values, " must be either 'force_ordinals' or 'force_values'") - } - - for(col_name in col_names) { - curr_col <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - if(keep_attr) { - tmp_attr <- get_column_attributes(curr_col) - } - if(!is.null(factor_values) && is.factor(curr_col) && to_type %in% c("integer", "numeric")) { - 
if(factor_values == "force_ordinals") new_col <- as.numeric(curr_col) - else if(factor_values == "force_values") new_col <- as.numeric(levels(curr_col))[curr_col] - } - else if(to_type %in% c("factor", "ordered_factor")) { - ordered <- (to_type == "ordered_factor") - # TODO This looks like it may not work if curr_col is not numeric. - # If this is not currently used anywhere possibly remove or modify. - if(set_decimals) curr_col <- round(curr_col, digits = set_digits) - if(ignore_labels) { - new_col <- make_factor(curr_col, ordered = ordered) - } - else { - if(self$is_variables_metadata(labels_label, col_name)) { - new_col <- sjlabelled::as_label(curr_col, add.non.labelled = TRUE) - # Adds "ordered" to the class in the same way as factor(). - # factor(ordered = TURE) is not used as this drops all attributes of x. - if(ordered) class(new_col) <- c("ordered", class(new_col)) - else class(new_col) <- class(new_col)[class(new_col) != "ordered"] - } - else { - new_col <- make_factor(curr_col, ordered = ordered) - if(is.numeric(curr_col) && !self$is_variables_metadata(labels_label, col_name)) { - labs <- sort(unique(curr_col)) - names(labs) <- labs - # temporary fix to issue of add_columns not retaining attributes of new columns - tmp_attr[[labels_label]] <- labs - } - } - } - } - else if(to_type == "integer") { - new_col <- as.integer(curr_col) - } - else if(to_type == "numeric") { - if(ignore_labels) { - if (is.factor(curr_col)) new_col <- as.numeric(levels(curr_col))[curr_col] - else new_col <- as.numeric(curr_col) - } - else { - if(self$is_variables_metadata(labels_label, col_name) && !is.numeric(curr_col)) { - #TODO WARNING: need to test this on columns of different types to check for strange behaviour - curr_labels <- self$get_variables_metadata(property = labels_label, column = col_name, direct_from_attributes = TRUE) - if(!all(curr_col %in% names(curr_labels))) { - additional_names <- sort(unique(na.omit(curr_col[!curr_col %in% names(curr_labels)]))) - additonal 
<- seq(max(curr_labels, na.rm = TRUE) + 1, length.out = length(additional_names)) - names(additonal) <- additional_names - curr_labels <- c(curr_labels, additonal) - # temporary fix to issue of add_columns not retaining attributes of new columns - tmp_attr[[labels_label]] <- curr_labels - } - new_col <- as.numeric(curr_labels[as.character(curr_col)]) - } - # This ensures that integer columns get type changed to numeric (not done by sjlabelled::as_numeric) - else if(is.integer(curr_col)) new_col <- as.numeric(curr_col) - else new_col <- sjlabelled::as_numeric(curr_col, keep.labels = keep.labels) - } - } - else if(to_type == "character") { - new_col <- sjmisc::to_character(curr_col) - } - else if(to_type == "logical") { - if(is.logical.like(curr_col)) new_col <- as.logical(curr_col) - else stop("Column is not numeric or contains values other than 0 and 1. Converting to logical would result in losing information.") - } - - self$add_columns_to_data(col_name = col_name, col_data = new_col) - - if(keep_attr) { - if(to_type %in% c("numeric", "integer") && signif_figures_label %in% names(tmp_attr) && is.na(tmp_attr[[signif_figures_label]])) { - tmp_attr[[signif_figures_label]] <- NULL - } - self$append_column_attributes(col_name = col_name, new_attr = tmp_attr) - } - } - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE -} -) - -DataSheet$set("public", "copy_columns", function(col_names = "") { - for(col_name in col_names){ - if(!(col_name %in% self$get_column_names())) { - stop(col_name, " is not a column in ", get_metadata(data_name_label)) - } - } - dat1 <- self$get_columns_from_data(col_names, use_current_filter = FALSE) - - for(name in col_names){ - names(dat1)[names(dat1) == name] <- self$get_next_default_column_name(prefix = paste(name, "copy", sep = "_" ) ) - } - - self$add_columns_to_data(col_name = names(dat1), col_data = dat1) - self$append_to_changes(list(Copy_cols, col_names)) -} -) - -DataSheet$set("public", "drop_unused_factor_levels", 
function(col_name) { - if(!col_name %in% self$get_column_names()) stop(paste(col_name,"not found in data.")) - col_data <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - if(!is.factor(col_data)) stop(col_name, " is not a factor.") - level_counts <- table(col_data) - if(any(level_counts == 0)) { - if(self$is_variables_metadata(labels_label, col_name)) { - curr_labels <- self$get_variables_metadata(property = labels_label, column = col_name, direct_from_attributes = TRUE) - curr_labels <- curr_labels[names(level_counts[level_counts > 0])] - self$append_to_variables_metadata(property = labels_label, col_names = col_name, new_val = curr_labels) - col_data <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - } - tmp_attr <- get_column_attributes(col_data) - self$add_columns_to_data(col_name, droplevels(col_data)) - self$append_column_attributes(col_name = col_name, new_attr = tmp_attr) - } -} -) - -DataSheet$set("public", "set_factor_levels", function(col_name, new_labels, new_levels, set_new_labels = TRUE) { - if(!col_name %in% self$get_column_names()) stop(col_name, " not found in data.") - col_data <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - if(!is.factor(col_data)) stop(col_name, " is not a factor.") - old_labels <- levels(col_data) - if(length(new_labels) < length(old_labels)) stop("There must be at least as many new levels as current levels.") - if(!missing(new_levels) && anyDuplicated(new_levels)) stop("new levels must be unique") - # Must be private$data because setting an attribute - levels(private$data[[col_name]]) <- new_labels - - if(!missing(new_levels)) { - labels_list <- new_levels - names(labels_list) <- new_labels - self$append_to_variables_metadata(col_name, labels_label, labels_list) - } - else if(set_new_labels && self$is_variables_metadata(labels_label, col_name)) { - labels_list <- self$get_variables_metadata(property = labels_label, column = col_name, direct_from_attributes = TRUE) - 
names(labels_list) <- as.character(new_labels[1:length(old_labels)]) - if(length(new_labels) > length(old_lables)) { - extra_labels <- seq(from = max(labels_list) + 1, length.out = (length(new_labels) - length(old_labels))) - names(extra_labels) <- new_labels[!new_labels %in% names(labels_list)] - labels_list <- c(labels_list, extra_labels) - } - self$append_to_variables_metadata(col_name, labels_label, labels_list) - } - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE -} -) - -DataSheet$set("public", "edit_factor_level", function(col_name, old_level, new_level) { - if(!col_name %in% self$get_column_names()) stop(col_name, " not found in data.") - if(!is.factor(self$get_columns_from_data(col_name, use_current_filter = FALSE))) stop(col_name, " is not a factor.") - self$add_columns_to_data(col_name, plyr::mapvalues(x = self$get_columns_from_data(col_name, use_current_filter = FALSE), from = old_level, to = new_level)) - self$data_changed <- TRUE - self$variables_metadata_changed <- TRUE -} -) - - -DataSheet$set("public", "set_factor_reference_level", function(col_name, new_ref_level) { - if(!col_name %in% self$get_column_names()) stop(col_name, " not found in data.") - col_data <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - if(!is.factor(col_data)) stop(col_name, " is not a factor.") - if(!new_ref_level %in% levels(col_data)) stop(new_ref_level, " is not a level of ", col_name) - tmp_attr <- get_column_attributes(col_data) - self$add_columns_to_data(col_name, relevel(col_data, new_ref_level)) - self$append_column_attributes(col_name = col_name, new_attr = tmp_attr) -} -) - -DataSheet$set("public", "reorder_factor_levels", function(col_name, new_level_names) { - if(!col_name %in% self$get_column_names()) stop(col_name, " not found in data.") - curr_column <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - if(!is.factor(curr_column)) stop(col_name, " is not a factor.") - curr_levels <- levels(curr_column) - 
if(length(new_level_names) != length(curr_levels)) stop("Incorrect number of new levels given.") - if(!all(new_level_names %in% curr_levels)) stop("new_level_names must be a reordering of the current levels:", paste(levels(curr_column), collapse = ", ")) - new_column <- factor(curr_column, levels = new_level_names, ordered = is.ordered(curr_column)) - #TODO are these the only attributes we don't want to manually set? - curr_attr <- attributes(curr_column)[!names(attributes(curr_column)) %in% c("levels", "class")] - for(i in seq_along(curr_attr)) { - attr(new_column, names(curr_attr)[i]) <- curr_attr[[i]] - } - self$add_columns_to_data(col_name = col_name, col_data = new_column) - self$variables_metadata_changed <- TRUE -} -) - -DataSheet$set("public", "get_column_count", function(use_column_selection = FALSE) { - return(ncol(self$get_data_frame(use_column_selection = use_column_selection))) -} -) - -DataSheet$set("public", "get_column_names", function(as_list = FALSE, include = list(), exclude = list(), excluded_items = c(), max_no, use_current_column_selection = TRUE) { - if(length(include) == 0 && length(exclude) == 0 && (!use_current_column_selection || !self$column_selection_applied())) out <- names(private$data) - else { - if(data_type_label %in% names(include) && "numeric" %in% include[[data_type_label]]) { - include[[data_type_label]] = c(include[[data_type_label]], "integer") - } - if(data_type_label %in% names(exclude) && "numeric" %in% exclude[[data_type_label]]) { - exclude[[data_type_label]] = c(exclude[[data_type_label]], "integer") - } - - if (use_current_column_selection) col_names <- self$current_column_selection - else col_names <- names(private$data) - out <- c() - i <- 1 - for(col in col_names) { - if(length(include) > 0 || length(exclude) > 0) { - curr_var_metadata <- self$get_variables_metadata(column = col, direct_from_attributes = TRUE) - if(!data_type_label %in% names(curr_var_metadata)) curr_var_metadata[[data_type_label]] <- 
class(private$data[[col]]) - #TODO this is a temp compatibility solution for how the class of ordered factor used to be shown when getting metadata - if(length(curr_var_metadata[[data_type_label]]) == 2 && all(curr_var_metadata[[data_type_label]] %in% c("ordered", "factor"))) curr_var_metadata[[data_type_label]] <- "ordered,factor" - if(all(c(names(include), names(exclude)) %in% names(curr_var_metadata)) && all(sapply(names(include), function(prop) any(curr_var_metadata[[prop]] %in% include[[prop]]))) - && all(sapply(names(exclude), function(prop) !any(curr_var_metadata[[prop]] %in% exclude[[prop]])))) { - out <- c(out, col) - } - } - else out <- c(out, col) - i = i + 1 - } - if(!missing(max_no) && max_no < length(out)) out <- out[1:max_no] - } - if(length(excluded_items) > 0) { - ex_ind = which(out %in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the data") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(as_list) { - lst = list() - lst[[self$get_metadata(data_name_label)]] <- out - return(lst) - } - else return(out) -} -) - -#TODO: Are there other types needed here? -DataSheet$set("public", "get_data_type", function(col_name = "") { - if(!(col_name %in% self$get_column_names())) { - stop(paste(col_name, "is not a column in", self$get_metadata(data_name_label))) - } - type <- "" - curr_col <- self$get_columns_from_data(col_name, use_current_filter = TRUE) - if(is.character(curr_col)) { - type = "character" - } - else if(is.logical(curr_col)) { - type = "logical" - } - # Question: Why is the using private$data[[col_name]] instead of curr_col? - else if(lubridate::is.Date(private$data[[col_name]])){ - # #TODO - #we can add options for other forms of dates serch as POSIXct, POSIXlt, Date, chron, yearmon, yearqtr, zoo, zooreg, timeDate, xts, its, ti, jul, timeSeries, and fts objects. 
- type = "Date" - } - else if(is.numeric(curr_col)) { - #TODO vectors with integer values but stored as numeric will return numeric. - # Is that desirable? - if(is.binary(curr_col)) { - type = "two level numeric" - } - else if(all(curr_col == as.integer(curr_col), na.rm = TRUE)) { - if(all(curr_col > 0, na.rm = TRUE)) { - type = "positive integer" - } - else type = "integer" - } - else type = "numeric" - } - else if(is.factor(curr_col)) { - if(nlevels(curr_col) == 2 || nlevels(factor(curr_col)) == 2) type = "two level factor" - else if(length(levels(curr_col)) > 2) type = "multilevel factor" - else type = "factor" - } - return(type) -} -) - -DataSheet$set("public", "set_hidden_columns", function(col_names = c()) { - if(length(col_names) == 0) self$unhide_all_columns() - else { - if(!all(col_names %in% self$get_column_names())) stop("Not all col_names found in data") - - self$append_to_variables_metadata(col_names, is_hidden_label, TRUE) - hidden_cols = self$get_column_names()[!self$get_column_names() %in% col_names] - self$append_to_variables_metadata(hidden_cols, is_hidden_label, FALSE) - } -} -) - -DataSheet$set("public", "unhide_all_columns", function() { - self$append_to_variables_metadata(self$get_column_names(), is_hidden_label, FALSE) -} -) - -DataSheet$set("public", "set_row_names", function(row_names) { - if(missing(row_names)) row_names = 1:nrow(self$get_data_frame(use_current_filter = FALSE)) - if(length(row_names) != nrow(self$get_data_frame(use_current_filter = FALSE))) stop("row_names must be a vector of same length as the data") - if(anyDuplicated(row_names) != 0) stop("row_names must be unique") - rownames(private$data) <- row_names - self$data_changed <- TRUE -} -) - -DataSheet$set("public", "set_col_names", function(col_names) { - if(missing(col_names)) col_names = 1:ncol(self$get_data_frame(use_current_filter = FALSE)) - if(length(col_names) != ncol(self$get_data_frame(use_current_filter = FALSE))) stop("col_names must be a vector of same length 
as the data") - if(anyDuplicated(col_names) != 0) stop("col_names must be unique") - names(private$data) <- make.names(iconv(col_names, to = "ASCII//TRANSLIT", sub = ".")) - self$data_changed <- TRUE -} -) - -DataSheet$set("public", "get_row_names", function() { - return(rownames(private$data)) -} -) - -DataSheet$set("public", "get_dim_dataframe", function() { - return(dim(self$get_data_frame(use_current_filter = FALSE))) -} -) - -DataSheet$set("public", "set_protected_columns", function(col_names) { - if(!all(col_names %in% self$get_column_names())) stop("Not all col_names found in data") - - self$append_to_variables_metadata(col_names, is_protected_label, TRUE) - other_cols = self$get_column_names()[!self$get_column_names() %in% col_names] - self$append_to_variables_metadata(other_cols, is_protected_label, FALSE) -} -) - - -# Filters ----------------------------------------------------------------- - -DataSheet$set("public", "add_filter", function(filter, filter_name = "", replace = TRUE, set_as_current = FALSE, na.rm = TRUE, is_no_filter = FALSE, and_or = "&", inner_not = FALSE, outer_not = FALSE) { - if(missing(filter)) stop("filter is required") - if(filter_name == "") filter_name = next_default_item("Filter", names(private$filters)) - - for(condition in filter) { - if(length(condition) < 2 || length(condition) > 3 || !all(names(condition) %in% c("column", "operation", "value"))) { - stop("filter must be a list of conditions containing: column, operation and (sometimes) value") - } - if(!condition[["column"]] %in% self$get_column_names()) stop(condition[["column"]], " not found in data.") - } - if(filter_name %in% names(private$filters) && !replace) { - warning("A filter named ", filter_name, " already exists. It will not be replaced.") - } - else { - if(filter_name %in% names(private$filters)) message("A filter named ", filter_name, " already exists. 
It will be replaced by the new filter.") - filter_calc = calculation$new(type = "filter", filter_conditions = filter, name = filter_name, parameters = list(na.rm = na.rm, is_no_filter = is_no_filter, and_or = and_or, inner_not = inner_not, outer_not = outer_not)) - private$filters[[filter_name]] <- filter_calc - self$append_to_changes(list(Added_filter, filter_name)) - if(set_as_current) { - self$current_filter <- filter_calc - self$data_changed <- TRUE - } - } -} -) - -DataSheet$set("public","add_filter_as_levels", function(filter_levels, column){ - for (i in seq_along(filter_levels)) { - filter_cond <- list(C0 = list(column = column, operation = "==", value = filter_levels[i])) - self$add_filter(filter = filter_cond, filter_name = filter_levels[i]) - } -}) - -DataSheet$set("public", "get_current_filter", function() { - return(private$.current_filter) -} -) - -DataSheet$set("public", "set_current_filter", function(filter_name = "") { - if(!filter_name %in% names(private$filters)) stop(filter_name, " not found.") - self$current_filter <- private$filters[[filter_name]] -} -) - -DataSheet$set("public", "get_filter_names", function(as_list = FALSE, include = list(), exclude = list(), excluded_items = c()) { - out = names(private$filters) - if(length(excluded_items) > 0) { - ex_ind = which(out %in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the list of filters") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(as_list) { - lst = list() - lst[[self$get_metadata(data_name_label)]] <- out - return(lst) - } - else return(out) -} -) - -DataSheet$set("public", "get_filter", function(filter_name) { - if(missing(filter_name)) return(private$filters) - if(!filter_name %in% names(private$filters)) stop(filter_name, " not found.") - return(private$filters[[filter_name]]) -} -) - -DataSheet$set("public", "get_filter_as_logical", function(filter_name) { - curr_filter <- self$get_filter(filter_name) - 
and_or <- curr_filter$parameters[["and_or"]] - # This should no longer be needed as default will be set in check_filter() - if (is.null(and_or)) and_or <- "&" - outer_not <- curr_filter$parameters[["outer_not"]] - i <- 1 - if (!isTRUE(outer_not)) { - if (length(curr_filter$filter_conditions) == 0) { - out <- rep(TRUE, nrow(self$get_data_frame(use_current_filter = FALSE))) - } else { - result <- matrix(nrow = nrow(self$get_data_frame(use_current_filter = FALSE)), ncol = length(curr_filter$filter_conditions)) - for (condition in curr_filter$filter_conditions) { - # Prevents crash if column no longer exists - # TODO still shows filter is applied - if (!condition[["column"]] %in% self$get_column_names()) { - return(TRUE) - } - if (condition[["operation"]] == "is.na" || condition[["operation"]] == "! is.na") { - col_is_na <- is.na(self$get_columns_from_data(condition[["column"]], use_current_filter = FALSE)) - if (condition[["operation"]] == "is.na") { - result[, i] <- col_is_na - } else { - result[, i] <- !col_is_na - } - } else if (condition[["operation"]] == "is.empty" || condition[["operation"]] == "! is.empty"){ - col_is_empty <- self$get_columns_from_data(condition[["column"]], use_current_filter = FALSE) == "" - if (condition[["operation"]] == "is.empty") { - result[, i] <- col_is_empty - } else { - result[, i] <- !col_is_empty - } - } - else { - func <- match.fun(condition[["operation"]]) - if (any(is.na(condition[["value"]])) && condition[["operation"]] != "%in%") { - stop("Cannot create a filter on missing values with operation: ", condition[["operation"]]) - } else { - logical_vec <- func(self$get_columns_from_data(condition[["column"]], use_current_filter = FALSE), condition[["value"]]) - } - if (! 
isTRUE(curr_filter$parameters[["inner_not"]])) { - result[, i] <- logical_vec - } else { - result[, i] <- !logical_vec - } - } - i <- i + 1 - } - if (and_or == "&") { - out <- apply(result, 1, all) - } else if (and_or == "|") { - out <- apply(result, 1, any) - } else { - stop(and_or, " should be & or |.") - } - out[is.na(out)] <- !curr_filter$parameters[["na.rm"]] - } - } else { - dat <- self$get_data_frame(use_current_filter = FALSE) - str_out <- "(" - for (condition in curr_filter$filter_conditions) { - str_out <- paste0(str_out, paste0("dat", "$", condition[["column"]], condition[["operation"]], ifelse(is.numeric(condition[["value"]]), condition[["value"]], paste0("'", condition[["value"]], "'")))) - if (i != length(curr_filter$filter_conditions)) str_out <- paste0(str_out, curr_filter$parameters[["and_or"]]) - i <- i + 1 - } - str_out <- paste0("!", str_out, ")") - out <- eval(parse(text = str_out)) - } - return(out) -}) - -DataSheet$set("public", "get_filter_column_names", function(filter_name) { - curr_filter <- self$get_filter(filter_name) - column_names <- c() - for(i in seq_along(curr_filter$filter_conditions)) { - column_names <- c(column_names, curr_filter$filter_conditions[[i]][["column"]]) - } - return(column_names) -} -) - -DataSheet$set("public", "get_current_filter_column_names", function() { - return(self$get_filter_column_names(private$.current_filter$name)) -} -) - -DataSheet$set("public", "filter_applied", function() { - return(!private$.current_filter$parameters[["is_no_filter"]]) -} -) - -DataSheet$set("public", "remove_current_filter", function() { - self$set_current_filter("no_filter") -} -) - -DataSheet$set("public", "filter_string", function(filter_name) { - if (!filter_name %in% names(private$filters)) stop(filter_name, " not found.") - curr_filter <- self$get_filter(filter_name) - out <- "(" - i <- 1 - for (condition in curr_filter$filter_conditions) { - if (i != 1) out <- paste(out, curr_filter$parameters[["and_or"]]) - out <- 
ifelse(!curr_filter$parameters[["inner_not"]], paste0(out, " (", condition[["column"]], " ", condition[["operation"]]), paste0(out, " !(", condition[["column"]], " ", condition[["operation"]])) - if (condition[["operation"]] == "%in%") { - out <- paste0(out, " c(", paste(paste0("'", condition[["value"]], "'"), collapse = ","), ")") - } else { - out <- paste(out, condition[["value"]]) - } - out <- paste0(out, ")") - i <- i + 1 - } - out <- paste(out, ")") - if (isTRUE(curr_filter$parameters[["outer_not"]])) { - out <- gsub("[!()]", "", out) - out <- paste0("!(", out, ")") - } - return(out) -}) - -DataSheet$set("public", "get_filter_as_instat_calculation", function(filter_name) { - if(!filter_name %in% names(private$filters)) stop(filter_name, " not found.") - curr_filter <- self$get_filter(filter_name) - filter_string <- self$filter_string(filter_name) - calc_from <- list() - for(condition in curr_filter$filter_conditions) { - calc_from[[length(calc_from) + 1]] <- condition[["column"]] - } - names(calc_from) <- rep(self$get_metadata(data_name_label), length(calc_from)) - calc <- instat_calculation$new(type="filter", function_exp = filter_string, calculated_from = calc_from) - return(calc) -} -) - - -# Column Selection -------------------------------------------------------- - -DataSheet$set("public", "add_column_selection", function(column_selection, name = "", replace = TRUE, set_as_current = FALSE, is_everything = FALSE, and_or = "|") { - if(missing(column_selection)) stop("column_selection is required") - if(name == "") name <- next_default_item("sel", names(private$column_selections)) - if(name %in% names(private$column_selections) && !replace) { - warning("The column selection was not added. A column selection named ", name, " already exists. 
Specify replace = TRUE to overwrite it.") - return() - } - for(condition in column_selection) { - if(!length(condition) %in% c(2, 3) || !all(names(condition) %in% c("operation", "parameters", "negation"))) { - stop("column_selection must be a list of conditions containing: operation and parameters (list)") - } - if (!condition[["operation"]] %in% column_selection_operations) stop("Unkown operation. Operation must be one of ", paste(column_selection_operations, collapse = ", ")) - if (!is.list(condition[["parameters"]])) stop("parameters must be a list.") - if (is.null(condition[["negation"]])) condition[["negation"]] <- FALSE - if (!is.logical(condition[["negation"]])) stop("negative must be either TRUE or FALSE.") - } - if(name %in% names(private$column_selection)) message("A column selection named ", name, " already exists. It will be replaced by the new column selection.") - column_selection_obj <- list(name = name, - conditions = column_selection, - is_everything = is_everything, - and_or = and_or - ) - private$column_selections[[name]] <- column_selection_obj - self$append_to_changes(list(Added_column_selection, name)) - if(set_as_current) { - self$current_column_selection <- column_selection_obj - self$data_changed <- TRUE - } -} -) - -DataSheet$set("public", "get_current_column_selection", function() { - return(private$.current_column_selection) -} -) - -DataSheet$set("public", "set_current_column_selection", function(name = "") { - if (!name %in% names(private$column_selections)) stop(name, " not found as a column selection.") - if (length(self$get_column_selection_column_names(name)) == 0) { - cat(name, " has no columns selected.") - } else { - self$current_column_selection <- private$column_selections[[name]] - } -}) - -DataSheet$set("public", "get_column_selection_names", function(as_list = FALSE, include = list(), exclude = list(), excluded_items = c()) { - out <- names(private$column_selections) - if(length(excluded_items) > 0) { - ex_ind <- which(out 
%in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the list of column selections.") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(as_list) { - lst = list() - lst[[self$get_metadata(data_name_label)]] <- out - return(lst) - } - else return(out) -} -) - -DataSheet$set("public", "get_column_selection", function(name) { - if(missing(name)) return(private$column_selections) - if(!name %in% names(private$column_selections)) stop(name, " not found as a column selection.") - return(private$column_selections[[name]]) -} -) - -DataSheet$set("public", "get_column_selection_column_names", function(name) { - curr_column_selection <- self$get_column_selection(name) - all_column_names <- names(private$data) - if (length(curr_column_selection[["conditions"]]) == 0) return(all_column_names) - and_or <- curr_column_selection[["and_or"]] - i <- 1 - res <- vector("list", length(curr_column_selection[["conditions"]])) - for (condition in curr_column_selection[["conditions"]]) { - op <- condition[["operation"]] - args <- condition[["parameters"]] - neg <- condition[["negation"]] - if (is.null(neg)) neg <- FALSE - fn <- switch(op, - "base::match" = base::match, - "tidyselect::starts_with" = tidyselect::starts_with, - "tidyselect::ends_with" = tidyselect::ends_with, - "tidyselect::contains" = tidyselect::contains, - "tidyselect::matches" = tidyselect::matches, - "tidyselect::num_range" = tidyselect::num_range, - "tidyselect::last_col" = tidyselect::last_col, - "tidyselect::where" = NULL, - NULL - ) - if (op == "base::match") { - args$table <- all_column_names - res[[i]] <- do.call(fn, args) - }else if (op == "tidyselect::where"){ - selected_columns <- private$data |> - dplyr::select(where(args$fn)) |> - colnames() - res[[i]] <- which(all_column_names %in% selected_columns) - }else{ - args$vars <- all_column_names - res[[i]] <- do.call(fn, args) - } - if (neg) res[[i]] <- setdiff(1:length(all_column_names), 
res[[i]]) - i <- i + 1 - } - if (and_or == "&") { - out <- Reduce(intersect, res) - } else if (and_or == "|") { - out <- Reduce(union, res) - } else { - stop("and_or must be & or |") - } - return(all_column_names[out]) -}) - -DataSheet$set("public", "get_column_selected_column_names", function(column_selection_name = "") { - if(column_selection_name != "") { - selected_columns <- self$get_column_selection_column_names(column_selection_name) - return(selected_columns) - } -} -) - -DataSheet$set("public", "column_selection_applied", function() { - curr_sel <- private$.current_column_selection - if (is.null(curr_sel) || length(curr_sel) == 0) { - return(FALSE) - } else return(!curr_sel[["is_everything"]]) -} -) - -DataSheet$set("public", "remove_current_column_selection", function() { - self$set_current_column_selection(".everything") - self$append_to_variables_metadata(self$get_column_names(), is_hidden_label, FALSE) - private$.variables_metadata_changed <- TRUE -} -) - -DataSheet$set("public", "get_variables_metadata_fields", function(as_list = FALSE, include = c(), exclude = c(), excluded_items = c()) { - out = names(self$get_variables_metadata()) - if(length(excluded_items) > 0){ - ex_ind = which(out %in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the list of objects") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(as_list) { - lst = list() - lst[[self$get_metadata(data_name_label)]] <- out - return(lst) - } - else return(out) -} -) - -#objects names are expected to be unique. Objects are in a nested list. -#see comments in issue #7808 for more details -DataSheet$set("public", "add_object", function(object_name, object_type_label, object_format, object) { - - if(missing(object_name)){ - object_name <- next_default_item("object", names(private$objects)) - } - - if(object_name %in% names(private$objects)){ - message("An object called ", object_name, " already exists. 
It will be replaced.") - } - - #add the object with its metadata to the list of objects and add an "Added_object" change - private$objects[[object_name]] <- list(object_type_label = object_type_label, object_format = object_format, object = object) - self$append_to_changes(list(Added_object, object_name)) -} -) - -DataSheet$set("public", "get_object_names", function(object_type_label = NULL, - as_list = FALSE) { - - out <- get_data_book_output_object_names(output_object_list = private$objects, - object_type_label = object_type_label, - as_list = as_list, - list_label= self$get_metadata(data_name_label) ) - return(out) - -} -) - -DataSheet$set("public", "get_objects", function(object_type_label = NULL) { - out <- - private$objects[self$get_object_names(object_type_label = object_type_label)] - return(out) -} -) - -#object name must be character -#returns NULL if object is not found -DataSheet$set("public", "get_object", function(object_name) { - #make sure supplied object name is a character, prevents return of unexpected object - if(is.character(object_name) ){ - return(private$objects[[object_name]]) - }else{ - return(NULL) - } - -} -) - -DataSheet$set("public", "rename_object", function(object_name, new_name, object_type = "object") { - if(!object_type %in% c("object", "filter", "calculation", "graph", "table","model","structure","summary", "column_selection", "scalar")) stop(object_type, " must be either object (graph, table or model), filter, column selection, calculation or scalar.") - - - #Temp fix:: added graph, table and model so as to distinguish this when implementing it in the dialog. Otherwise they remain as objects - if (object_type %in% c("object", "graph", "table","model","structure","summary")){ - - if(!object_name %in% names(private$objects)) stop(object_name, " not found in objects list") - if(new_name %in% names(private$objects)) stop(new_name, " is already an object name. 
Cannot rename ", object_name, " to ", new_name) - names(private$objects)[names(private$objects) == object_name] <- new_name - } - else if (object_type == "filter"){ - if(!object_name %in% names(private$filters)) stop(object_name, " not found in filters list") - if(new_name %in% names(private$filters)) stop(new_name, " is already a filter name. Cannot rename ", object_name, " to ", new_name) - if("no_filter" == object_name) stop("Renaming no_filter is not allowed.") - names(private$filters)[names(private$filters) == object_name] <- new_name - if(private$.current_filter$name == object_name){private$.current_filter$name <- new_name} - } - else if (object_type == "calculation") { - if(!object_name %in% names(private$calculations)) stop(object_name, " not found in calculations list") - if(new_name %in% names(private$calculations)) stop(new_name, " is already a calculation name. Cannot rename ", object_name, " to ", new_name) - names(private$calculations)[names(private$calculations) == object_name] <- new_name - } - else if (object_type == "column_selection"){ - if(!object_name %in% names(private$column_selections)) stop(object_name, " not found in column selections list") - if(new_name %in% names(private$column_selections)) stop(new_name, " is already a column selection name. Cannot rename ", object_name, " to ", new_name) - if(".everything" == object_name) stop("Renaming .everything is not allowed.") - names(private$column_selections)[names(private$column_selections) == object_name] <- new_name - if(private$.current_column_selection$name == object_name){private$.current_column_selection$name <- new_name} - } else if (object_type == "scalar") { - if(!object_name %in% names(private$scalars)) stop(object_name, " not found in calculations list") - if(new_name %in% names(private$scalars)) stop(new_name, " is already a calculation name. 
Cannot rename ", object_name, " to ", new_name) - names(private$scalars)[names(private$scalars) == object_name] <- new_name - self$append_to_metadata(scalar, private$scalars) - } -} -) - -DataSheet$set("public", "delete_objects", function(data_name, object_names, object_type = "object") { - if(!object_type %in% c("object", "graph", "table","model","structure","summary","filter", "calculation", "column_selection", "scalar")) stop(object_type, " must be either object (graph, table or model), filter, column selection, calculation or scala.") - - - if(any(object_type %in% c("object", "graph", "table","model","structure","summary"))){ - - if(!all(object_names %in% names(private$objects))) stop("Not all object_names found in overall objects list.") - private$objects[names(private$objects) %in% object_names] <- NULL - }else if(object_type == "filter"){ - if(!all(object_names %in% names(private$filters))) stop(object_names, " not found in filters list.") - if("no_filter" %in% object_names) stop("no_filter cannot be deleted.") - if(any(private$.current_filter$name %in% object_names))stop(private$.current_filter$name, " is currently in use and cannot be deleted.") - private$filters[names(private$filters) %in% object_names] <- NULL - }else if(object_type == "calculation"){ - if(!object_names %in% names(private$calculations)) stop(object_names, " not found in calculations list.") - private$calculations[names(private$calculations) %in% object_names] <- NULL - }else if(object_type == "scalar"){ - if(!object_names %in% names(private$scalars)) stop(object_names, " not found in scalars list.") - private$scalars[names(private$scalars) %in% object_names] <- NULL - self$append_to_metadata(scalar, private$scalars) - }else if(object_type == "column_selection"){ - if(!all(object_names %in% names(private$column_selections))) stop(object_names, " not found in column selections list.") - if(".everything" %in% object_names) stop(".everything cannot be deleted.") - 
if(any(private$.current_column_selection$name %in% object_names))stop(private$.current_column_selection$name, " is currently in use and cannot be deleted.") - private$column_selections[names(private$column_selections) %in% object_names] <- NULL - } - if(!is.null(private$.last_graph) && length(private$.last_graph) == 2 && private$.last_graph[1] == data_name && private$.last_graph[2] %in% object_names) { - private$.last_graph <- NULL - } -} -) - -DataSheet$set("public", "reorder_objects", function(new_order) { - if(length(new_order) != length(private$objects) || !setequal(new_order, names(private$objects))) stop("new_order must be a permutation of the current object names.") - self$set_objects(private$objects[new_order]) -} -) - -# Any data_clone method must have ... argument to ensure that when arguments are added in future this is still compatible with older versions of this code -DataSheet$set("public", "data_clone", function(include_objects = TRUE, include_metadata = TRUE, include_logs = TRUE, include_filters = TRUE, include_column_selections = TRUE, include_calculations = TRUE, include_comments = TRUE, ...) 
{ - if(include_objects) new_objects <- private$objects - else new_objects <- list() - if(include_filters) new_filters <- lapply(private$filters, function(x) x$data_clone()) - else new_filters <- list() - if(include_column_selections) new_column_selections <- private$column_selections - else new_column_selections <- list() - if(include_calculations) new_calculations <- lapply(private$calculations, function(x) x$data_clone()) - else new_calculations <- list() - if(include_comments) new_comments <- lapply(private$comments, function(x) x$data_clone()) - else new_comments <- list() - - ret <- DataSheet$new(data = private$data, data_name = self$get_metadata(data_name_label), filters = new_filters, column_selections = new_column_selections, objects = new_objects, calculations = new_calculations, keys = private$keys, comments = new_comments, keep_attributes = include_metadata) - if(include_logs) ret$set_changes(private$changes) - else ret$set_changes(list()) - if(include_filters) ret$current_filter <- self$get_current_filter() - else { - ret$remove_current_filter() - } - if(include_column_selections) ret$current_column_selection <- self$get_current_column_selection() - else { - ret$remove_current_column_selection() - } - if(!include_metadata) { - self$clear_metadata() - self$clear_variables_metadata() - } - ret$data_changed <- TRUE - ret$metadata_changed <- TRUE - ret$variables_metadata_changed <- TRUE - return(ret) -} -) - -DataSheet$set("public", "freeze_columns", function(column) { - self$unfreeze_columns() - self$append_to_variables_metadata(column, is_frozen_label, TRUE) -} -) - -DataSheet$set("public", "unfreeze_columns", function() { - self$append_to_variables_metadata(self$get_column_names(), is_frozen_label, FALSE) -} -) - -#TODO maybe get ride of this method as that you can't create a key without -# the instat object also creating a self link -DataSheet$set("public", "add_key", function(col_names, key_name) { - cols <- self$get_columns_from_data(col_names, 
use_current_filter = FALSE) - if(anyDuplicated(cols) > 0) { - stop("key columns must have unique combinations") - } - if(self$is_key(col_names)) { - warning("A key with these columns already exists. No action will be taken.") - } - else { - if(missing(key_name)) key_name <- next_default_item("key", names(private$keys)) - if(key_name %in% names(private$keys)) warning("A key called", key_name, "already exists. It will be replaced.") - private$keys[[key_name]] <- col_names - self$append_to_variables_metadata(col_names, is_key_label, TRUE) - if(length(private$keys) == 1) self$append_to_variables_metadata(setdiff(self$get_column_names(), col_names), is_key_label, FALSE) - self$append_to_metadata(is_linkable, TRUE) - self$append_to_metadata(key_label, paste(private$keys[[key_name]], collapse = ",")) - cat(paste("Key name:", key_name), - paste("Key columns:", paste(private$keys[[key_name]], collapse = ", ")), - sep = "\n") - } -} -) - -DataSheet$set("public", "is_key", function(col_names) { - return(any(sapply(private$keys, function(x) setequal(col_names,x)))) -} -) - -DataSheet$set("public", "has_key", function() { - return(length(private$keys) > 0) -} -) - -DataSheet$set("public", "get_keys", function(key_name) { - if(!missing(key_name)) { - if(!key_name %in% names(private$keys)) stop(key_name, " not found.") - cat(paste("Key name:", key_name), - paste("Key columns:", paste(private$keys[[key_name]], collapse = ", ")), - sep = "\n") - } - else return(private$keys) -} -) - -DataSheet$set("public", "remove_key", function(key_name) { - if(!key_name %in% names(private$keys)) stop(key_name, " not found.") - self$append_to_variables_metadata(private$keys[[key_name]], is_key_label, FALSE) - private$keys[[key_name]] <- NULL - self$append_to_metadata(key_label, NULL) - cat("Key removed:", key_name) -} -) - -DataSheet$set("public", "get_comments", function(comment_id) { - if(!missing(comment_id)) { - if(!comment_id %in% self$get_comment_ids()) stop("Could not find comment with id: 
", comment_id) - return(private$comments[[comment_id]]) - } - else return(private$comments) -} -) - -DataSheet$set("public", "remove_comment", function(key_name) { - if(!key_name %in% names(private$keys)) stop(key_name, " not found.") - private$keys[[key_name]] <- NULL -} -) - -DataSheet$set("public", "set_structure_columns", function(struc_type_1, struc_type_2, struc_type_3) { - if(!all(c(struc_type_1,struc_type_2,struc_type_3) %in% self$get_column_names())) stop("Some column names not recognised.") - if(length(intersect(struc_type_1,struc_type_2)) > 0 || length(intersect(struc_type_1,struc_type_3)) > 0 || length(intersect(struc_type_2,struc_type_3)) > 0) { - stop("Each column can only be assign one structure type.") - } - if(length(struc_type_1) > 0) self$append_to_variables_metadata(struc_type_1, structure_label, structure_type_1_label) - if(length(struc_type_2) > 0) self$append_to_variables_metadata(struc_type_2, structure_label, structure_type_2_label) - if(length(struc_type_3) > 0) self$append_to_variables_metadata(struc_type_3, structure_label, structure_type_3_label) - all <- union(union(struc_type_1, struc_type_2), struc_type_3) - other <- setdiff(self$get_column_names(), all) - self$append_to_variables_metadata(other, structure_label, NA) -} -) - -DataSheet$set("public", "add_dependent_columns", function(columns, dependent_cols) { - for(col in columns) { - if(self$is_variables_metadata(dependent_columns_label, col)) { - curr_dependents <- self$get_variables_metadata(property = dependent_columns_label, column = col, direct_from_attributes = TRUE) - for(data_frame in names(dependent_cols)) { - if(data_frame %in% names(curr_dependents)) { - curr_dependents[[data_frame]] <- union(curr_dependents[[data_frame]], dependent_cols[[data_frame]]) - } - else { - curr_dependents[[data_frame]] <- dependent_cols[[data_frame]] - } - } - } - else curr_dependents <- as.list(dependent_cols) - self$append_to_variables_metadata(col, dependent_columns_label, curr_dependents) - 
} -} -) - -DataSheet$set("public", "set_column_colours", function(columns, colours) { - if(missing(columns)) columns <- self$get_column_names() - if(length(columns) != length(colours)) stop("columns must be the same length as colours") - - for(i in 1:length(columns)) { - self$append_to_variables_metadata(columns[i], colour_label, colours[i]) - } - other_cols <- self$get_column_names()[!self$get_column_names() %in% columns] - self$append_to_variables_metadata(other_cols, colour_label, -1) -} -) - -DataSheet$set("public", "has_colours", function(columns) { - return(self$is_variables_metadata(str = colour_label)) -} -) - -DataSheet$set("public", "set_column_colours_by_metadata", function(data_name, columns, property) { -if(!missing(data_name) && missing(columns)) columns <- names(self$get_data_frame(data_name = data_name)) - if(missing(columns)) property_values <- self$get_variables_metadata(property = property) - else property_values <- self$get_variables_metadata(property = property, column = columns) - - new_colours <- as.numeric(make_factor(property_values)) - new_colours[is.na(new_colours)] <- -1 - if(missing(columns)) self$set_column_colours(colours = new_colours) - else self$set_column_colours(columns = columns, colours = new_colours) -} -) - -DataSheet$set("public", "remove_column_colours", function() { - if(self$is_variables_metadata(str = colour_label)) { - self$append_to_variables_metadata(property = colour_label, new_val = -1) - } -} -) - -DataSheet$set("public", "graph_one_variable", function(columns, numeric = "geom_boxplot", categorical = "geom_bar", output = "facets", free_scale_axis = FALSE, ncol = NULL, coord_flip = FALSE, ...) 
{ - if(!all(columns %in% self$get_column_names())) { - stop("Not all columns found in the data") - } - if(!output %in% c("facets", "combine", "single")) { - stop("output must be one of: facets, combine or single") - } - if(!numeric %in% c("box_jitter", "violin_jitter", "violin_box")) { - numeric_geom <- match.fun(numeric) - } - else { - numeric_geom <- numeric - } - if(categorical %in% c("pie_chart")) { - cat_geom <- categorical - } - else { - cat_geom <- match.fun(categorical) - } - curr_data <- self$get_data_frame() - column_types <- c() - for(col in columns) { - # TODO this could be method to avoid needing to get full data frame in this method - # Everything non numeric is treated as categorical - if(is.numeric(curr_data[[col]])) { - column_types <- c(column_types, "numeric") - } - else { - column_types <- c(column_types, "cat") - } - } - if(output == "facets") { - if(length(unique(column_types)) > 1) { - warning("Cannot do facets with graphs of different types. Combine graphs will be used instead.") - output <- "combine" - } - else column_types <- unique(column_types) - } - if(output == "facets") { - # column_types will be unique by this point - column_types <- column_types[1] - if(column_types == "numeric") { - curr_geom <- numeric_geom - curr_geom_name <- numeric - } - else if(column_types == "cat") { - curr_geom <- cat_geom - curr_geom_name <- categorical - } - else { - stop("Cannot plot columns of type:", column_types[i]) - } - curr_data <- self$get_data_frame(stack_data = TRUE, measure.vars = columns) - if(curr_geom_name == "geom_boxplot" || curr_geom_name == "geom_point" || curr_geom_name == "geom_violin" || curr_geom_name == "geom_jitter" || curr_geom_name == "box_jitter" || curr_geom_name == "violin_jitter" || curr_geom_name == "violin_box") { - g <- ggplot2::ggplot(data = curr_data, mapping = aes(x = "", y = value)) + xlab("") - } - else { - g <- ggplot2::ggplot(data = curr_data, mapping = aes(x = value)) + ylab("") - } - - if(curr_geom_name == 
"box_jitter") { - g <- g + ggplot2::geom_boxplot() + ggplot2::geom_jitter(width = 0.2, height = 0.2) - } - else if(curr_geom_name == "violin_jitter") { - g <- g + ggplot2::geom_violin() + ggplot2::geom_jitter(width = 0.2, height = 0.2) - } - else if(curr_geom_name == "violin_box") { - g <- g + ggplot2::geom_violin() + ggplot2::geom_boxplot() - } - else if(curr_geom_name == "pie_chart") { - g <- g + ggplot2::geom_bar() + ggplot2::coord_polar(theta = "x") - } - else { - g <- g + curr_geom() - } - - if (coord_flip) { - g <- g + ggplot2::coord_flip() - } - if(free_scale_axis) { - g <- g + ggplot2::facet_wrap(facets = ~ variable, scales = "free", ncol = ncol) - } - else { - g <- g + ggplot2::facet_wrap(facets = ~ variable, scales = "free_x", ncol = ncol) - } - - return(g) - } - else { - graphs <- list() - i = 1 - for(column in columns) { - if(column_types[i] == "numeric") { - curr_geom <- numeric_geom - curr_geom_name <- numeric - } - else if(column_types[i] == "cat") { - curr_geom <- cat_geom - curr_geom_name <- categorical - } - else { - stop("Cannot plot columns of type:", column_types[i]) - } - if(curr_geom_name == "geom_boxplot" || curr_geom_name == "geom_violin" || curr_geom_name == "geom_point" || curr_geom_name == "geom_jitter" || curr_geom_name == "box_jitter" || curr_geom_name == "violin_jitter" || curr_geom_name == "violin_box") { - g <- ggplot2::ggplot(data = curr_data, mapping = aes_(x = "", y = as.name(column))) + xlab("") - } - else { - g <- ggplot2::ggplot(data = curr_data, mapping = aes_(x = as.name(column))) + ylab("") - } - if (coord_flip) { - g <- g + ggplot2::coord_flip() - } - if(curr_geom_name == "box_jitter") { - g <- g + ggplot2::geom_boxplot() + ggplot2::geom_jitter(width = 0.2, height = 0.2) - } - else if(curr_geom_name == "violin_jitter") { - g <- g + ggplot2::geom_violin() + ggplot2::geom_jitter(width = 0.2, height = 0.2) - } - else if(curr_geom_name == "violin_box") { - g <- g + ggplot2::geom_violin() + ggplot2::geom_boxplot() - } - else 
if(curr_geom_name == "pie_chart") { - g <- g + ggplot2::geom_bar() + ggplot2::coord_polar(theta = "x") - } - else { - g <- g + curr_geom() - } - graphs[[i]] <- g - i = i + 1 - } - if(output == "combine") { - return(patchwork::wrap_plots(graphs, ncol = ncol)) - } - else { - return(graphs) - } - } -} -) - -DataSheet$set("public","make_date_yearmonthday", function(year, month, day, f_year, f_month, f_day, year_format = "%Y", month_format = "%m") { - if(!missing(year)) year_col <- self$get_columns_from_data(year, use_current_filter = FALSE) - else if(!missing(f_year)) year_col <- f_year - else stop("One of year or f_year must be specified.") - if(!missing(month)) month_col <- self$get_columns_from_data(month, use_current_filter = FALSE) - else if(!missing(f_month)) month_col <- f_month - else stop("One of month or f_month must be specified.") - if(!missing(day)) day_col <- self$get_columns_from_data(day, use_current_filter = FALSE) - else if(!missing(f_day)) day_col <- f_day - else stop("One of day or f_day must be specified.") - - if(missing(year_format)) { - year_counts <- stringr::str_count(year_col) - if(length(unique(year_counts)) > 1) stop("Year column has inconsistent year formats") - else { - year_length <- year_counts[1] - if(year_length == 2) year_format = "%y" - else if(year_length == 4) year_format = "%Y" - else stop("Cannot detect year format with ", year_length, " digits.") - } - } - if(missing(month_format)) { - if(all(month_col %in% 1:12)) month_format = "%m" - else if(all(month_col %in% month.abb)) month_format = "%b" - else if(all(month_col %in% month.name)) month_format = "%B" - else stop("Cannot detect month format") - } - return(as.Date(paste(year_col, month_col, day_col), format = paste(year_format, month_format, "%d"))) -} -) - -# Not sure if doy_format should be a parameter? There seems to only be one format for it. 
-DataSheet$set("public","make_date_yeardoy", function(year, doy, base, doy_typical_length = "366") { - if(!missing(year)) year_col <- self$get_columns_from_data(year, use_current_filter = FALSE) - if(!missing(doy)) doy_col <- self$get_columns_from_data(doy, use_current_filter = FALSE) - - year_counts <- stringr::str_count(year_col) - year_length <- year_counts[1] - if(year_length == 2){ - if(missing(base)) stop("Base must be specified.") - year_col <- dplyr::if_else(year_col <= base, year_col + 2000, year_col + 1900) - } - if(doy_typical_length == "366") { - if(is.factor(year_col)) { - year_col <- as.numeric(levels(year_col))[year_col] - } - #Replacing day 60 with 0 for non-leap years.This will result into NA dates - doy_col[(!lubridate::leap_year(year_col)) & doy_col == 60] <- 0 - doy_col[(!lubridate::leap_year(year_col)) & doy_col > 60] <- doy_col[(!lubridate::leap_year(year_col)) & doy_col > 60] - 1 - } - return(temp_date <- as.Date(paste(as.character(year_col), "-", doy_col), format = "%Y - %j")) -} -) - -DataSheet$set("public","set_contrasts_of_factor", function(col_name, new_contrasts, defined_contr_matrix) { - if(!col_name %in% self$get_column_names()) stop(col_name, " not found in the data") - if(!is.factor(self$get_columns_from_data(col_name))) stop(factor, " is not a factor column.") - factor_col <- self$get_columns_from_data(col_name) - contr_col <- nlevels(factor_col) - 1 - contr_row <- nlevels(factor_col) - cat("Factor",col_name,"has",new_contrasts,"contrasts") - if(new_contrasts == "user_defined") { - if(any(is.na(defined_contr_matrix)) ||!is.numeric(defined_contr_matrix) ||nrow(defined_contr_matrix) != contr_row || ncol(defined_contr_matrix) != contr_col) stop("The contrast matrix should have ", contr_col, " column(s) and ", contr_row, " row(s) ") - } - #checks needed on contrasts before assigning - if(!(new_contrasts %in% c("contr.treatment", "contr.helmert", "contr.poly", "contr.sum", "user_defined"))) { - stop(new_contrasts, " is not a valid 
contrast name") - } - else if(!is.character(new_contrasts)) { - stop("New column name must be of type: character") - } - if(new_contrasts == "user_defined") new_contrasts <- defined_contr_matrix - contrasts(private$data[[col_name]]) <- new_contrasts -} -) - -#This method gets a date column and extracts part of the information such as year, month, week, weekday etc(depending on which parameters are set) and creates their respective new column(s) -DataSheet$set("public","split_date", function(col_name = "", year_val = FALSE, year_name = FALSE, leap_year = FALSE, month_val = FALSE, month_abbr = FALSE, month_name = FALSE, week_val = FALSE, week_abbr = FALSE, week_name = FALSE, weekday_val = FALSE, weekday_abbr = FALSE, weekday_name = FALSE, day = FALSE, day_in_month = FALSE, day_in_year = FALSE, day_in_year_366 = FALSE, pentad_val = FALSE, pentad_abbr = FALSE, dekad_val = FALSE, dekad_abbr = FALSE, quarter_val = FALSE, quarter_abbr = FALSE, with_year = FALSE, s_start_month = 1, s_start_day_in_month = 1, days_in_month = FALSE) { - col_data <- self$get_columns_from_data(col_name, use_current_filter = FALSE) - adjacent_column <- col_name - if(!lubridate::is.Date(col_data)) stop("This column must be a date or time!") - s_shift <- s_start_day_in_month > 1 || s_start_month > 1 - is_climatic <- self$is_climatic_data() - - if(s_shift) { - if(s_start_month %% 1 != 0 || s_start_month < 1 || s_start_month > 12) stop("shift_start_month must be an integer between 1 and 12. ", s_start_month, " is invalid.") - # TODO better checks on day in relation to month selected - if(s_start_day_in_month %% 1 != 0 || s_start_day_in_month < 1 || s_start_day_in_month > 31) stop("shift_start_day_in_month must be an integer between 1 and 31. 
", s_start_day_in_month, " is invalid.") - # using a leap year as year to ensure consistent day of year across years - s_start_day <- lubridate::yday(as.Date(paste("2000", s_start_month, s_start_day_in_month), format = "%Y %m %d")) - if(is.na(s_start_day)) stop("Could not identify starting day for shift year with shift_start_month = ", s_start_month, " and shift_start_day = ", s_start_day_in_month) - if(s_start_day %% 1 != 0 || s_start_day < 2 || s_start_day > 366) stop("shift_start_day must be an integer between 2 and 366") - doy_col <- as.integer(yday_366(col_data)) - year_col <- lubridate::year(col_data) - temp_s_doy <- doy_col - s_start_day + 1 - temp_s_year <- year_col - temp_s_year[temp_s_doy < 1] <- paste(year_col[temp_s_doy < 1] - 1, year_col[temp_s_doy < 1], sep = "-") - temp_s_year[temp_s_doy > 0] <- paste(year_col[temp_s_doy > 0], year_col[temp_s_doy > 0] + 1, sep = "-") - temp_s_year <- make_factor(temp_s_year) - temp_s_year_num <- as.numeric(substr(temp_s_year, 1, 4)) - temp_s_doy[temp_s_doy < 1] <- temp_s_doy[temp_s_doy < 1] + 366 - s_year_labs <- c(min(year_col) -1, sort(unique(year_col))) - names(s_year_labs) <- paste(s_year_labs, s_year_labs + 1, sep = "-") - } - else s_start_day <- 1 - - if(weekday_name) { - weekday_name_vector <- lubridate::wday(col_data, label = TRUE, abbr = FALSE) - col_name <- next_default_item(prefix = "weekday_name", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = weekday_name_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(weekday_abbr) { - weekday_abbr_vector <- lubridate::wday(col_data, label = TRUE) - col_name <- next_default_item(prefix = "weekday_abbr", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = weekday_abbr_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(weekday_val) { - weekday_val_vector <- lubridate::wday(col_data) - 
col_name <- next_default_item(prefix = "weekday_val", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = weekday_val_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(week_val) { - week_Val_vector <- lubridate::week(col_data) - col_name <- next_default_item(prefix = "week_val", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = week_Val_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(pentad_abbr) { - month_abbr_vector <-forcats::fct_shift(f = (lubridate::month(col_data, label = TRUE)), n = (s_start_month - 1)) - pentad_val_vector <- ((as.integer(pentad(col_data))) - (s_start_month - 1)*6) %% 6 - pentad_val_vector <- ifelse(pentad_val_vector == 0, 6, pentad_val_vector) - month.list <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec") - month_levels <- if (s_start_month == 1) month.list else c(tail(month.list, -s_start_month + 1), head(month.list, s_start_month - 1)) - pentad_levels <- paste0(rep(month_levels, each = 6), 1:6) - pentad_abbr_vector <- factor(paste(month_abbr_vector, pentad_val_vector, sep = ""), levels = pentad_levels) - col_name <- next_default_item(prefix = "pentad_abbr", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = pentad_abbr_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(pentad_val) { - pentad_val_vector <- ((as.integer(pentad(col_data))) - (s_start_month - 1)*6) %% 72 - pentad_val_vector <- ifelse(pentad_val_vector == 0, 72, pentad_val_vector) - col_name <- next_default_item(prefix = "pentad", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = pentad_val_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(dekad_abbr) { - month_abbr_vector <- 
make_factor(forcats::fct_shift(f = (lubridate::month(col_data, label = TRUE)), n = (s_start_month - 1)), ordered = FALSE) - dekad_val_vector <- ((as.numeric(dekade(col_data))) - (s_start_month - 1)*3) %% 3 - dekad_val_vector <- ifelse(dekad_val_vector == 0, 3, dekad_val_vector) - month.list <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec") - month_levels <- if (s_start_month == 1) month.list else c(tail(month.list, -s_start_month + 1), head(month.list, s_start_month - 1)) - dekad_levels <- paste0(rep(month_levels, each = 3), 1:3) - dekad_abbr_vector <- factor(paste(month_abbr_vector, dekad_val_vector, sep = ""), levels = dekad_levels) - col_name <- next_default_item(prefix = "dekad_abbr", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = dekad_abbr_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(dekad_val) { - # TODO. shift function when s_start_month > 1 - dekad_val_vector <- ((as.numeric(dekade(col_data))) - (s_start_month - 1)*3) %% 36 - dekad_val_vector <- ifelse(dekad_val_vector == 0, 36, dekad_val_vector) - col_name <- next_default_item(prefix = "dekad", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = dekad_val_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(quarter_abbr){ - if(s_shift) { - s_quarter_val_vector <- lubridate::quarter(col_data, with_year = with_year, fiscal_start = s_start_month) - quarter_labels <- get_quarter_label(s_quarter_val_vector, s_start_month) - col_name <- next_default_item(prefix = "s_quarter", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = quarter_labels, adjacent_column = adjacent_column, before = FALSE) - self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted quarter starting on 
day", s_start_day)) - } - else { - quarter_val_vector <- lubridate::quarter(col_data, with_year = with_year) - quarter_labels <- get_quarter_label(quarter_val_vector, s_start_month) - col_name <- next_default_item(prefix = "quarter_abbr", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = quarter_labels, adjacent_column = adjacent_column, before = FALSE) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(quarter_val) { - if(s_shift) { - s_quarter_val_vector <- lubridate::quarter(col_data, with_year = with_year, fiscal_start = s_start_month) - col_name <- next_default_item(prefix = "s_quarter", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = s_quarter_val_vector, adjacent_column = adjacent_column, before = FALSE) - self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted quarter starting on day", s_start_day)) - } - else { - quarter_val_vector <- lubridate::quarter(col_data, with_year = with_year) - col_name <- next_default_item(prefix = "quarter", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = quarter_val_vector, adjacent_column = adjacent_column, before = FALSE) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(day_in_year) { - day_in_year_vector <- lubridate::yday(col_data) - s_start_day + 1 + (!lubridate::leap_year(col_data) & s_start_day > 59) - day_in_year_vector <- dplyr::if_else(lubridate::leap_year(col_data), day_in_year_vector %% 366, day_in_year_vector %% 365) - day_in_year_vector <- dplyr::if_else(day_in_year_vector == 0, dplyr::if_else(lubridate::leap_year(col_data), 366, 365), day_in_year_vector) - col_name <- next_default_item(prefix = 
"doy_365", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = day_in_year_vector, adjacent_column = adjacent_column, before = FALSE) - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - if(s_shift) self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted year starting on day", s_start_day)) - } - if(day_in_year_366) { - if(s_shift) { - col_name <- next_default_item(prefix = "s_doy", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = temp_s_doy, adjacent_column = adjacent_column, before = FALSE) - self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted day of year starting on day", s_start_day)) - } - else { - day_in_year_366_vector <- as.integer(yday_366(col_data)) - col_name <- next_default_item(prefix = "doy", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = day_in_year_366_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(is_climatic && is.null(self$get_climatic_column_name(doy_label))) { - self$append_climatic_types(types = c(doy = col_name)) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(days_in_month) { - days_in_month_vector <- as.numeric(lubridate::days_in_month(col_data)) - col_name <- next_default_item(prefix = "days_in_month", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = days_in_month_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(day_in_month) { - day_in_month_vector <- as.numeric(lubridate::mday(col_data)) - col_name <- next_default_item(prefix = "day_in_month", existing_names = 
self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = day_in_month_vector, adjacent_column = adjacent_column, before = FALSE) - if(is_climatic && is.null(self$get_climatic_column_name(day_label))) { - self$append_climatic_types(types = c(day = col_name)) - } - } - if(month_val) { - month_val_vector <- (lubridate::month(col_data) - (s_start_month - 1)) %% 12 - month_val_vector <- ifelse(month_val_vector == 0, 12, month_val_vector) - col_name <- next_default_item(prefix = "month_val", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = month_val_vector, adjacent_column = adjacent_column, before = FALSE) - if(s_shift) self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted month starting on day", s_start_day)) - if(is_climatic && is.null(self$get_climatic_column_name(month_label))) { - self$append_climatic_types(types = c(month = col_name)) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(month_abbr) { - month_abbr_vector <- make_factor(forcats::fct_shift(f = lubridate::month(col_data, label = TRUE), n = s_start_month - 1), ordered = FALSE) - col_name <- next_default_item(prefix = "month_abbr", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = month_abbr_vector, adjacent_column = adjacent_column, before = FALSE) - if(s_shift) self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted month starting on day", s_start_day)) - if(is_climatic && is.null(self$get_climatic_column_name(month_label))) { - self$append_climatic_types(types = c(month = col_name)) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(month_name) { - month_name_vector <- 
forcats::fct_shift(f = lubridate::month(col_data, label = TRUE, abbr = FALSE), n = s_start_month - 1) - col_name <- next_default_item(prefix = "month_name", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = month_name_vector, adjacent_column = adjacent_column, before = FALSE) - if(s_shift) self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted month starting on day", s_start_day)) - if(is_climatic && is.null(self$get_climatic_column_name(month_label))) { - self$append_climatic_types(types = c(month = col_name)) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(year_name) { - if(s_shift) { - col_name <- next_default_item(prefix = "s_year", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = temp_s_year, adjacent_column = adjacent_column, before = FALSE) - self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted year starting on day", s_start_day)) - new_labels <- sort(unique(temp_s_year_num)) - names(new_labels) <- sort(unique(temp_s_year)) - self$append_to_variables_metadata(col_names = col_name, property = labels_label, new_val = new_labels) - } - else { - year_vector <- lubridate::year(col_data) - col_name <- next_default_item(prefix = "year", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = make_factor(year_vector), adjacent_column = adjacent_column, before = FALSE) - } - if(is_climatic && is.null(self$get_climatic_column_name(year_label))) { - self$append_climatic_types(types = c(year = col_name)) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(year_val) { - if(s_shift) { - col_name <- 
next_default_item(prefix = "s_year", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = temp_s_year_num, adjacent_column = adjacent_column, before = FALSE) - self$append_to_variables_metadata(col_names = col_name, property = label_label, new_val = paste("Shifted year starting on day", s_start_day)) - } - else { - year_vector <- lubridate::year(col_data) - col_name <- next_default_item(prefix = "year", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = year_vector, adjacent_column = adjacent_column, before = FALSE) - } - if(is_climatic && is.null(self$get_climatic_column_name(year_label))) { - self$append_climatic_types(types = c(year = col_name)) - } - self$append_to_variables_metadata(col_names = col_name, property = doy_start_label, new_val = s_start_day) - } - if(leap_year) { - leap_year_vector <- lubridate::leap_year(col_data) - col_name <- next_default_item(prefix = "leap_year", existing_names = self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name = col_name, col_data = leap_year_vector, adjacent_column = adjacent_column, before = FALSE) - } - } -) - -#TODO These should go in a separate climatic file -#************************************************ -# labels for climatic column types -rain_label="rain" -rain_day_label="rain_day" -rain_day_lag_label="rain_day_lag" -date_label="date" -doy_label="doy" -s_doy_label = "s_doy" -doy_start_label = "doy_start" -year_label="year" -year_month_label="year_month" -date_time_label="date_time" -dos_label="dos" ##Day of Season -season_label="season" -month_label="month" -day_label="day" -dm_label="day_month" -time_label="time" -station_label="station" -date_asstring_label="date_asstring" -temp_min_label="temp_min" -temp_max_label="temp_max" -hum_min_label="hum_min" -hum_max_label="hum_max" -temp_air_label="temp_air" -temp_range_label="temp_range" 
-wet_buld_label="wet_bulb" -dry_bulb_label="dry_buld" -evaporation_label="evaporation" -element_factor_label="element_type" -identifier_label = "identifier" -capacity_label = "capacity_max" -wind_speed_label="wind_speed" -wind_direction_label="wind_direction" -lat_label="lat" -lon_label="lon" -alt_label="alt" -season_station_label="season_station" -date_station_label="date_station" -sunshine_hours_label="sunshine_hours" -radiation_label="radiation" -cloud_cover_label="cloud_cover" -district_label = "district" - -all_climatic_column_types <- c(rain_label,district_label, rain_day_label, rain_day_lag_label, date_label, doy_label, s_doy_label, year_label, year_month_label, date_time_label, dos_label, season_label, month_label, day_label, dm_label, time_label, station_label, date_asstring_label, temp_min_label, temp_max_label, hum_min_label, hum_max_label, temp_air_label, temp_range_label, wet_buld_label, dry_bulb_label, evaporation_label, element_factor_label, identifier_label, capacity_label, wind_speed_label, wind_direction_label, lat_label, lon_label, alt_label, season_station_label, date_station_label, sunshine_hours_label, radiation_label, cloud_cover_label) - -# Column metadata -climatic_type_label <- "Climatic_Type" -is_element_label <- "Is_Element" - -is_climatic_element <- function(x) { - return(x %in% c(rain_label, rain_day_label, rain_day_lag_label, temp_min_label, temp_max_label, temp_air_label, - temp_range_label, wet_buld_label, dry_bulb_label, evaporation_label, capacity_label, wind_speed_label, - wind_direction_label, sunshine_hours_label, radiation_label, cloud_cover_label)) -} - -# Data frame metadata -is_climatic_label <- "Is_Climatic" - -DataBook$set("public","define_as_climatic", function(data_name, types, key_col_names, key_name) { - self$add_key(data_name = data_name, col_names = key_col_names, key_name = key_name) - self$append_to_dataframe_metadata(data_name, is_climatic_label, TRUE) - - for(curr_data_name in self$get_data_names()) { - 
if(!self$get_data_objects(data_name)$is_metadata(is_climatic_label)) { - self$append_to_dataframe_metadata(curr_data_name, is_climatic_label, FALSE) - } - } - self$get_data_objects(data_name)$set_climatic_types(types) -} -) - -DataSheet$set("public","set_climatic_types", function(types) { - # Clear all climatic types first - self$append_to_variables_metadata(property = climatic_type_label, new_val = NULL) - if(!all(names(types) %in% all_climatic_column_types)) stop("Cannot recognise the following climatic types: ", paste(names(types)[!names(types) %in% all_climatic_column_types], collapse = ", ")) - invisible(sapply(names(types), function(name) self$append_to_variables_metadata(types[name], climatic_type_label, name))) - element_cols <- types[is_climatic_element(names(types))] - other_cols <- setdiff(self$get_column_names(), element_cols) - self$append_to_variables_metadata(element_cols, is_element_label, TRUE) - self$append_to_variables_metadata(other_cols, is_element_label, FALSE) - - types <- types[sort(names(types))] - cat("Climatic dataset:", self$get_metadata(data_name_label), "\n") - cat("----------------\n") - cat("Definition", "\n") - cat("----------------\n") - for(i in seq_along(types)) { - cat(names(types)[i], ": ", types[i], "\n", sep = "") - } -} -) - -DataSheet$set("public","append_climatic_types", function(types) { - if(!all(names(types) %in% all_climatic_column_types)) stop("Cannot recognise the following climatic types: ", paste(names(types)[!names(types) %in% all_climatic_column_types], collapse = ", ")) - for(i in seq_along(types)) { - col <- self$get_climatic_column_name(names(types)[i]) - if(!is.null(col)) self$append_to_variables_metadata(col, climatic_type_label, NULL) - } - invisible(sapply(names(types), function(name) self$append_to_variables_metadata(types[name], climatic_type_label, name))) - cat("Climatic dataset:", self$get_metadata(data_name_label), "\n") - cat("----------------\n") - cat("Update", "\n") - cat("----------------\n") - 
for(i in seq_along(types)) { - cat(names(types)[i], ": ", types[i], "\n", sep = "") - } -} -) - -#Method for creating inventory plot - -DataSheet$set("public","make_inventory_plot", function(date_col, station_col = NULL, year_col = NULL, doy_col = NULL, element_cols = NULL, add_to_data = FALSE, - year_doy_plot = FALSE, coord_flip = FALSE, facet_by = NULL, facet_xsize = 9, facet_ysize = 9, facet_xangle = 90, - facet_yangle = 90, graph_title = "Inventory Plot", graph_subtitle = NULL, graph_caption = NULL, title_size = NULL, - subtitle_size = NULL, caption_size = NULL, labelXAxis, labelYAxis, xSize = NULL, ySize = NULL, - Xangle = NULL, Yangle = NULL, scale_xdate, fromXAxis = NULL, toXAxis = NULL, byXaxis = NULL, date_ylabels, - legend_position = NULL, xlabelsize = NULL, ylabelsize = NULL, scale = NULL, dir = "", row_col_number, - nrow = NULL, ncol = NULL, scale_ydate = FALSE, date_ybreaks, step = 1, key_colours = c("red", "grey"), - display_rain_days = FALSE, rain_cats = list(breaks = c(0, 0.85, Inf), labels = c("Dry", "Rain"), - key_colours = c("tan3", "blue"))) { - if(missing(date_col)) stop("Date columns must be specified.") - if(missing(element_cols)) stop("Element column(s) must be specified.") - if(!lubridate::is.Date(self$get_columns_from_data(date_col))) stop(paste(date_col, " must be of type Date.")) - - if(!all(element_cols %in% self$get_column_names())) { - stop("Not all elements columns found in the data") - } - - is_climatic <- self$is_climatic_data() - - # Add year and doy columns if doing year_doy plot - if(year_doy_plot) { - if(is.null(year_col)) { - if(is_climatic) { - if(is.null(self$get_climatic_column_name(year_label))) { - self$split_date(col_name = date_col, year_val = TRUE) - } - year_col <- self$get_climatic_column_name(year_label) - } - else { - self$split_date(col_name = date_col, year_val = TRUE) - # work around since the name of the new year column is not known from split_date - # TODO split_date could silently return a named character 
vector giving the columns created - col_names <- self$get_column_names() - year_col <- col_names[length(col_names)] - } - } - if(is.null(doy_col)) { - if(is_climatic) { - if(is.null(self$get_climatic_column_name(doy_label))) { - self$split_date(col_name = date_col, day_in_year_366 = TRUE) - } - doy_col <- self$get_climatic_column_name(doy_label) - } - else { - self$split_date(col_name = date_col, day_in_year_366 = TRUE) - # work around since the name of the new day_in_year column is not known from split_date - # TODO split_date could silently return a named character vector giving the columns created - col_names <- self$get_column_names() - doy_col <- col_names[length(col_names)] - } - } - } - - blank_y_axis <- ggplot2::theme(axis.text.y = ggplot2::element_blank(), axis.ticks.y = ggplot2::element_blank(), axis.line.y = ggplot2::element_blank()) - if(length(element_cols) == 1) { - curr_data <- self$get_data_frame() - elements <- curr_data[[element_cols]] - } - else { - if(!is.null(station_col)) { - curr_data <- self$get_data_frame(stack_data = TRUE, measure.vars = element_cols, id.vars=c(date_col, station_col, year_col, doy_col)) - } - else { - curr_data <- self$get_data_frame(stack_data = TRUE, measure.vars = element_cols, id.vars=c(date_col, year_col, doy_col)) - } - elements <- curr_data[["value"]] - } - - key_name <- next_default_item(prefix = "key", existing_names = names(curr_data), include_index = FALSE) - curr_data[[key_name]] <- factor(ifelse(is.na(elements), "Missing", "Present"), levels = c("Present", "Missing")) - - key <- c(key_colours) - names(key) <- c("Missing", "Present") - if(display_rain_days) { - levels(curr_data[[key_name]]) <- c(levels(curr_data[[key_name]]), rain_cats$labels) - if(is_climatic) { - rain_col <- self$get_climatic_column_name(rain_label) - } - else { - warning("Cannot determine rain column automatically. 
Taking first element specified as the rain column.") - #TODO allow the user to specify this in the function when the data is not climatic - rain_col <- element_cols[1] - } - if(!is.null(rain_col) && rain_col %in% element_cols) { - if(length(element_cols) > 1) { - curr_data[[key_name]][curr_data[["variable"]] == rain_col & curr_data[[key_name]] != "Missing"] <- cut(curr_data[["value"]][curr_data[["variable"]] == rain_col & curr_data[[key_name]] != "Missing"], breaks = rain_cats$breaks, labels = rain_cats$labels, right = FALSE) - key <- c(key_colours, rain_cats$key_colours) - names(key) <- c("Missing", "Present",rain_cats$labels) - } - else { - curr_data[[key_name]][curr_data[[key_name]] != "Missing"] <- cut(curr_data[[rain_col]][curr_data[[key_name]] != "Missing"], breaks = rain_cats$breaks, labels = rain_cats$labels, right = FALSE) - key <- c(key_colours[1], rain_cats$key_colours) - names(key) <- c("Missing", rain_cats$labels) - } - } - } - if(year_doy_plot) { - curr_data[["common_date"]] <- as.Date(paste0("2000-", curr_data[[doy_col]]), "%Y-%j") - g <- ggplot2::ggplot(data = curr_data, mapping = ggplot2::aes_(x = as.name(year_col), y = as.name("common_date"), colour = as.name(key_name))) + ggplot2::geom_point(size=5, shape=15) + ggplot2::scale_colour_manual(values = key) + ggplot2::scale_y_date(date_breaks = "2 month", labels = function(x) format(x, "%e %b")) - if(!is.null(station_col) && length(element_cols) > 1) { - if(is.null(facet_by)) { - message("facet_by not specified. facets will be by stations-elements.") - facet_by <- "stations-elements" - } - else if(facet_by == "stations") { - warning("facet_by = stations. facet_by must be either stations-elements or elements-stations when there are multiple of both. Using stations-elements.") - facet_by <- "stations-elements" - } - else if(facet_by == "elements") { - warning("facet_by = elements. facet_by must be either stations-elements or elements-stations when there are multiple of both. 
Using elements-stations.") - facet_by <- "elements-stations" - } - - if(facet_by == "stations-elements") { - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(facets = as.formula(paste(".~",station_col, "+ variable")), nrow = nrow, ncol = ncol, scales = scale, dir = dir) - }else {g <- g + ggplot2::facet_grid(facets = as.formula(paste(station_col, "~variable")))} - } - else if(facet_by == "elements-stations") { - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(facets = as.formula(paste(".~variable +",station_col)), nrow = nrow, ncol = ncol, scales = scale, dir = dir) - }else {g <- g + ggplot2::facet_grid(facets = as.formula(paste("variable~",station_col)))} - } - else stop("invalid facet_by value:", facet_by) - } - else if(!is.null(station_col)) { - g <- g + ggplot2::facet_grid(facets = as.formula(paste(station_col, "~."))) - if(graph_title == "Inventory Plot") { - graph_title <- paste0(graph_title, ": ", element_cols) - } - } - else if(length(element_cols) > 1) { - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(.~variable, nrow = nrow, ncol = ncol, scales = scale, dir = dir) - }else {g <- g + ggplot2::facet_grid(facets = variable~.)} - - } - if(!missing(scale_xdate)){ g <- g + ggplot2::scale_x_continuous(breaks=seq(fromXAxis, toXAxis, byXaxis)) } - if(scale_ydate && !missing(date_ybreaks) && !missing(date_ylabels)){ g <- g + ggplot2::scale_y_date(breaks = seq(min(curr_data[["common_date"]]), max(curr_data[["common_date"]]), by = paste0(step," ",date_ybreaks)), date_labels = date_ylabels) } - } - else { - g <- ggplot2::ggplot(data = curr_data, ggplot2::aes_(x = as.name(date_col), y = 1, fill = as.name(key_name))) + ggplot2::geom_raster() + ggplot2::scale_fill_manual(values = key) + ggplot2::scale_x_date(date_minor_breaks = "1 year") - if(!is.null(station_col) && length(element_cols) > 1) { - if(is.null(facet_by) || facet_by == "stations") { - if(is.null(facet_by)) message("facet_by not specified. 
facets will be by stations.") - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(facets = as.formula(paste(station_col, "+ variable~.")), nrow = nrow, ncol = ncol, scales = scale, dir = dir) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - } - else{ - g <- g + ggplot2::facet_grid(facets = as.formula(paste(station_col, "+ variable~."))) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - } - } - else if(facet_by == "elements") { - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(facets = as.formula(paste("variable +", station_col, "~.")), nrow = nrow, ncol = ncol, scales = scale, dir = dir) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - }else{ - g <- g + ggplot2::facet_grid(facets = as.formula(paste("variable +", station_col, "~."))) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - } - } - else if(facet_by == "stations-elements") { - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(facets = as.formula(paste(".~",station_col, "+ variable")), nrow = nrow, ncol = ncol, scales = scale, dir = dir) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - - } - else{ - g <- g + ggplot2::facet_grid(facets = as.formula(paste(station_col, "~variable"))) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - - } - } - else if(facet_by == "elements-stations") { - if(!missing(row_col_number)){ - g <- g + ggplot2::facet_wrap(facets = as.formula(paste(".~variable +",station_col)), nrow = nrow, ncol = ncol, scales = scale, dir = dir) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - - } - else{ - g <- g + ggplot2::facet_grid(facets = as.formula(paste("variable~", station_col))) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = NULL) - - } - } - else stop("invalid 
facet_by value:", facet_by) - } - else if(!is.null(station_col)) { - if(!is.factor(curr_data[[station_col]])) curr_data[[station_col]] <- factor(curr_data[[station_col]]) - g <- ggplot2::ggplot(data = curr_data, ggplot2::aes_(x = as.name(date_col), y = as.name(station_col), fill = as.name(key_name))) + ggplot2::geom_raster() + ggplot2::scale_fill_manual(values = key) + ggplot2::scale_x_date(date_minor_breaks = "1 year") + ggplot2::geom_hline(yintercept = seq(0.5, by = 1, length.out = length(levels(curr_data[[station_col]])) + 1)) - if(graph_title == "Inventory Plot") { - graph_title <- paste0(graph_title, ": ", element_cols) - } - } - else if(length(element_cols) > 1) { - g <- ggplot2::ggplot(data = curr_data, ggplot2::aes_(x = as.name(date_col), y = as.name("variable"), fill = as.name(key_name))) + ggplot2::geom_raster() + ggplot2::scale_fill_manual(values = key) + ggplot2::scale_x_date(date_minor_breaks = "1 year") + ggplot2::geom_hline(yintercept = seq(0.5, by = 1, length.out = length(levels(curr_data[["variable"]])) + 1)) + ggplot2::labs(y = "Elements") - } - else { - g <- ggplot2::ggplot(data = curr_data, ggplot2::aes_(x = as.name(date_col), y = 1, fill = as.name(key_name))) + ggplot2::geom_raster() + ggplot2::scale_fill_manual(values = key) + ggplot2::scale_x_date(date_minor_breaks = "1 year") + ggplot2::geom_hline(yintercept = seq(0.5, by = 1, length.out = length(levels(curr_data[["variable"]])) + 1)) + blank_y_axis + ggplot2::scale_y_continuous(breaks = NULL) + ggplot2::labs(y = element_cols) - } - if(!missing(scale_xdate)){ g <- g + ggplot2::scale_x_date(breaks = paste0(byXaxis," year"), limits = c(from=as.Date(paste0(fromXAxis,"-01-01")), to = as.Date(paste0(toXAxis,"-12-31"))), date_labels = "%Y") } - } - if(coord_flip) { - g <- g + ggplot2::coord_flip() - } - if(!missing(labelXAxis)){g <- g + ggplot2::xlab(labelXAxis)}else{g <- g + ggplot2::xlab(NULL)} - if(!missing(labelYAxis)){g <- g + ggplot2::ylab(labelYAxis)}else{g <- g + ggplot2::ylab(NULL)} - 
return(g + ggplot2::labs(title = graph_title, subtitle = graph_subtitle, caption = graph_caption) + ggplot2::theme(strip.text.x = element_text(margin = margin(1, 0, 1, 0), size = facet_xsize, angle = facet_xangle), strip.text.y = element_text(margin = margin(1, 0, 1, 0), size = facet_ysize, angle = facet_yangle), legend.position=legend_position, plot.title = ggplot2::element_text(hjust = 0.5, size = title_size), plot.subtitle = ggplot2::element_text(size = subtitle_size), plot.caption = ggplot2::element_text(size = caption_size), axis.text.x = ggplot2::element_text(size=xSize, angle = Xangle, vjust = 0.6), axis.title.x = ggplot2::element_text(size=xlabelsize), axis.title.y = ggplot2::element_text(size=ylabelsize), axis.text.y = ggplot2::element_text(size = ySize, angle = Yangle, hjust = 0.6))) -} -) - -DataSheet$set("public","infill_missing_dates", function(date_name, factors, start_month, start_date, end_date, resort = TRUE) { - date_col <- self$get_columns_from_data(date_name) - if(!lubridate::is.Date(date_col)) stop("date_col is not a Date column.") - if(anyNA(date_col)) stop("Cannot do infilling as date column has missing values") - if(!missing(start_date) && !lubridate::is.Date(start_date)) stop("start_date is not of type Date") - if(!missing(end_date) && !lubridate::is.Date(end_date)) stop("end_date is not of type Date") - if(!missing(start_month) && !is.numeric(start_month)) stop("start_month is not numeric") - if(!missing(start_month)) end_month <- ((start_month - 2) %% 12) + 1 - - min_date <- min(date_col) - max_date <- max(date_col) - if(!missing(start_date)) { - if(start_date > min_date) stop("Start date cannot be greater than earliest date") - } - if(!missing(end_date)) { - if(end_date < max_date) stop("End date cannot be less than latest date") - } - - if(missing(factors)) { - if(anyDuplicated(date_col) > 0) stop("Cannot do infilling as date column has duplicate values.") - - if(!missing(start_date) | !missing(end_date)) { - if(!missing(start_date)) { 
- min_date <- start_date - } - if(!missing(end_date)) { - max_date <- end_date - } - } - else if(!missing(start_month)) { - if(start_month <= lubridate::month(min_date)) min_date <- as.Date(paste(lubridate::year(min_date), start_month, 1, sep = "-"), format = "%Y-%m-%d") - else min_date <- as.Date(paste(lubridate::year(min_date) - 1, start_month, 1, sep = "-"), format = "%Y-%m-%d") - if(end_month >= lubridate::month(max_date)) max_date <- as.Date(paste(lubridate::year(max_date), end_month, lubridate::days_in_month(as.Date(paste(lubridate::year(max_date), end_month, 1, sep = "-", format = "%Y-%m-%d"))), sep = "-"), format = "%Y-%m-%d") - else max_date <- as.Date(paste(lubridate::year(max_date) + 1, end_month, lubridate::days_in_month(as.Date(paste(lubridate::year(max_date) + 1, end_month, 1, sep = "-"))), sep = "-", format = "%Y-%m-%d"), format = "%Y-%m-%d") - } - full_dates <- seq(min_date, max_date, by = "day") - if(length(full_dates) > length(date_col)) { - cat("Added", (length(full_dates) - length(date_col)), "rows to extend data and fill date gaps", "\n") - full_dates <- data.frame(full_dates) - names(full_dates) <- date_name - by <- date_name - names(by) <- date_name - self$merge_data(full_dates, by = by, type = "full") - if(resort) self$sort_dataframe(col_names = date_name) - } - else cat("No missing dates to infill") - } - else { - merge_required <- FALSE - col_names_exp <- c() - for(i in seq_along(factors)) { - col_name <- factors[i] - col_names_exp[[i]] <- lazyeval::interp(~ var, var = as.name(col_name)) - } - all_factors <- self$get_columns_from_data(factors, use_current_filter = FALSE) - first_factor <- self$get_columns_from_data(factors[1], use_current_filter = FALSE) - if(dplyr::n_distinct(interaction(all_factors, drop = TRUE))!= dplyr::n_distinct(first_factor)) stop("The multiple factor variables are not in sync. 
Should have same number of levels.") - grouped_data <- self$get_data_frame(use_current_filter = FALSE) %>% dplyr::group_by_(.dots = col_names_exp) - # TODO - date_ranges <- grouped_data %>% dplyr::summarise_(.dots = setNames(list(lazyeval::interp(~ min(var), var = as.name(date_name)), lazyeval::interp(~ max(var), var = as.name(date_name))), c("min_date", "max_date"))) - date_lengths <- grouped_data %>% dplyr::summarise(count = n()) - if(!missing(start_date) | !missing(end_date)) { - if(!missing(start_date)) { - date_ranges$min_date <- start_date - } - if(!missing(end_date)) { - date_ranges$max_date <- end_date - } - } - else if(!missing(start_month)) { - date_ranges$min_date <- dplyr::if_else(lubridate::month(date_ranges$min_date) >= start_month, - as.Date(paste(lubridate::year(date_ranges$min_date), start_month, 1, sep = "-"), format = "%Y-%m-%d"), - as.Date(paste(lubridate::year(date_ranges$min_date) - 1, start_month, 1, sep = "-"), format = "%Y-%m-%d")) - date_ranges$max_date <- dplyr::if_else(lubridate::month(date_ranges$max_date) <= end_month, - as.Date(paste(lubridate::year(date_ranges$max_date), end_month, lubridate::days_in_month(as.Date(paste(lubridate::year(date_ranges$max_date), end_month, 1, sep = "-"), format = "%Y-%m-%d")), sep = "-"), format = "%Y-%m-%d"), - as.Date(paste(lubridate::year(date_ranges$max_date) + 1, end_month, lubridate::days_in_month(as.Date(paste(lubridate::year(date_ranges$max_date), end_month, 1, sep = "-"), format = "%Y-%m-%d")), sep = "-"), format = "%Y-%m-%d")) - } - full_dates_list <- list() - for(j in 1:nrow(date_ranges)) { - full_dates <- seq(date_ranges$min_date[j], date_ranges$max_date[j], by = "day") - if(length(full_dates) > date_lengths[,"count"][j,]) { - cat(paste(unlist(date_ranges[1:length(factors)][j, ]), collapse = "-"), ": Added", (length(full_dates) - unlist(date_lengths[,"count"][j,])), "rows to extend data and fill date gaps", "\n") - merge_required <- TRUE - } - full_dates <- data.frame(full_dates) - 
names(full_dates) <- date_name - for(k in seq_along(factors)) { - full_dates[[factors[k]]] <- date_ranges[[k]][j] - } - full_dates_list[[j]] <- full_dates - } - if(merge_required) { - all_dates_factors <- plyr::rbind.fill(full_dates_list) - by <- c(date_name, factors) - names(by) <- by - self$merge_data(all_dates_factors, by = by, type = "full") - if(resort) self$sort_dataframe(col_names = c(factors, date_name)) - } - else cat("No missing dates to infill") - } - #Added this line to fix the bug of having the variable names in the metadata changinng to NA - # This affects factor columns only - we need to find out why and how to solve it best - self$add_defaults_variables_metadata(self$get_column_names()) -} -) - -DataSheet$set("public","get_key_names", function(include_overall = TRUE, include, exclude, include_empty = FALSE, as_list = FALSE, excluded_items = c()) { - key_names <- names(private$keys) - if(as_list) { - out <- list() - out[[self$get_metadata(data_name_label)]] <- key_names - } - else out <- key_names - return(out) -} -) - -# Labels for climatic column types -### Primary corruption column types -corruption_country_label="country" -corruption_region_label="region" -corruption_procuring_authority_label="procuring_authority" -corruption_award_date_label="award_date" -corruption_fiscal_year_label="fiscal_year" -corruption_signature_date_label="signature_date" -corruption_contract_title_label="contract_title" -corruption_contract_sector_label="contract_sector" -corruption_procurement_category_label="procurement_category" -corruption_winner_name_label="winner_name" -corruption_winner_country_label="winner_country" -corruption_original_contract_value_label="original_contract_value" -corruption_no_bids_received_label="no_bids_received" -corruption_no_bids_considered_label="no_bids_considered" -corruption_method_type_label="method_type" - -all_primary_corruption_column_types <- c(corruption_country_label, - corruption_region_label, - 
corruption_procuring_authority_label, - corruption_award_date_label, - corruption_fiscal_year_label, - corruption_signature_date_label, - corruption_contract_title_label, - corruption_contract_sector_label, - corruption_procurement_category_label, - corruption_winner_name_label, - corruption_winner_country_label, - corruption_original_contract_value_label, - corruption_no_bids_received_label, - corruption_no_bids_considered_label, - corruption_method_type_label) - -### Calculated corruption column types -corruption_award_year_label="award_year" -corruption_procedure_type_label="procedure_type" -corruption_country_iso2_label="country_iso2" -corruption_country_iso3_label="country_iso3" -corruption_w_country_iso2_label="w_country_iso2" -corruption_w_country_iso3_label="w_country_iso3" -corruption_procuring_authority_id_label="procuring_authority_id" -corruption_winner_id_label="winner_id" -corruption_foreign_winner_label="foreign_winner" -corruption_ppp_conversion_rate_label="ppp_conversion_rate" -corruption_ppp_adjusted_contract_value_label="ppp_adjusted_contr_value" -corruption_contract_value_cats_label="contr_value_cats" -corruption_procurement_type_cats_label="procurement_type_cats" -corruption_procurement_type_2_label="procurement_type2" -corruption_procurement_type_3_label="procurement_type3" -corruption_signature_period_label="signature_period" -corruption_signature_period_corrected_label="signature_period_corrected" -corruption_signature_period_5Q_label="signature_period5Q" -corruption_signature_period_25Q_label="signature_period25Q" -corruption_signature_period_cats_label="signature_period_cats" -corruption_secrecy_score_label="secrecy_score" -corruption_tax_haven_label="tax_haven" -corruption_tax_haven2_label="tax_haven2" -corruption_tax_haven3_label="tax_haven3" -corruption_tax_haven3bi_label="tax_haven3bi" -corruption_roll_num_winner_label="roll_num_winner" -corruption_roll_num_issuer_label="roll_num_issuer" 
### Calculated corruption column types (continued)
corruption_roll_sum_winner_label="roll_sum_winner"
corruption_roll_sum_issuer_label="roll_sum_issuer"
corruption_roll_share_winner_label="roll_share_winner"
corruption_single_bidder_label="single_bidder"
corruption_all_bids_label="all_bids"
corruption_all_bids_trimmed_label="all_bids_trimmed"
corruption_contract_value_share_over_threshold_label="contract_value_share_over_threshold"

# Whitelist of every recognised calculated corruption column type.
# Used by set_procurement_types() to validate the names of calculated_types.
# Fixed: corruption_procedure_type_label was listed twice (after
# corruption_winner_id_label as well as in second position); the duplicate
# entry has been removed — membership checks via %in% are unaffected.
all_calculated_corruption_column_types <- c(corruption_award_year_label,
                                            corruption_procedure_type_label,
                                            corruption_country_iso2_label,
                                            corruption_country_iso3_label,
                                            corruption_w_country_iso2_label,
                                            corruption_w_country_iso3_label,
                                            corruption_procuring_authority_id_label,
                                            corruption_winner_id_label,
                                            corruption_foreign_winner_label,
                                            corruption_ppp_conversion_rate_label,
                                            corruption_ppp_adjusted_contract_value_label,
                                            corruption_contract_value_cats_label,
                                            corruption_procurement_type_cats_label,
                                            corruption_procurement_type_2_label,
                                            corruption_procurement_type_3_label,
                                            corruption_signature_period_label,
                                            corruption_signature_period_corrected_label,
                                            corruption_signature_period_5Q_label,
                                            corruption_signature_period_25Q_label,
                                            corruption_signature_period_cats_label,
                                            corruption_secrecy_score_label,
                                            corruption_tax_haven_label,
                                            corruption_tax_haven2_label,
                                            corruption_tax_haven3_label,
                                            corruption_tax_haven3bi_label,
                                            corruption_roll_num_winner_label,
                                            corruption_roll_num_issuer_label,
                                            corruption_roll_sum_winner_label,
                                            corruption_roll_sum_issuer_label,
                                            corruption_roll_share_winner_label,
                                            corruption_single_bidder_label,
                                            corruption_all_bids_label,
                                            corruption_all_bids_trimmed_label,
                                            corruption_contract_value_share_over_threshold_label
)

### Column types specific to country-level corruption data
corruption_ctry_iso2_label="iso2"
corruption_ctry_iso3_label="iso3"
corruption_ctry_ss_2009_label="ss_2009"
corruption_ctry_ss_2011_label="ss_2011"
corruption_ctry_ss_2013_label="ss_2013"
corruption_ctry_ss_2015_label="ss_2015"
corruption_ctry_small_state_label="small_state"

# Whitelist of primary column types recognised for country-level
# corruption (procurement) data.
all_primary_corruption_country_level_column_types <- c(corruption_country_label,
                                                       corruption_ctry_iso2_label,
                                                       corruption_ctry_iso3_label,
                                                       corruption_ctry_ss_2009_label,
                                                       corruption_ctry_ss_2011_label,
                                                       corruption_ctry_ss_2013_label,
                                                       corruption_ctry_ss_2015_label,
                                                       corruption_ctry_small_state_label
)

# Column metadata for corruption colums
corruption_type_label = "Procurement_Type"
corruption_output_label = "Is_Corruption_Risk_Output"
corruption_red_flag_label = "Is_Corruption_Red_Flag"
corruption_index_label = "Is_CRI_Component"

# Data frame metadata for corruption dataframes
corruption_data_label = "Is_Procurement_Data"
corruption_contract_level_label = "Contract_Level"
corruption_country_level_label = "Country_Level"


# DataBook-level wrapper: delegates to the named data object's method.
DataBook$set("public","define_corruption_outputs", function(data_name, output_columns = c()) {
  self$get_data_objects(data_name)$define_corruption_outputs(output_columns)
}
)

# Flags output_columns as corruption-risk outputs and CRI components in the
# variables metadata; every other column is explicitly flagged as not an
# output. Requires the data frame to be defined as corruption data first.
DataSheet$set("public","define_corruption_outputs", function(output_columns = c()) {
  all_cols <- self$get_column_names()
  if(!self$is_metadata(corruption_data_label)) {
    stop("Cannot define corruption outputs when data frame is not defined as corruption data.")
  }
  self$append_to_variables_metadata(output_columns, corruption_output_label, TRUE)
  self$append_to_variables_metadata(output_columns, corruption_index_label, TRUE)
  other_cols <- setdiff(all_cols, output_columns)
  self$append_to_variables_metadata(other_cols, corruption_output_label, FALSE)
}
)

# DataBook-level wrapper: delegates to the named data object's method.
DataBook$set("public","define_red_flags", function(data_name, red_flags = c()) {
  self$get_data_objects(data_name)$define_red_flags(red_flags)
}
)

# Flags red_flags columns as corruption red flags and CRI components in the
# variables metadata; every other column is explicitly flagged as not a red
# flag. Requires the data frame to be defined as corruption data first.
DataSheet$set("public","define_red_flags", function(red_flags = c()) {
  if(!self$is_metadata(corruption_data_label)) {
    stop("Cannot define red flags when data frame is not defined as procurement data.")
  }
  self$append_to_variables_metadata(red_flags, corruption_red_flag_label, TRUE)
  self$append_to_variables_metadata(red_flags, corruption_index_label, TRUE)
  other_cols <- self$get_column_names()[!self$get_column_names() %in% red_flags]
  self$append_to_variables_metadata(other_cols, corruption_red_flag_label, FALSE)
}
)

# Defines data_name as contract-level procurement data, records the
# primary/calculated column types on it, and (optionally) defines and links
# the accompanying country-level data frame.
DataBook$set("public","define_as_procurement", function(data_name, primary_types = c(), calculated_types = c(), country_data_name, country_types, auto_generate = TRUE) {
  self$append_to_dataframe_metadata(data_name, corruption_data_label, corruption_contract_level_label)
  self$get_data_objects(data_name)$set_procurement_types(primary_types, calculated_types, auto_generate)
  if(!missing(country_data_name)) {
    self$define_as_procurement_country_level_data(data_name = country_data_name, contract_level_data_name = data_name, types = country_types, auto_generate = auto_generate)
  }
}
)

# Defines data_name as country-level procurement data and adds a keyed link
# from the contract-level data frame on their shared country columns.
# Stops if either side has no country column defined.
DataBook$set("public","define_as_procurement_country_level_data", function(data_name, contract_level_data_name, types = c(), auto_generate = TRUE) {
  self$append_to_dataframe_metadata(data_name, corruption_data_label, corruption_country_level_label)
  self$get_data_objects(data_name)$define_as_procurement_country_level_data(types, auto_generate)
  contract_level_country_name <- self$get_corruption_column_name(contract_level_data_name, corruption_country_label)
  country_level_country_name <- self$get_corruption_column_name(data_name, corruption_country_label)
  if(contract_level_country_name == "" || country_level_country_name == "") stop("country column must be defined in the contract level data and country level data.")
  # link is keyed on country: contract-level country column -> country-level country column
  link_pairs <- country_level_country_name
  names(link_pairs) <- contract_level_country_name
  self$add_link(from_data_frame = contract_level_data_name, to_data_frame = data_name, link_pairs = link_pairs, type = keyed_link_label)
}
)

# Records each named type in `types` against its column in the variables
# metadata. NOTE(review): the auto_generate parameter is accepted but not
# used here — confirm whether country-level auto generation was intended.
DataSheet$set("public","define_as_procurement_country_level_data", function(types = c(), auto_generate = TRUE) {
  invisible(sapply(names(types), function(x) self$append_to_variables_metadata(types[[x]], corruption_type_label, x)))
}
)

# TRUE when this sheet is corruption data and some column is tagged with
# the given corruption type in the variables metadata.
DataSheet$set("public","is_corruption_type_present", function(type) {
  return(self$is_metadata(corruption_data_label) && !is.na(self$get_metadata(corruption_data_label)) && self$is_variables_metadata(corruption_type_label) && (type %in% self$get_variables_metadata(property = corruption_type_label)))
}
)

# DataBook-level wrapper: delegates to the named data object's method.
DataBook$set("public","get_CRI_component_column_names", function(data_name) {
  self$get_data_objects(data_name)$get_CRI_component_column_names()
}
)

# Names of all columns flagged as CRI components.
DataSheet$set("public","get_CRI_component_column_names", function() {
  include <- list(TRUE)
  names(include) <- corruption_index_label
  return(self$get_column_names(include = include))
}
)

# DataBook-level wrapper: delegates to the named data object's method.
DataBook$set("public","get_red_flag_column_names", function(data_name) {
  self$get_data_objects(data_name)$get_red_flag_column_names()
}
)

# Names of all columns flagged as corruption red flags.
DataSheet$set("public","get_red_flag_column_names", function() {
  include <- list(TRUE)
  names(include) <- corruption_red_flag_label
  return(self$get_column_names(include = include))
}
)

# DataBook-level wrapper: delegates to the named data object's method.
DataBook$set("public","get_CRI_column_names", function(data_name) {
  self$get_data_objects(data_name)$get_CRI_column_names()
}
)

# Temporary since metadata not added to CRI columns when calculated:
# identifies CRI columns purely by the "CRI" name prefix.
DataSheet$set("public","get_CRI_column_names", function() {
  col_names <- self$get_column_names()
  CRI_cols <- col_names[startsWith(col_names, "CRI")]
  return(CRI_cols)
}
)

# DataBook-level wrapper: delegates to the named data object's method.
DataBook$set("public","get_corruption_column_name", function(data_name, type) {
  self$get_data_objects(data_name)$get_corruption_column_name(type)
}
)

# Returns the name of the column tagged with the given corruption type,
# or "" when no such column exists.
# NOTE(review): if several columns share the type this returns all of
# their names — confirm callers expect a single name.
DataSheet$set("public","get_corruption_column_name", function(type) {
  if(self$is_corruption_type_present(type)) {
    var_metadata <- self$get_variables_metadata()
    col_name <- var_metadata[!is.na(var_metadata[[corruption_type_label]]) & var_metadata[[corruption_type_label]] == type, name_label]
    # Fixed misplaced parenthesis: was `length(col_name >= 1)`, which tested
    # the length of a logical vector rather than comparing the length to 1.
    if(length(col_name) >= 1) return(col_name)
    else return("")
  }
  return("")
}
)

# Validates and records the primary/calculated corruption column types in
# the variables metadata, then (optionally) auto-generates every derived
# corruption column this sheet supports.
# Raises an error for unrecognised type names or columns absent from the data.
DataSheet$set("public","set_procurement_types", function(primary_types = c(), calculated_types = c(), auto_generate = TRUE) {
  if(!all(names(primary_types) %in% all_primary_corruption_column_types)) stop("Cannot recognise the following primary corruption data types: ", paste(names(primary_types)[!names(primary_types) %in% all_primary_corruption_column_types], collapse = ", "))
  if(!all(names(calculated_types) %in% all_calculated_corruption_column_types)) stop("Cannot recognise the following calculated corruption data types: ", paste(names(calculated_types)[!names(calculated_types) %in% all_calculated_corruption_column_types], collapse = ", "))
  if(!all(c(primary_types, calculated_types) %in% self$get_column_names())) stop("The following columns do not exist in the data:", paste(c(primary_types, calculated_types)[!(c(primary_types, calculated_types) %in% self$get_column_names())], collapse = ", "))
  invisible(sapply(names(primary_types), function(x) self$append_to_variables_metadata(primary_types[[x]], corruption_type_label, x)))
  invisible(sapply(names(calculated_types), function(x) self$append_to_variables_metadata(calculated_types[[x]], corruption_type_label, x)))
  if(auto_generate) {
    # Tried to make these independent of order called, but need to test
    self$generate_award_year()
    self$generate_procedure_type()
    self$generate_procuring_authority_id()
    self$generate_winner_id()
    self$generate_foreign_winner()
    self$generate_procurement_type_categories()
    self$generate_procurement_type_2()
    self$generate_procurement_type_3()
    self$generate_signature_period()
    self$generate_signature_period_corrected()
    self$generate_signature_period_5Q()
    self$generate_signature_period_25Q()
    self$generate_rolling_contract_no_winners()
    self$generate_rolling_contract_no_issuer()
    self$generate_rolling_contract_value_sum_issuer()
    self$generate_rolling_contract_value_sum_winner()
    self$generate_rolling_contract_value_share_winner()
    self$generate_single_bidder()
    self$generate_contract_value_share_over_threshold()
    self$generate_all_bids()
    self$generate_all_bids_trimmed()
  }
}
)

# Derives the award-year column from the award-date column, tags it with
# its corruption type and a display label. Messages (does not stop) when
# the prerequisite column is missing or not a Date.
DataSheet$set("public","generate_award_year", function() {
  if(!self$is_corruption_type_present(corruption_award_year_label)) {
    if(!self$is_corruption_type_present(corruption_award_date_label)) message("Cannot auto generate ", corruption_award_year_label, " because ", corruption_award_date_label, " column is not present.")
    else {
      award_date <- self$get_columns_from_data(self$get_corruption_column_name(corruption_award_date_label))
      # Fixed accidental double wrap: was message(message(...)).
      if(!lubridate::is.Date(award_date)) message("Cannot auto generate ", corruption_award_year_label, " because ", corruption_award_date_label, " column is not of type Date.")
      else {
        col_name <- next_default_item(corruption_award_year_label, self$get_column_names(), include_index = FALSE)
        # lubridate:: prefix added for consistency with the rest of the file.
        self$add_columns_to_data(col_name, lubridate::year(award_date))
        self$append_to_variables_metadata(col_name, corruption_type_label, corruption_award_year_label)
        self$append_to_variables_metadata(col_name, "label", "Award year")
      }
    }
  }
}
)

# Derives a standardised procedure-type factor from the method-type column,
# expanding known abbreviations (CQS, SHOP) before fixing the level set.
DataSheet$set("public","generate_procedure_type", function() {
  if(!self$is_corruption_type_present(corruption_procedure_type_label)) {
    if(!self$is_corruption_type_present(corruption_method_type_label)) message("Cannot auto generate ", corruption_procedure_type_label, " because ", corruption_method_type_label, " is not defined.")
    else {
      procedure_type <- self$get_columns_from_data(self$get_corruption_column_name(corruption_method_type_label))
      procedure_type[procedure_type == "CQS"] <- "Selection Based On Consultant's Qualification"
      procedure_type[procedure_type == "SHOP"] <- "International Shopping"
      procedure_type <- factor(procedure_type, levels = c("Commercial Practices", "Direct Contracting", "Force Account", "INDB", "Individual", "International Competitive Bidding", "International Shopping", "Least Cost Selection", "Limited International Bidding", "National Competitive Bidding", "National Shopping", "Quality And Cost-Based Selection", "Quality Based Selection", "Selection Based On Consultant's Qualification", "Selection Under a Fixed Budget", "Service Delivery Contracts", "Single Source Selection"))

      col_name <- next_default_item(corruption_procedure_type_label, self$get_column_names(), include_index = FALSE)
      self$add_columns_to_data(col_name, procedure_type)
      self$append_to_variables_metadata(col_name, corruption_type_label, corruption_procedure_type_label)
      self$append_to_variables_metadata(col_name, "label", "Procedure type")
    }
  }
}
)

# Derives a numeric procuring-authority id from the combination of country
# and procuring-authority name (ids are assigned in order of first appearance).
DataSheet$set("public","generate_procuring_authority_id", function() {
  if(!self$is_corruption_type_present(corruption_procuring_authority_id_label)) {
    # Fixed message: it previously named corruption_award_year_label even
    # though the condition checks the country column, and lacked a space.
    if(!self$is_corruption_type_present(corruption_procuring_authority_label) | !self$is_corruption_type_present(corruption_country_label)) message("Cannot auto generate ", corruption_procuring_authority_id_label, " because ", corruption_procuring_authority_label, " or ", corruption_country_label, " is not defined.")
    else {
      id <- as.numeric(factor(paste0(self$get_columns_from_data(self$get_corruption_column_name(corruption_country_label)), self$get_columns_from_data(self$get_corruption_column_name(corruption_procuring_authority_label))), levels = unique(paste0(self$get_columns_from_data(self$get_corruption_column_name(corruption_country_label)), self$get_columns_from_data(self$get_corruption_column_name(corruption_procuring_authority_label))))))

      col_name <- next_default_item(corruption_procuring_authority_id_label, self$get_column_names(), include_index = FALSE)
      self$add_columns_to_data(col_name, id)
      self$append_to_variables_metadata(col_name, corruption_type_label, corruption_procuring_authority_id_label)
      self$append_to_variables_metadata(col_name, "label", "Procurement Auth. ID")
    }
  }
}
)

# Derives a numeric winner id from the winner-name column
# (ids are assigned in order of first appearance).
DataSheet$set("public","generate_winner_id", function() {
  if(!self$is_corruption_type_present(corruption_winner_id_label)) {
    if(!self$is_corruption_type_present(corruption_winner_name_label)) message("Cannot auto generate ", corruption_winner_id_label, " because ", corruption_winner_name_label, " is not defined.")
    else {
      id <- as.numeric(factor(self$get_columns_from_data(self$get_corruption_column_name(corruption_winner_name_label)), levels = unique(self$get_columns_from_data(self$get_corruption_column_name(corruption_winner_name_label)))))

      col_name <- next_default_item(corruption_winner_id_label, self$get_column_names(), include_index = FALSE)
      self$add_columns_to_data(col_name, id)
      self$append_to_variables_metadata(col_name, corruption_type_label, corruption_winner_id_label)
      self$append_to_variables_metadata(col_name, "label", "w_name ID")
    }
  }
}
)

# Derives the foreign-winner dummy: TRUE when the winner's country differs
# from the procuring country.
DataSheet$set("public","generate_foreign_winner", function() {
  if(!self$is_corruption_type_present(corruption_foreign_winner_label)) {
    if(!self$is_corruption_type_present(corruption_country_label) || !self$is_corruption_type_present(corruption_winner_country_label)) message("Cannot auto generate ", corruption_foreign_winner_label, " because ", corruption_country_label, " or ", corruption_winner_country_label, " are not defined.")
    else {
      f_winner <- (self$get_columns_from_data(self$get_corruption_column_name(corruption_country_label)) != self$get_columns_from_data(self$get_corruption_column_name(corruption_winner_country_label)))

      col_name <- next_default_item(corruption_foreign_winner_label, self$get_column_names(), include_index = FALSE)
      self$add_columns_to_data(col_name, f_winner)
      self$append_to_variables_metadata(col_name, corruption_type_label, corruption_foreign_winner_label)
      self$append_to_variables_metadata(col_name, "label", "Foreign w_name dummy")
    }
  }
}
)

# Buckets the procedure-type column into broad procurement-type categories.
DataSheet$set("public","generate_procurement_type_categories", function() {
  if(!self$is_corruption_type_present(corruption_procurement_type_cats_label)) {
    if(!self$is_corruption_type_present(corruption_procedure_type_label)) message("Cannot auto generate ", corruption_procurement_type_cats_label, " because ", corruption_procedure_type_label, " are not defined.")
    else {
      procedure_type <- self$get_columns_from_data(self$get_corruption_column_name(corruption_procedure_type_label))
      # Fixed: the default was a length-1 vector, so rows matching no
      # category below silently became NA instead of "other, missing".
      procurement_type <- rep("other, missing", length(procedure_type))
      procurement_type[procedure_type == "Direct Contracting" | procedure_type == "Individual" | procedure_type == "Single Source Selection"] <- "single source"
      procurement_type[procedure_type == "Force Account" | procedure_type == "Service Delivery Contracts"] <- "own provision"
      procurement_type[procedure_type == "International Competitive Bidding" | procedure_type == "National Competitive Bidding"] <- "open"
      procurement_type[procedure_type == "International Shopping" | procedure_type == "Limited International Bidding" | procedure_type == "National Shopping"] <- "restricted"
      # NOTE(review): both branches below assign "consultancy,cost", so the
      # "consultancy,quality" level is never used — confirm whether the
      # quality-based selections should map to "consultancy,quality".
      procurement_type[procedure_type == "Quality And Cost-Based Selection" | procedure_type == "Quality Based Selection" | procedure_type == "Selection Under a Fixed Budget"] <- "consultancy,cost"
      procurement_type[procedure_type == "Least Cost Selection" | procedure_type == "Selection Based On Consultant's Qualification"] <- "consultancy,cost"
      procurement_type <- factor(procurement_type, levels = c("open", "restricted", "single source", "consultancy,quality", "consultancy,cost", "own provision", "other, missing"))

      col_name <- next_default_item(corruption_procurement_type_cats_label, self$get_column_names(), include_index = FALSE)
      self$add_columns_to_data(col_name, procurement_type)
      self$append_to_variables_metadata(col_name, corruption_type_label, corruption_procurement_type_cats_label)
      self$append_to_variables_metadata(col_name, "label", "Main procurement type category")
    }
  }
}
)
-DataSheet$set("public","generate_procurement_type_2", function() { - if(!self$is_corruption_type_present(corruption_procurement_type_2_label)) { - if(!self$is_corruption_type_present(corruption_procurement_type_cats_label)) message("Cannot auto generate ", corruption_procurement_type_2_label, " because ", corruption_procurement_type_cats_label, " are not defined.") - else { - procurement_type_cats <- self$get_columns_from_data(self$get_corruption_column_name(corruption_procurement_type_cats_label)) - procurement_type2 <- NA - procurement_type2[procurement_type_cats == "open"] <- FALSE - procurement_type2[procurement_type_cats == "restricted" | procurement_type_cats == "single source" | procurement_type_cats == "consultancy,quality" | procurement_type_cats == "consultancy,cost"] <- TRUE - - col_name <- next_default_item(corruption_procurement_type_2_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, procurement_type2) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_procurement_type_2_label) - self$append_to_variables_metadata(col_name, "label", "Proc. 
type is restricted, single source, consultancy") - } - } -} -) - -DataSheet$set("public","generate_procurement_type_3", function() { - if(!self$is_corruption_type_present(corruption_procurement_type_3_label)) { - if(!self$is_corruption_type_present(corruption_procurement_type_cats_label)) message("Cannot auto generate ", corruption_procurement_type_3_label, " because ", corruption_procurement_type_cats_label, " are not defined.") - else { - procurement_type_cats <- self$get_columns_from_data(self$get_corruption_column_name(corruption_procurement_type_cats_label)) - procurement_type3 <- NA - procurement_type3[procurement_type_cats == "open"] <- "open procedure" - procurement_type3[procurement_type_cats == "restricted" | procurement_type_cats == "single source"] <- "closed procedure risk" - procurement_type3[procurement_type_cats == "consultancy,quality" | procurement_type_cats == "consultancy,cost"] <- "consultancy spending risk" - procurement_type3 <- factor(procurement_type3, levels = c("open procedure", "closed procedure risk", "consultancy spending risk")) - - col_name <- next_default_item(corruption_procurement_type_3_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, procurement_type3) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_procurement_type_3_label) - self$append_to_variables_metadata(col_name, "label", "Procedure type (open, closed, consultancy)") - } - } -} -) - -DataSheet$set("public","generate_signature_period", function() { - if(!self$is_corruption_type_present(corruption_signature_period_label)) { - if(!self$is_corruption_type_present(corruption_award_date_label) || !self$is_corruption_type_present(corruption_signature_date_label)) message("Cannot auto generate ", corruption_signature_period_label, " because ", corruption_award_date_label, "or", corruption_signature_date_label, " are not defined.") - award_date <- 
self$get_columns_from_data(self$get_corruption_column_name(corruption_award_date_label)) - sign_date <- self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_date_label)) - if(!lubridate::is.Date(award_date) || !lubridate::is.Date(sign_date)) message("Cannot auto generate ", corruption_signature_period_label, " because ", corruption_award_date_label, " or ", corruption_signature_date_label, " are not of type Date.") - else { - signature_period <- self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_date_label)) - self$get_columns_from_data(self$get_corruption_column_name(corruption_award_date_label)) - col_name <- next_default_item(corruption_signature_period_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, signature_period) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_signature_period_label) - self$append_to_variables_metadata(col_name, "label", "Signature period") - } - } -} -) - -DataSheet$set("public","generate_signature_period_corrected", function() { - if(!self$is_corruption_type_present(corruption_signature_period_corrected_label)) { - self$generate_signature_period() - if(!self$is_corruption_type_present(corruption_signature_period_label)) message("Cannot auto generate ", corruption_signature_period_corrected_label, " because ", corruption_signature_period_label, " is not defined.") - else { - signature_period_corrected <- self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_period_label)) - signature_period_corrected[signature_period_corrected < 0 | signature_period_corrected > 730] <- NA - - col_name <- next_default_item(corruption_signature_period_corrected_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, signature_period_corrected) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_signature_period_corrected_label) - 
self$append_to_variables_metadata(col_name, "label", "Signature period - corrected") - } - } -} -) - -DataSheet$set("public","generate_signature_period_5Q", function() { - if(!self$is_corruption_type_present(corruption_signature_period_5Q_label)) { - self$generate_signature_period() - if(!self$is_corruption_type_present(corruption_signature_period_label)) message("Cannot auto generate ", corruption_signature_period_5Q_label, " because ", corruption_signature_period_label, " is not defined.") - else { - signature_period_5Q <- .bincode(self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_period_label)), quantile(self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_period_label)), seq(0, 1, length.out = 5 + 1), type = 2, na.rm = TRUE), include.lowest = TRUE) - - col_name <- next_default_item(corruption_signature_period_5Q_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, signature_period_5Q) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_signature_period_5Q_label) - } - } -} -) - -DataSheet$set("public","generate_signature_period_25Q", function() { - if(!self$is_corruption_type_present(corruption_signature_period_25Q_label)) { - self$generate_signature_period() - if(!self$is_corruption_type_present(corruption_signature_period_label)) message("Cannot auto generate ", corruption_signature_period_25Q_label, " because ", corruption_signature_period_label, " is not defined.") - else { - signature_period_25Q <- .bincode(self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_period_label)), quantile(self$get_columns_from_data(self$get_corruption_column_name(corruption_signature_period_label)), seq(0, 1, length.out = 25 + 1), type = 2, na.rm = TRUE), include.lowest = TRUE) - - col_name <- next_default_item(corruption_signature_period_25Q_label, self$get_column_names(), include_index = FALSE) - 
self$add_columns_to_data(col_name, signature_period_25Q) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_signature_period_25Q_label) - } - } -} -) - -DataSheet$set("public","generate_rolling_contract_no_winners", function() { - if(!self$is_corruption_type_present(corruption_roll_num_winner_label)) { - self$generate_procuring_authority_id() - self$generate_winner_id() - if(!self$is_corruption_type_present(corruption_procuring_authority_id_label) | !self$is_corruption_type_present(corruption_winner_id_label) | !self$is_corruption_type_present(corruption_award_date_label)) { - message("Cannot auto generate ", corruption_roll_num_winner_label, " because ", corruption_procuring_authority_id_label, " or ", corruption_winner_id_label, " or ", corruption_award_date_label, " are not defined.") - } - else { - temp <- self$get_data_frame(use_current_filter = FALSE) - authority_id_label <- self$get_corruption_column_name(corruption_procuring_authority_id_label) - winner_id_label <- self$get_corruption_column_name(corruption_winner_id_label) - award_date_label <- self$get_corruption_column_name(corruption_award_date_label) - col_name <- next_default_item(corruption_roll_num_winner_label, self$get_column_names(), include_index = FALSE) - exp <- lazyeval::interp(~ sum(temp[[authority_id1]] == authority_id2 & temp[[winner_id1]] == winner_id2 & temp[[award_date1]] <= award_date2 & temp[[award_date1]] > award_date2 - 365), authority_id1 = authority_id_label, authority_id2 = as.name(authority_id_label), winner_id1 = winner_id_label, winner_id2 = as.name(winner_id_label), award_date1 = award_date_label, award_date2 = as.name(award_date_label)) - temp <- self$get_data_frame(use_current_filter = FALSE) - # todo - temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate(!!as.name(col_name) := !!rlang::parse_expr(exp)) # or sym(exp)? 
- #temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate_(.dots = setNames(list(exp), col_name)) - self$add_columns_to_data(col_name, temp[[col_name]]) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_roll_num_winner_label) - self$append_to_variables_metadata(col_name, "label", "12 month rolling contract number of winner for each contract awarded") - } - - } -} -) - -DataSheet$set("public","generate_rolling_contract_no_issuer", function() { - if(!self$is_corruption_type_present(corruption_roll_num_issuer_label)) { - self$generate_procuring_authority_id() - if(!self$is_corruption_type_present(corruption_procuring_authority_id_label) | !self$is_corruption_type_present(corruption_award_date_label)) { - message("Cannot auto generate ", corruption_roll_num_issuer_label, " because ", corruption_procuring_authority_id_label, " or ", corruption_award_date_label, " are not defined.") - } - else { - temp <- self$get_data_frame(use_current_filter = FALSE) - authority_id_label <- self$get_corruption_column_name(corruption_procuring_authority_id_label) - award_date_label <- self$get_corruption_column_name(corruption_award_date_label) - col_name <- next_default_item(corruption_roll_num_issuer_label, self$get_column_names(), include_index = FALSE) - exp <- lazyeval::interp(~ sum(temp[[authority_id1]] == authority_id2 & temp[[award_date1]] <= award_date2 & temp[[award_date1]] > award_date2 - 365), authority_id1 = authority_id_label, authority_id2 = as.name(authority_id_label), award_date1 = award_date_label, award_date2 = as.name(award_date_label)) - temp <- self$get_data_frame(use_current_filter = FALSE) - # todo - temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate(!!as.name(col_name) := !!rlang::parse_expr(exp)) # or sym(exp)? 
- #temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate_(.dots = setNames(list(exp), col_name)) - self$add_columns_to_data(col_name, temp[[col_name]]) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_roll_num_issuer_label) - self$append_to_variables_metadata(col_name, "label", "12 month rolling contract number of issuer for each contract awarded") - } - - } -} -) - -DataSheet$set("public","generate_rolling_contract_value_sum_issuer", function() { - if(!self$is_corruption_type_present(corruption_roll_sum_issuer_label)) { - self$generate_procuring_authority_id() - # Need better checks than just for original contract value - if(!self$is_corruption_type_present(corruption_procuring_authority_id_label) | !self$is_corruption_type_present(corruption_award_date_label) | !self$is_corruption_type_present(corruption_original_contract_value_label)) { - message("Cannot auto generate ", corruption_roll_num_issuer_label, " because ", corruption_procuring_authority_id_label, " or ", corruption_award_date_label, " are not defined.") - } - else { - temp <- self$get_data_frame(use_current_filter = FALSE) - authority_id_label <- self$get_corruption_column_name(corruption_procuring_authority_id_label) - award_date_label <- self$get_corruption_column_name(corruption_award_date_label) - if(self$is_corruption_type_present(corruption_ppp_adjusted_contract_value_label)) { - contract_value_label <- self$get_corruption_column_name(corruption_ppp_adjusted_contract_value_label) - } - else if(self$is_corruption_type_present(corruption_ppp_conversion_rate_label)) { - self$generate_ppp_adjusted_contract_value() - contract_value_label <- self$get_corruption_column_name(corruption_ppp_adjusted_contract_value_label) - } - else { - contract_value_label <- self$get_corruption_column_name(corruption_original_contract_value_label) - } - col_name <- next_default_item(corruption_roll_sum_issuer_label, self$get_column_names(), include_index = FALSE) - exp <- lazyeval::interp(~ 
sum(temp[[contract_value]][temp[[authority_id1]] == authority_id2 & temp[[award_date1]] <= award_date2 & temp[[award_date1]] > award_date2 - 365]), authority_id1 = authority_id_label, authority_id2 = as.name(authority_id_label), award_date1 = award_date_label, award_date2 = as.name(award_date_label), contract_value = contract_value_label) - temp <- self$get_data_frame(use_current_filter = FALSE) - temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate(!!as.name(col_name) := !!rlang::parse_expr(exp)) # or sym(exp)? - #temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate_(.dots = setNames(list(exp), col_name)) - self$add_columns_to_data(col_name, temp[[col_name]]) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_roll_sum_issuer_label) - self$append_to_variables_metadata(col_name, "label", "12 month rolling sum of contract value of issuer") - } - } -} -) - -DataSheet$set("public","generate_rolling_contract_value_sum_winner", function() { - if(!self$is_corruption_type_present(corruption_roll_sum_winner_label)) { - self$generate_procuring_authority_id() - self$generate_winner_id() - # Need better checks than just for original contract value - if(!self$is_corruption_type_present(corruption_procuring_authority_id_label) | !self$is_corruption_type_present(corruption_winner_id_label) | !self$is_corruption_type_present(corruption_award_date_label) | !self$is_corruption_type_present(corruption_original_contract_value_label)) { - message("Cannot auto generate ", corruption_roll_num_issuer_label, " because ", corruption_procuring_authority_id_label, " or ", corruption_winner_id_label, " or ", corruption_award_date_label, " are not defined.") - } - else { - temp <- self$get_data_frame(use_current_filter = FALSE) - authority_id_label <- self$get_corruption_column_name(corruption_procuring_authority_id_label) - winner_id_label <- self$get_corruption_column_name(corruption_winner_id_label) - award_date_label <- 
self$get_corruption_column_name(corruption_award_date_label) - if(self$is_corruption_type_present(corruption_ppp_adjusted_contract_value_label)) { - contract_value_label <- self$get_corruption_column_name(corruption_ppp_adjusted_contract_value_label) - } - else if(self$is_corruption_type_present(corruption_ppp_conversion_rate_label)) { - self$generate_ppp_adjusted_contract_value() - contract_value_label <- self$get_corruption_column_name(corruption_ppp_adjusted_contract_value_label) - } - else { - contract_value_label <- self$get_corruption_column_name(corruption_original_contract_value_label) - } - col_name <- next_default_item(corruption_roll_sum_winner_label, self$get_column_names(), include_index = FALSE) - exp <- lazyeval::interp(~ sum(temp[[contract_value]][temp[[authority_id1]] == authority_id2 & temp[[winner_id1]] == winner_id2 & temp[[award_date1]] <= award_date2 & temp[[award_date1]] > award_date2 - 365]), authority_id1 = authority_id_label, authority_id2 = as.name(authority_id_label), winner_id1 = winner_id_label, winner_id2 = as.name(winner_id_label), award_date1 = award_date_label, award_date2 = as.name(award_date_label), contract_value = contract_value_label) - temp <- self$get_data_frame(use_current_filter = FALSE) - temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate(!!as.name(col_name) := !!rlang::parse_expr(exp)) # or sym(exp)? 
- #temp <- temp %>% dplyr::rowwise() %>% dplyr::mutate_(.dots = setNames(list(exp), col_name - self$add_columns_to_data(col_name, temp[[col_name]]) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_roll_sum_winner_label) - self$append_to_variables_metadata(col_name, "label", "12 month rolling sum of contract value of winner") - } - } -} -) - -DataSheet$set("public","generate_rolling_contract_value_share_winner", function() { - if(!self$is_corruption_type_present(corruption_roll_share_winner_label)) { - self$generate_rolling_contract_value_sum_issuer() - self$generate_rolling_contract_value_sum_winner() - if(!self$is_corruption_type_present(corruption_roll_sum_winner_label) | !self$is_corruption_type_present(corruption_roll_sum_issuer_label)) { - message("Cannot auto generate ", corruption_roll_share_winner_label, " because ", corruption_roll_sum_winner_label, " or ", corruption_roll_sum_issuer_label, " are not defined.") - } - else { - share <- self$get_columns_from_data(self$get_corruption_column_name(corruption_roll_sum_winner_label)) / self$get_columns_from_data(self$get_corruption_column_name(corruption_roll_sum_issuer_label)) - col_name <- next_default_item(corruption_roll_share_winner_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, share) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_roll_share_winner_label) - self$append_to_variables_metadata(col_name, "label", "12 month rolling contract share of winner for each contract awarded") - } - } -} -) - -DataSheet$set("public","generate_single_bidder", function() { - if(!self$is_corruption_type_present(corruption_single_bidder_label)) { - self$generate_all_bids_trimmed() - if(!self$is_corruption_type_present(corruption_all_bids_trimmed_label)) { - message("Cannot auto generate ", corruption_single_bidder_label, " because ", corruption_all_bids_trimmed_label, " is not defined.") - } - else { - single_bidder <- 
(self$get_columns_from_data(self$get_corruption_column_name(corruption_all_bids_trimmed_label)) == 1) - col_name <- next_default_item(corruption_single_bidder_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, single_bidder) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_single_bidder_label) - self$append_to_variables_metadata(col_name, "label", "Single bidder dummy") - } - } -} -) - -DataSheet$set("public","generate_contract_value_share_over_threshold", function() { - if(!self$is_corruption_type_present(corruption_contract_value_share_over_threshold_label)) { - self$generate_rolling_contract_value_share_winner() - self$generate_rolling_contract_no_issuer() - if(!self$is_corruption_type_present(corruption_roll_share_winner_label) | !self$is_corruption_type_present(corruption_roll_num_issuer_label)) { - message("Cannot auto generate ", corruption_contract_value_share_over_threshold_label, " because ", corruption_roll_share_winner_label, " or ", corruption_roll_num_issuer_label, " are not defined.") - } - else { - contr_share_over_threshold <- rep(NA, self$get_data_frame_length()) - contr_share_over_threshold[(self$get_columns_from_data(self$get_corruption_column_name(corruption_roll_num_issuer_label)) >= 3) & (self$get_columns_from_data(self$get_corruption_column_name(corruption_roll_share_winner_label)) >= 0.5)] <- TRUE - contr_share_over_threshold[(self$get_columns_from_data(self$get_corruption_column_name(corruption_roll_num_issuer_label)) >= 3) & (self$get_columns_from_data(self$get_corruption_column_name(corruption_roll_share_winner_label)) < 0.5)] <- FALSE - - col_name <- next_default_item(corruption_contract_value_share_over_threshold_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, contr_share_over_threshold) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_contract_value_share_over_threshold_label) - 
self$append_to_variables_metadata(col_name, "label", "Winner share at least 50% where issuers awarded at least 3 contracts") - } - } -} -) - -DataSheet$set("public","generate_all_bids", function() { - if(!self$is_corruption_type_present(corruption_all_bids_label)) { - if(!self$is_corruption_type_present(corruption_no_bids_considered_label)) { - message("Cannot auto generate ", corruption_all_bids_label, " because ", corruption_no_bids_considered_label, " is not defined.") - } - else { - all_bids <- self$get_columns_from_data(self$get_corruption_column_name(corruption_no_bids_considered_label)) - if(self$is_corruption_type_present(corruption_no_bids_received_label)) { - all_bids[is.na(all_bids)] <- self$get_columns_from_data(self$get_corruption_column_name(corruption_no_bids_received_label))[is.na(all_bids)] - } - - col_name <- next_default_item(corruption_all_bids_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, all_bids) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_all_bids_label) - self$append_to_variables_metadata(col_name, "label", "# Bids (all)") - } - } -} -) - -DataSheet$set("public","generate_all_bids_trimmed", function() { - if(!self$is_corruption_type_present(corruption_all_bids_trimmed_label)) { - self$generate_all_bids() - if(!self$is_corruption_type_present(corruption_all_bids_label)) { - message("Cannot auto generate ", corruption_all_bids_trimmed_label, " because ", corruption_all_bids_label, " is not defined.") - } - else { - all_bids_trimmed <- self$get_columns_from_data(self$get_corruption_column_name(corruption_all_bids_label)) - all_bids_trimmed[all_bids_trimmed > 50] <- 50 - - col_name <- next_default_item(corruption_all_bids_trimmed_label, self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(col_name, all_bids_trimmed) - self$append_to_variables_metadata(col_name, corruption_type_label, corruption_all_bids_trimmed_label) - 
self$append_to_variables_metadata(col_name, "label", "# Bids (trimmed at 50)") - } - } -} -) - -standardise_country_names <- function(country) { - country_names <- country - country_names[country_names == "Antigua and Bar"] <- "Antigua and Barbuda" - country_names[country_names == "Bosnia and Herz"] <- "Bosnia and Herzegovina" - country_names[country_names == "Cabo Verde"] <- "Cape Verde" - country_names[country_names == "Central African"] <- "Central African Republic" - country_names[country_names == "Cote d'Ivoire"] <- "Cote d'Ivoire" - country_names[country_names == "Congo, Democrat"] <- "Democratic Republic of the Congo" - country_names[country_names == "Dominican Repub"] <- "Dominican Republic" - country_names[country_names == "Egypt, Arab Rep"] <- "Egypt" - country_names[country_names == "Equatorial Guin"] <- "Equatorial Guinea" - country_names[country_names == "Gambia, The"] <- "Gambia" - country_names[country_names == "Iran, Islamic R"] <- "Iran, Islamic Republic of" - country_names[country_names == "Korea, Republic"] <- "Korea, Republic of" - country_names[country_names == "Kyrgyz Republic"] <- "Kyrgyzstan" - country_names[country_names == "Lao People's De"] <- "Lao People's Democratic Republic" - country_names[country_names == "Macedonia, form"] <- "Macedonia, the Former Yugoslav Republic of" - country_names[country_names == "Moldova"] <- "Moldova, Republic of" - country_names[country_names == "Papua New Guine"] <- "Papua New Guinea" - country_names[country_names == "Russian Federat"] <- "Russian Federation" - country_names[country_names == "St. Kitts and N"] <- "Saint Kitts and Nevis" - country_names[country_names == "St. Lucia"] <- "Saint Lucia" - country_names[country_names == "St. 
Vincent and"] <- "Saint Vincent and the Grenadines" - country_names[country_names == "Sao Tome and Pr"] <- "Sao Tome and Principe" - country_names[country_names == "Slovak Republic"] <- "Slovakia" - country_names[country_names == "Syrian Arab Rep"] <- "Syrian Arab Republic" - country_names[country_names == "Trinidad and To"] <- "Trinidad and Tobago" - country_names[country_names == "Tanzania"] <- "United Republic of Tanzania" - country_names[country_names == "Venezuela, Repu"] <- "Venezuela" - country_names[country_names == "Vietnam"] <- "Viet Nam" - country_names[country_names == "West Bank and G"] <- "West Bank and Gaza" - country_names[country_names == "Yemen, Republic"] <- "Yemen" - return(country_names) -} - -DataBook$set("public","standardise_country_names", function(data_name, country_columns = c()) { - self$get_data_objects(data_name)$standardise_country_names(country_columns) -} -) - -DataSheet$set("public","standardise_country_names", function(country_columns = c()) { - for(col_name in country_columns) { - corrected_col <- standardise_country_names(self$get_columns_from_data(col_name)) - new_col_name <- next_default_item(paste(col_name, "standardised", sep = "_"), self$get_column_names(), include_index = FALSE) - self$add_columns_to_data(new_col_name, corrected_col) - type <- self$get_variables_metadata(column = col_name, property = corruption_type_label) - if(!is.na(type)) { - if(type == corruption_country_label) { - self$append_to_variables_metadata(new_col_name, corruption_type_label, corruption_country_label) - self$append_to_variables_metadata(col_name, corruption_type_label, NA) - self$append_to_variables_metadata(new_col_name, "label", "Country name - standardised") - } - else if(type == corruption_winner_country_label) { - self$append_to_variables_metadata(new_col_name, corruption_type_label, corruption_winner_country_label) - self$append_to_variables_metadata(col_name, corruption_type_label, NA) - self$append_to_variables_metadata(new_col_name, 
"label", "Winner country name - standardised") - } - } - } -} -) - -DataSheet$set("public", "get_climatic_column_name", function(col_name) { - if(!self$get_metadata(is_climatic_label)) { - warning("Data not defined as climatic.") - return(NULL) - } - if(col_name %in% self$get_variables_metadata()$Climatic_Type){ - new_data = subset(self$get_variables_metadata(), Climatic_Type==col_name, select = Name) - return(as.character(new_data)) - } - else{ - message(paste(col_name, "column not found in the data.")) - return(NULL) - } -} -) - -DataSheet$set("public", "is_climatic_data", function() { - return(self$is_metadata(is_climatic_label) && self$get_metadata(is_climatic_label)) -} -) - -# TODO merge this with append_to_column_metadata -DataSheet$set("public", "append_column_attributes", function(col_name, new_attr) { - tmp_names <- names(new_attr) - for(i in seq_along(new_attr)) { - self$append_to_variables_metadata(property = tmp_names[i], col_names = col_name, new_val = new_attr[[i]]) - } -} -) - -#Creating display daily climatic elements graphs -DataSheet$set("public","display_daily_graph", function(data_name, date_col = NULL, station_col = NULL, year_col = NULL, doy_col = NULL, climatic_element = NULL, rug_colour = "red", bar_colour = "blue", upper_limit = 100) { - if(!self$is_climatic_data()) stop("Data is not defined as climatic.") - if(missing(date_col)) stop("Date columns must be specified.") - if(missing(climatic_element)) stop("Element column(s) must be specified.") - #if(!all(c(date_col, station_col, year_col, doy_col, climatic_element)) %in% self$get_column_names()) { - # stop("Not all specified columns found in the data") - # } - date_data <- self$get_columns_from_data(date_col) - if(!lubridate::is.Date(date_data)) stop(paste(date_col, " must be of type Date.")) - #Extracting year and day of the year - if(is.null(year_col)) { - if(is.null(self$get_climatic_column_name(year_label))) { - self$split_date(col_name = date_col, year = TRUE) - } - year_col <- 
self$get_climatic_column_name(year_label) - } - if(is.null(doy_col)) { - if(is.null(self$get_climatic_column_name(doy_label))) { - self$split_date(col_name = date_col, day_in_year = TRUE) - } - doy_col <- self$get_climatic_column_name(doy_label) - } - curr_data <- self$get_data_frame() - if(!is.null(station_col)) { - station_data <- self$get_columns_from_data(station_col) - } - else station_data <- 1 - year_data <- self$get_columns_from_data(year_col) - - graph_list <- list() - ngraph <- 0 - for(station_name in unique(station_data)) { - print(station_name) - if(!is.null(station_col)) curr_graph_data <- curr_data[curr_data[[station_col]] == station_name, ] - else curr_graph_data <- curr_data - if(nrow(curr_graph_data) != 0) { - g <- ggplot2::ggplot(data = curr_graph_data, mapping = ggplot2::aes_(x = as.name(doy_col), y = as.name(climatic_element))) + ggplot2::geom_bar(stat = "identity", fill = bar_colour) + ggplot2::geom_rug(data = curr_graph_data[is.na(curr_graph_data[[climatic_element]]), ], mapping = ggplot2::aes_(x = as.name(doy_col)), sides = "b", color = rug_colour) + ggplot2::theme_minimal() + ggplot2::coord_cartesian(ylim = c(0, upper_limit)) + ggplot2::scale_x_continuous(breaks = c(1, 32, 61, 92, 122, 153, 183, 214, 245, 275, 306, 336, 367), labels = c(month.abb, ""), limits = c(0, 367)) + facet_wrap(facets = as.formula(paste("~", year_col))) + ggplot2::ggtitle(paste(ifelse(station_name == 1, "", station_name), "Daily", climatic_element)) + ggplot2::theme(panel.grid.minor = element_blank(), plot.title = element_text(hjust = 0.5, size = 20), axis.title = element_text(size = 16)) + xlab("Date") + ylab(climatic_element) + ggplot2::theme(axis.text.x=ggplot2::element_text(angle=90)) - if(any(curr_graph_data[[climatic_element]] > upper_limit, na.rm = TRUE)) { - g <- g + ggplot2::geom_text(data = curr_graph_data[curr_graph_data[[climatic_element]] > upper_limit, ], mapping = ggplot2::aes_(y = upper_limit, label = as.name(climatic_element)), size = 3) - } - ngraph 
<- ngraph + 1 - graph_list[[length(graph_list) + 1]] <- g - } - } - if(ngraph > 1) return(gridExtra::grid.arrange(grobs = graph_list)) - else return(g) -} -) - -DataSheet$set("public", "get_variables_metadata_names", function(columns) { - if(missing(columns)) columns <- self$get_column_names() - cols <- self$get_columns_from_data(columns, force_as_data_frame = TRUE) - return(unique(as.character(unlist(sapply(cols, function(x) names(attributes(x))))))) -} -) - -DataSheet$set("public", "create_variable_set", function(set_name, columns) { - adjusted_set_name <- paste0(set_prefix, set_name) - if(adjusted_set_name %in% self$get_variables_metadata_names()) warning("A set named ", set_name, " already exists and will be replaced.") - self$append_to_variables_metadata(col_names = setdiff(self$get_column_names(), columns), property = adjusted_set_name, new_val = FALSE) - self$append_to_variables_metadata(col_names = columns, property = adjusted_set_name, new_val = TRUE) -} -) - -DataSheet$set("public", "update_variable_set", function(set_name, columns, new_set_name) { - if(!missing(new_set_name) && new_set_name != set_name) { - self$delete_variable_sets(set_names = set_name) - } - suppressWarnings(self$create_variable_set(set_name = new_set_name, columns = columns)) -} -) - -DataSheet$set("public", "delete_variable_sets", function(set_names) { - adjusted_set_names <- paste0(set_prefix, set_names) - if(!all(adjusted_set_names %in% self$get_variables_metadata_names())) { - warning("Some of the variable set names were not found. 
Sets will not be deleted.") - } - else { - sapply(adjusted_set_names, function(x) self$append_to_variables_metadata(col_names = self$get_column_names(), property = x, new_val = NULL)) - } -} -) - -DataSheet$set("public", "get_variable_sets_names", function(include_overall = TRUE, include, exclude, include_empty = FALSE, as_list = FALSE, excluded_items = c()) { - metadata_names <- self$get_variables_metadata_names() - set_names <- stringr::str_sub(metadata_names[startsWith(metadata_names, set_prefix)], start = nchar(set_prefix) + 1) - if(as_list) { - out <- list() - out[[self$get_metadata(data_name_label)]] <- set_names - } - else out <- set_names - return(out) -} -) - -DataSheet$set("public", "get_variable_sets", function(set_names, force_as_list) { - curr_set_names <- self$get_variable_sets_names() - if(!missing(set_names) && !all(set_names %in% curr_set_names)) stop("Not all of: ", paste(set_name, collapse = ", "), "exist as variable sets.") - include_lists <- rep(list(TRUE), length(set_names)) - names(include_lists) <- paste0(set_prefix, set_names) - out <- lapply(seq_along(include_lists), function(i) self$get_column_names(include = include_lists[i])) - if(length(set_names) == 1 && !force_as_list) { - out <- as.character(unlist(out)) - } - return(out) -} -) - -DataSheet$set("public", "patch_climate_element", function(date_col_name = "", var = "", vars = c(), max_mean_bias = NA, max_stdev_bias = NA, column_name, station_col_name, time_interval = "month") { - if (missing(date_col_name)) stop("date is missing with no default") - if (missing(var)) stop("var is missing with no default") - if (missing(vars)) stop("vars is missing with no default") - date_col <- self$get_columns_from_data(date_col_name, use_current_filter = FALSE) - min_date <- min(date_col) - max_date <- max(date_col) - full_date_range <- seq(from = min_date, to = max_date, by = "day") - if (!lubridate::is.Date(date_col)) stop("This column must be a date or time!") - curr_data <- 
self$get_data_frame(use_current_filter = FALSE) - if (!missing(station_col_name)) { - station_col <- self$get_columns_from_data(station_col_name, use_current_filter = FALSE) - station_names <- unique(station_col) - list_out <- list() - date_lengths <- NULL - for (i in seq_along(station_names)) { - temp_data <- curr_data[station_col == station_names[i], ] - min_date <- min(temp_data[, date_col_name]) - max_date <- max(temp_data[, date_col_name]) - full_date_range <- seq(from = min_date, to = max_date, by = "day") - date_lengths[i] <- length(full_date_range) - var_col <- temp_data[, var] - date_col <- temp_data[, date_col_name] - Year <- lubridate::year(date_col) - Month <- lubridate::month(date_col) - Day <- lubridate::day(date_col) - weather <- data.frame(Year, Month, Day, var_col) - colnames(weather)[4] <- var - patch_weather <- list() - for (j in seq_along(vars)) { - col <- temp_data[, vars[j]] - patch_weather[[j]] <- data.frame(Year, Month, Day, col) - colnames(patch_weather[[j]])[4] <- var - } - out <- chillR::patch_daily_temps(weather = weather, patch_weather = patch_weather, vars = var, max_mean_bias = max_mean_bias, max_stdev_bias = max_stdev_bias, time_interval = time_interval) - list_out[[i]] <- out[[1]][, var] - } - gaps <- sum(date_lengths) - dim(curr_data)[[1]] - } else { - gaps <- length(full_date_range) - length(date_col) - var_col <- self$get_columns_from_data(var, use_current_filter = FALSE) - Year <- lubridate::year(date_col) - Month <- lubridate::month(date_col) - Day <- lubridate::day(date_col) - weather <- data.frame(Year, Month, Day, var_col) - colnames(weather)[4] <- var - patch_weather <- list() - for (i in seq_along(vars)) { - col <- self$get_columns_from_data(vars[i], use_current_filter = FALSE) - patch_weather[[i]] <- data.frame(Year, Month, Day, col) - colnames(patch_weather[[i]])[4] <- var - } - } - if (!missing(station_col_name)) { - col <- unlist(list_out) - } - else { - out <- chillR::patch_daily_temps(weather = weather, patch_weather 
= patch_weather, vars = var, max_mean_bias = max_mean_bias, max_stdev_bias = max_stdev_bias, time_interval = time_interval) - col <- out[[1]][, var] - } - if (length(col) == dim(curr_data)[[1]]) { - self$add_columns_to_data(col_name = column_name, col_data = col) - gaps_remaining <- summary_count_miss(col) - gaps_filled <- (summary_count_miss(curr_data[, var]) - gaps_remaining) - cat(gaps_filled, " gaps filled", gaps_remaining, " remaining.", "\n") - } else if (gaps != 0) { - cat(gaps, " rows for date gaps are missing, fill date gaps before proceeding.", "\n") - } -}) - -DataSheet$set("public", "visualize_element_na", function(element_col_name, element_col_name_imputed, station_col_name, x_axis_labels_col_name, ncol = 2, type = "distribution", xlab = NULL, ylab = NULL, legend = TRUE, orientation = "horizontal", interval_size = 1461, x_with_truth = NULL, measure = "percent") { - curr_data <- self$get_data_frame() - if (!missing(station_col_name)) { - station_col <- self$get_columns_from_data(station_col_name) - station_names <- unique(station_col) - } - if (!missing(element_col_name)) { - element_col <- self$get_columns_from_data(element_col_name) - } - if (!missing(element_col_name_imputed)) { - element_imputed_col <- self$get_columns_from_data(element_col_name_imputed) - } - if (!(type %in% c("distribution", "gapsize", "interval", "imputation"))) stop(type, " must be either distribution, gapsize or imputation") - plt_list <- list() - if (type == "distribution") { - if (!missing(station_col_name) && dplyr::n_distinct(station_names) > 1) { - for (i in seq_along(station_names)) { - temp_data <- curr_data[station_col == station_names[i], ] - plt_list[[i]] <- imputeTS::ggplot_na_distribution(x = temp_data[, element_col_name], x_axis_labels = temp_data[, x_axis_labels_col_name], title = station_names[i], xlab = xlab, ylab = ylab) - } - } else { - plt <- imputeTS::ggplot_na_distribution(x = element_col, x_axis_labels = curr_data[, x_axis_labels_col_name], xlab = xlab, 
ylab = ylab) - } - } else if (type == "gapsize") { - if (!missing(station_col_name) && dplyr::n_distinct(station_names) > 1) { - for (i in seq_along(station_names)) { - temp_data <- curr_data[station_col == station_names[i], ] - plt_list[[i]] <- imputeTS::ggplot_na_gapsize(x = temp_data[, element_col_name], include_total = TRUE, title = paste0(station_names[i], ":Occurrence of gap sizes"), xlab = xlab, ylab = ylab, legend = legend, orientation = orientation) - } - } else { - plt <- imputeTS::ggplot_na_gapsize(x = element_col, include_total = TRUE, xlab = xlab, ylab = ylab, legend = legend, orientation = orientation) - } - } else if (type == "interval") { - if (!missing(station_col_name) && dplyr::n_distinct(station_names) > 1) { - for (i in seq_along(station_names)) { - temp_data <- curr_data[station_col == station_names[i], ] - plt_list[[i]] <- imputeTS::ggplot_na_intervals(x = temp_data[, element_col_name], title = paste0(station_names[i], ":Missing Values per Interval"), ylab = ylab, interval_size = interval_size, measure = measure) - } - } else { - plt <- imputeTS::ggplot_na_intervals(x = element_col, ylab = ylab, interval_size = interval_size, measure = measure) - } - } else if (type == "imputation") { - if (!missing(station_col_name) && dplyr::n_distinct(station_names) > 1) { - for (i in seq_along(station_names)) { - temp_data <- curr_data[station_col == station_names[i], ] - plt_list[[i]] <- imputeTS::ggplot_na_imputations(x_with_na = temp_data[, element_col_name], x_with_imputations = temp_data[, element_col_name_imputed], x_axis_labels = temp_data[, x_axis_labels_col_name], title = station_names[i], xlab = xlab, ylab = ylab, legend = legend, x_with_truth = x_with_truth) - } - } else { - plt <- imputeTS::ggplot_na_imputations(x_with_na = element_col, x_with_imputations = element_imputed_col, x_axis_labels = curr_data[, x_axis_labels_col_name], xlab = xlab, ylab = ylab, legend = legend, x_with_truth = x_with_truth) - } - } - if (!missing(station_col_name) && 
dplyr::n_distinct(station_names) > 1) { - return(patchwork::wrap_plots(plt_list, ncol = ncol)) - } - else { - return(plt) - } -}) - -DataSheet$set("public", "get_data_entry_data", function(station, date, elements, view_variables, station_name, type, start_date, end_date) { - cols <- c(date, elements) - if (!missing(view_variables)) cols <- c(cols, view_variables) - if (!missing(station)) cols <- c(station, cols) - curr_data <- self$get_columns_from_data(cols) - col_names <- c(date, elements) - if (!missing(station)) col_names <- c(station, col_names) - if (!missing(view_variables)) col_names <- c(col_names, paste(view_variables, "(view)")) - names(curr_data) <- col_names - - if (!missing(station)) curr_data <- curr_data[curr_data[[station]] == station_name, ] - if (type == "day") { - curr_data <- curr_data[curr_data[[date]] == start_date, ] - } else if (type == "month") { - if (lubridate::day(start_date) != 1) warning("type = 'month' but start_date is not 1st of the month.") - curr_data <- curr_data[curr_data[[date]] >= start_date & curr_data[[date]] <= (start_date + months(1) - 1), ] - } else if (type == "range") { - curr_data <- curr_data[curr_data[[date]] >= start_date & curr_data[[date]] <= end_date, ] - } - if (nrow(curr_data) == 0) stop("No data in range.") - # Convert to character to they display correctly in VB grid. - curr_data[[date]] <- as.character(curr_data[[date]]) - if (!missing(view_variables) && date %in% view_variables) curr_data[[paste(date, "(view)")]] <- as.character(curr_data[[paste(date, "(view)")]]) - if (!missing(station)) curr_data[[station]] <- as.character(curr_data[[station]]) - curr_data -}) - -DataSheet$set("public", "save_data_entry_data", function(new_data, rows_changed, add_flags = FALSE, ...) 
{ - if (ncol(new_data) > 1) { - if (nrow(new_data) != length(rows_changed)) stop("new_data must have the same number of rows as length of rows_changed.") - curr_data <- self$get_data_frame(use_current_filter = FALSE) - changed_data <- curr_data - for (i in seq_along(rows_changed)) { - for (k in seq_along(names(new_data))) { - changed_data[rows_changed[i], names(new_data)[k]] <- new_data[i, names(new_data)[k]] - } - } - if (add_flags) { - for (i in names(new_data)[-c(1:2)]) { - col1 <- curr_data[, i] - col2 <- changed_data[, i] - if (paste0(i, "_fl") %in% colnames(changed_data)) { - flag_col1 <- changed_data[, paste0(i, "_fl")] - flag_col2 <- factor(x = ifelse(is.na(col1) & !is.na(col2), "add", ifelse(!is.na(col1) & is.na(col2), "edit", ifelse(col1 == col2, "data", "edit"))), levels = c("data", "add", "edit")) - changed_data[, paste0(i, "_fl")] <- factor(ifelse(flag_col1 %in% c("edit", "add"), as.character(flag_col1), as.character(flag_col2)), levels = c("data", "add", "edit")) - } else { - changed_data[, paste0(i, "_fl")] <- factor(x = ifelse(is.na(col1) & !is.na(col2), "add", ifelse(!is.na(col1) & is.na(col2), "edit", ifelse(col1 == col2, "data", "edit"))), levels = c("data", "add", "edit")) - } - } - } - if(length(nrow(new_data)) > 0) cat("Row(s) updated: ", nrow(new_data), "\n") - self$set_data(changed_data) - # Added this line to fix the bug of having the variable names in the metadata changing to NA - # This affects factor columns only - we need to find out why and how to solve it best - self$add_defaults_variables_metadata(self$get_column_names()) - self$data_changed <- TRUE - } -}) - -DataSheet$set("public", "get_column_climatic_type", function(col_name, attr_name) { - if (!is.null(private$data[[col_name]]) && !is.null(attr(private$data[[col_name]], attr_name))) { - return(attr(private$data[[col_name]], attr_name)) - } -}) - -DataSheet$set("public", "add_flag_fields", function(col_names) { - curr_data <- self$get_columns_from_data(col_names, 
force_as_data_frame = TRUE) - for (i in colnames(curr_data)) { - col_data <- factor(ifelse(is.na(curr_data[, i]), NA_real_, "data"), levels = c("data", "edit", "add")) - self$add_columns_to_data(col_data = col_data, col_name = paste0(i, "_fl")) - } -}) - -DataSheet$set("public", "remove_empty", function(which = c("rows", "cols")) { - curr_data <- self$get_data_frame() - old_metadata <- attributes(curr_data) - new_df <- curr_data |> - janitor::remove_empty(which = which) - row_message <- paste(nrow(curr_data) - nrow(new_df), "empty rows deleted") - cols_message <- paste(ncol(curr_data) - ncol(new_df), "empty variables deleted") - if (all(which %in% "rows")) cat(row_message, "\n") - if (all(which %in% "cols")) cat(cols_message) - if (all(c("rows", "cols") %in% which)) { - cat(row_message, "\n") - cat(cols_message) - } - - - if(self$column_selection_applied()){ - df_without_Selection <- self$get_data_frame(use_column_selection = FALSE) - df_with_Selection <- self$get_data_frame() - # Check for missing columns in new_df and remove them from df_with_Selection - missing_columns <- setdiff(names(df_with_Selection), names(new_df)) - self$remove_current_column_selection() - if (length(missing_columns) > 0 && ncol(df_with_Selection) != ncol(new_df)) { - new_df <- df_without_Selection[, !names(df_without_Selection) %in% missing_columns] - }else{ new_df <- df_without_Selection } - - } - - for (name in names(old_metadata)) { - if (!(name %in% c("names", "class", "row.names"))) { - attr(new_df, name) <- old_metadata[[name]] - } - } - for (col_name in names(new_df)) { - for (attr_name in names(attributes(private$data[[col_name]]))) { - if (!attr_name %in% c("class", "levels")) { - attr(new_df[[col_name]], attr_name) <- attr(private$data[[col_name]], attr_name) - } - } - } - - self$set_data(new_df) - self$data_changed <- TRUE - private$.variables_metadata_changed <- TRUE -}) - -DataSheet$set("public", "replace_values_with_NA", function(row_index, column_index) { - curr_data <- 
self$get_data_frame(use_current_filter = FALSE) - self$save_state_to_undo_history() - - if(!all(row_index %in% seq_len(nrow(curr_data)))) stop("All row indexes must be within the dataframe") - if(!all(column_index %in% seq_len(ncol(curr_data)))) stop("All column indexes must be within the dataframe") - curr_data[row_index, column_index] <- NA - self$set_data(curr_data) -} -) - -DataSheet$set("public", "has_labels", function(col_names) { - if(missing(col_names)) stop("Column name must be specified.") - return(!is.null(attr(col_names, "labels"))) -} -) - -DataSheet$set("public", "anova_tables2", function(x_col_names, y_col_name, total = FALSE, signif.stars = FALSE, sign_level = FALSE, means = FALSE, interaction = FALSE) { - if (missing(x_col_names) || missing(y_col_name)) stop("Both x_col_names and y_col_name are required") - if (sign_level || signif.stars) message("This is no longer descriptive") - - end_col <- if (sign_level) 5 else 4 - - # Construct the formula - if (length(x_col_names) == 1) { - formula_str <- paste0(as.name(y_col_name), " ~ ", as.name(x_col_names)) - } else if (interaction && length(x_col_names) > 1) { - formula_str <- paste0(as.name(y_col_name), " ~ ", as.name(paste(x_col_names, collapse = " * "))) - } else { - formula_str <- paste0(as.name(y_col_name), " ~ ", as.name(paste(x_col_names, collapse = " + "))) - } - - mod <- lm(formula = as.formula(formula_str), data = self$get_data_frame()) - anova_mod <- anova(mod)[1:end_col] - - # Process ANOVA table - anova_mod <- anova_mod %>% - dplyr::mutate( - `Sum Sq` = signif(`Sum Sq`, 3), - `Mean Sq` = signif(`Mean Sq`, 3), - `F value` = ifelse(`F value` < 100, round(`F value`, 1), round(`F value`)) - ) %>% - dplyr::mutate(`F value` = as.character(`F value`)) %>% - dplyr::mutate(across(`F value`, ~ tidyr::replace_na(., "--"))) %>% - tibble::as_tibble(rownames = " ") - - # Add the total row if requested - if (total) { - anova_mod <- anova_mod %>% - tibble::add_row(` ` = "Total", dplyr::summarise(., 
across(where(is.numeric), sum))) %>% - dplyr::mutate(`F value` = ifelse(` ` == "Total", "--", `F value`)) # Replace NA with "--" for Total row - } - - # Handle significance levels - if (sign_level) { - anova_mod <- anova_mod %>% - dplyr::mutate( - `Pr(>F)` = ifelse( - is.na(`Pr(>F)`) | !is.numeric(`Pr(>F)`), "--", - ifelse(`Pr(>F)` < 0.001, "<0.001", formatC(`Pr(>F)`, format = "f", digits = 3)) - ) - ) - } - - # Generate the table with a title - title <- paste0("ANOVA of ", formula_str) - formatted_table <- anova_mod %>% - knitr::kable(format = "simple", caption = title) - - print(formatted_table) - - # Add line break before means section - cat("\n") - - # Optionally print means or model coefficients - if (means) { - has_numeric <- any(sapply(x_col_names, function(x) class(mod$model[[x]]) %in% c("numeric", "integer"))) - has_factor <- any(sapply(x_col_names, function(x) class(mod$model[[x]]) == "factor")) - - if (has_numeric && has_factor) { - cat("Model coefficients:\n") - print(mod$coefficients) - } else if (class(mod$model[[x_col_names[[1]]]]) %in% c("numeric", "integer")) { - cat("Model coefficients:\n") - print(mod$coefficients) - } else { - cat(paste0("Means tables of ", y_col_name, ":\n")) - means_table <- capture.output(model.tables(aov(mod), type = "means")) - means_table <- means_table[-1] - cat(paste(means_table, collapse = "\n")) - } - } -} -) +# removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/instat_object_R6.R b/instat/static/InstatObject/R/instat_object_R6.R index 98e0063279f..50aa506deb1 100644 --- a/instat/static/InstatObject/R/instat_object_R6.R +++ b/instat/static/InstatObject/R/instat_object_R6.R @@ -1,3074 +1 @@ -DataBook <- R6::R6Class("DataBook", - public = list( - initialize = function(data_tables = list(), instat_obj_metadata = list(), - data_tables_variables_metadata = rep(list(data.frame()),length(data_tables)), - data_tables_metadata = rep(list(list()),length(data_tables)), - data_tables_filters = 
rep(list(list()),length(data_tables)), - data_tables_column_selections = rep(list(list()),length(data_tables)), - imported_from = as.list(rep("",length(data_tables))), - messages=TRUE, convert=TRUE, create=TRUE) - { - self$set_meta(instat_obj_metadata) - self$set_objects(list()) - self$set_scalars(list()) - self$set_undo_history(list()) - - if (missing(data_tables) || length(data_tables) == 0) { - self$set_data_objects(list()) - } - - else { - self$import_data(data_tables=data_tables, data_tables_variables_metadata=data_tables_variables_metadata, - data_tables_metadata=data_tables_metadata, - imported_from=imported_from, messages=messages, convert=convert, create=create, data_tables_filters = data_tables_filters, - data_tables_column_selections = data_tables_column_selections) - } - - private$.data_sheets_changed <- FALSE - } - ), - private = list( - .data_sheets = list(), - .metadata = list(), - .objects = list(), - .scalars = list(), - .undo_history = list(), - .links = list(), - .data_sheets_changed = FALSE, - .database_connection = NULL, - .last_graph = NULL - ), - active = list( - data_objects_changed = function(new_value) { - if(missing(new_value)) return(private$.data_sheets_changed) - else { - if(new_value != TRUE && new_value != FALSE) stop("new_value must be TRUE or FALSE") - private$.data_sheets_changed <- new_value - #TODO is this behaviour we want? 
- invisible(sapply(self$get_data_objects(), function(x) x$data_changed <- new_value)) - } - } - ) -) - -DataBook$set("public", "import_data", function(data_tables = list(), data_tables_variables_metadata = rep(list(data.frame()),length(data_tables)), - data_tables_metadata = rep(list(list()),length(data_tables)), - data_tables_filters = rep(list(list()),length(data_tables)), - data_tables_column_selections = rep(list(list()),length(data_tables)), - imported_from = as.list(rep("",length(data_tables))), - data_names = NULL, - messages=TRUE, convert=TRUE, create=TRUE, prefix=TRUE, - add_to_graph_book = TRUE) -{ - if (missing(data_tables) || length(data_tables) == 0) { - stop("No data found. No data objects can be created.") - } - - else { - - if(!(is.list(data_tables))) { - stop("data_tables must be a list of data frames") - } - - if(anyDuplicated(names(data_tables))) { - stop("There are duplicate names in the data tables list.") - } - - if(length(data_tables_variables_metadata) != length(data_tables)) { - stop("If data_tables_variables_metadata is specified, it must be a list of metadata lists with the same length as data_tables.") - } - - if(length(data_tables_metadata) != length(data_tables)) { - stop("If data_tables_metadata is specified, it must be a list of metadata lists with the same length as data_tables.") - } - - if (length(imported_from) != length(data_tables)) { - stop("imported_from must be a list of the same length as data_tables") - } - - if(!is.null(data_names) && length(data_names) != length(data_names)) { - stop("If data_names is specified it must be a list of the same length as data_tables") - } - - # loop through the data_tables list and create a data object for each - # data.frame given - new_data_objects = list() - for ( i in (1:length(data_tables)) ) { - curr_name <- names(data_tables)[[i]] - if(is.null(curr_name) && !is.null(data_names)) curr_name <- data_names[i] - if (prefix){ - if(tolower(curr_name) %in% 
tolower(names(private$.data_sheets))) { - warning("Cannot have data frames with the same name only differing by case. Data frame will be renamed.") - curr_name <- next_default_item(tolower(curr_name), tolower(names(private$.data_sheets))) - } - } - - new_data = DataSheet$new(data=data_tables[[i]], data_name = curr_name, - variables_metadata = data_tables_variables_metadata[[i]], - metadata = data_tables_metadata[[i]], - imported_from = imported_from[[i]], - start_point = i, - messages = messages, convert = convert, create = create, - filters = data_tables_filters[[i]], - column_selections = data_tables_column_selections[[i]]) - # Add this new data object to our list of data objects - self$append_data_object(new_data$get_metadata(data_name_label), new_data, add_to_graph_book = add_to_graph_book) - } - } -} -) - -# TODO update parameter name to new_data_book -DataBook$set("public", "replace_instat_object", function(new_instat_object) { - self$set_data_objects(list()) - for(curr_obj in new_instat_object$get_data_objects()) { - self$append_data_object(curr_obj$get_metadata(data_name_label), curr_obj$data_clone()) - } - self$set_meta(new_instat_object$get_metadata()) - self$set_objects(new_instat_object$get_objects(data_name = overall_label)) - self$data_objects_changed <- TRUE -} -) - -DataBook$set("public", "set_data_objects", function(new_data_objects) { - # new_data_objects could be of old class type 'data_object' - if(!is.list(new_data_objects) || (length(new_data_objects) > 0 && !any(c("DataSheet", "data_object") %in% sapply(new_data_objects, class)))) { - stop("new_data_objects must be a list of data_objects") - } - else private$.data_sheets <- new_data_objects -} -) - -DataBook$set("public", "copy_data_object", function(data_name, new_name, filter_name = "", column_selection_name = "", reset_row_names = TRUE) { - new_obj <- self$get_data_objects(data_name)$data_clone() - if(filter_name != "") { - subset_data <- 
self$get_data_objects(data_name)$get_data_frame(use_current_filter = FALSE, filter_name = filter_name, retain_attr = TRUE) - if(reset_row_names) rownames(subset_data) <- 1:nrow(subset_data) - new_obj$remove_current_filter() - new_obj$set_data(subset_data) - } - if(column_selection_name != "") { - subset_data <- self$get_data_objects(data_name)$get_data_frame(use_current_filter = FALSE, filter_name = filter_name, column_selection_name = column_selection_name, use_column_selection = FALSE, retain_attr = TRUE) - new_obj$remove_current_column_selection() - new_obj$set_data(subset_data) - } - self$append_data_object(new_name, new_obj) -} -) - - -DataBook$set("public", "import_RDS", function(data_RDS, - keep_existing = TRUE, - overwrite_existing = FALSE, - include_objects = TRUE, - include_metadata = TRUE, - include_logs = TRUE, - include_filters = TRUE, - include_column_selections = TRUE, - include_calculations = TRUE, - include_comments = TRUE){ - # TODO add include_calculations options - - # 'instat_object' is previously used class name, some files may have this name. 
- if(any(c("instat_object", "DataBook") %in% class(data_RDS))) { - if(!keep_existing && include_objects && include_metadata && include_logs && include_filters && include_column_selections && include_calculations && include_comments) { - self$replace_instat_object(new_instat_object = data_RDS) - }else { - if(!keep_existing) { - self$set_data_objects(list()) - self$set_meta(list()) - self$set_objects(list()) - self$set_links(list()) - self$set_database_connection(NULL) - } - new_links_list <- data_RDS$get_links() - for(data_obj_name in data_RDS$get_data_names()) { - data_obj_clone <- self$clone_data_object(data_RDS$get_data_objects(data_obj_name), include_objects = include_objects, include_metadata = include_metadata, include_logs = include_logs, include_filters = include_filters, include_column_selections = include_column_selections, include_calculations = include_calculations, include_comments = include_comments) - if(tolower(data_obj_name) %in% tolower(self$get_data_names()) && !overwrite_existing) { - warning("Cannot have data frames with the same name only differing by case. 
Data frame will be renamed.") - new_name <- next_default_item(tolower(data_obj_name), tolower(self$get_data_names())) - data_obj_clone$append_to_metadata(data_name_label, new_name) - if(new_name != data_obj_name) { - for(i in seq_along(new_links_list)) { - new_links_list[[i]]$rename_data_frame_in_link(data_obj_name, new_name) - } - } - } - #if(!data_obj_clone$is_metadata(data_name_label)) data_obj_clone$append_to_metadata(data_name_label, new_name) - curr_data_name = data_obj_clone$get_metadata(data_name_label) - self$append_data_object(curr_data_name, data_obj_clone) - } - for(i in seq_along(new_links_list)) { - curr_link <- new_links_list[[i]] - for(j in seq_along(curr_link$link_columns)) { - self$add_link(from_data_frame = curr_link$from_data_frame, to_data_frame = curr_link$to_data_frame, link_pairs = curr_link$link_columns[[j]], type = curr_link$type, link_name = names(new_links_list)[i]) - } - } - new_objects_list <- data_RDS$get_objects(data_name = overall_label) - new_objects_count <- length(new_objects_list) - if(include_objects && new_objects_count > 0) { - for(i in (1:new_objects_count)) { - if(!(names(new_objects_list)[i] %in% names(private$.objects)) || overwrite_existing) { - self$add_object(object_name = names(new_objects_list)[i], - object_type_label = new_objects_list[[i]]$object_type_label, - object_format = new_objects_list[[i]]$object_format, - object = new_objects_list[[i]]$object) - } - } - } - new_metadata <- data_RDS$get_metadata() - new_metadata_count <- length(new_metadata) - if(include_metadata && new_metadata_count > 0) { - for(i in (1:new_metadata_count)) { - if(!(names(new_metadata)[i] %in% names(private$metadata)) || overwrite_existing) { - self$append_to_metadata(names(new_metadata)[i], new_metadata[[i]]) - } - } - } - } - self$data_objects_changed <- TRUE - }else if(is.data.frame(data_RDS) || is.matrix(data_RDS)) { - self$import_data(data_tables = list(data_RDS = data_RDS)) - }else stop("Cannot import an objects of class", 
paste(class(data_RDS), collapse = ",")) -} -) - -DataBook$set("public", "clone_data_object", function(curr_data_object, include_objects = TRUE, include_metadata = TRUE, include_logs = TRUE, include_filters = TRUE, include_column_selections = TRUE, include_calculations = TRUE, include_comments = TRUE, include_scalars = TRUE, ...) { - curr_names <- names(curr_data_object) - if("get_data_frame" %in% curr_names) new_data <- curr_data_object$get_data_frame(use_current_filter = FALSE) - else stop("Cannot import data. No 'get_data_frame' method.") - if("get_metadata" %in% curr_names) new_data_name <- curr_data_object$get_metadata(data_name_label) - if(include_objects && "get_objects" %in% curr_names) new_objects <- curr_data_object$get_objects() - else new_objects <- list() - if(include_scalars && "get_scalars" %in% curr_names) new_scalars <- curr_data_object$get_scalars() - else new_scalars <- list() - if(include_filters && "get_filter" %in% curr_names) { - new_filters <- lapply(curr_data_object$get_filter(), function(x) x$data_clone()) - new_filters <- lapply(new_filters, function(x) check_filter(x)) - } else new_filters <- list() - if(include_column_selections && "get_column_selection" %in% curr_names) new_column_selections <- curr_data_object$get_column_selection() - else new_column_selections <- list() - if(include_calculations && "get_calculations" %in% curr_names) new_calculations <- lapply(curr_data_object$get_calculations(), function(x) self$clone_instat_calculation(x)) - else new_calculations <- list() - if(include_comments && "get_comments" %in% curr_names) new_comments <- lapply(curr_data_object$get_comments(), function(x) x$data_clone()) - else new_comments <- list() - if("get_keys" %in% curr_names) new_keys <- curr_data_object$get_keys() - else new_keys <- list() - - new_data_object <- DataSheet$new(data = new_data, data_name = new_data_name, filters = new_filters, column_selections = new_column_selections, objects = new_objects, calculations = 
new_calculations, scalars = new_scalars, keys = new_keys, comments = new_comments, keep_attributes = include_metadata) - if(include_logs && "get_changes" %in% curr_names) { - new_changes <- curr_data_object$get_changes() - } - else new_changes <- list() - new_data_object$set_changes(new_changes) - if(include_filters && "current_filter" %in% curr_names) new_data_object$current_filter <- curr_data_object$get_current_filter() - else new_data_object$remove_current_filter() - if(include_column_selections && "current_column_selection" %in% curr_names) new_data_object$current_column_selection <- curr_data_object$get_current_column_selection() - else new_data_object$remove_current_column_selection() - if(!include_metadata) { - new_data_object$clear_metadata() - new_data_object$clear_variables_metadata() - } - new_data_object$data_changed <- TRUE - new_data_object$metadata_changed <- TRUE - new_data_object$variables_metadata_changed <- TRUE - return(new_data_object) -} -) - -DataBook$set("public", "clone_instat_calculation", function(curr_instat_calculation, ...) 
{ - - new_manips <- lapply(curr_instat_calculation$manipulations, function(x) self$clone_instat_calculation(x)) - new_subs <- lapply(curr_instat_calculation$sub_calculations, function(x) self$clone_instat_calculation(x)) - new_instat_calculation <- instat_calculation$new(function_exp = curr_instat_calculation$function_exp, - type = curr_instat_calculation$type, - name = curr_instat_calculation$name, - result_name = curr_instat_calculation$result_name, - manipulations = new_manips, - sub_calculations = new_subs, - calculated_from = curr_instat_calculation$calculated_from, - save = curr_instat_calculation$save) - return(new_instat_calculation) -} -) - -DataBook$set("public", "import_from_ODK", function(username, form_name, platform) { - out <- import_from_ODK(username, form_name, platform) - data_list <- list(out) - names(data_list) <- form_name - self$import_data(data_tables = data_list) -} -) - -# Now appending/merging not setting so maybe should be renamed -DataBook$set("public", "set_meta", function(new_meta) { - if(!is.list(new_meta)) stop("new_meta must be of type: list") - for(name in names(attributes(self))) { - if(!name %in% c("class")) attr(self, name) <- NULL - } - for(name in names(new_meta)) { - self$append_to_metadata(name, new_meta[[name]]) - } -} -) - -DataBook$set("public", "set_objects", function(new_objects) { - if(!is.list(new_objects)) stop("new_objects must be of type: list") - private$.objects <- new_objects -} -) - -DataBook$set("public", "set_undo_history", function(new_undo_history) { - if (!is.list(new_undo_history)) stop("undo_history must be of type: list") - - private$.undo_history <- new_undo_history -} -) - -DataBook$set("public", "set_scalars", function(new_scalars) { - if(!is.list(new_scalars)) stop("new_scalars must be of type: list") - private$.scalars <- new_scalars -} -) - -DataBook$set("public", "append_data_object", function(name, obj, add_to_graph_book = TRUE) { - if(!is.character(name)) stop("name must be a character") - # 
obj could be of old class type 'data_object' - if(!any(c("data_object", "DataSheet") %in% class(obj))) { - stop("obj must be a data object") - } - obj$append_to_metadata(data_name_label, name) - private$.data_sheets[[name]] <- obj - if (add_to_graph_book && exists(".graph_data_book")) { - dfs <- list(data.frame()) - names(dfs) <- name - .graph_data_book$import_data(data_tables = dfs, add_to_graph_book = FALSE) - } -} -) - -DataBook$set("public", "get_data_objects", function(data_name, as_list = FALSE, ...) { - if(missing(data_name)) { - return(private$.data_sheets) - } - else{ - if(all(is.character(data_name))) type = "character" - else if(all(is.numeric(data_name)) && all((data_name %% 1) == 0)) type = "integer" - else stop("data_name must be of type character or integer") - - if(type=="character" && !all(data_name %in% names(private$.data_sheets))) stop(paste(data_name, "not found")) - if(type=="integer" && (!all(1 <= data_name) || !all(data_name <= length(private$.data_sheets)))) stop(paste(data_name, "not found")) - if(length(data_name) > 1 || as_list) return(private$.data_sheets[data_name]) - else return(private$.data_sheets[[data_name]]) - } -} -) - -DataBook$set("public", "get_data_frame", function(data_name, convert_to_character = FALSE, stack_data = FALSE, include_hidden_columns = TRUE, use_current_filter = TRUE, filter_name = "", use_column_selection = TRUE, column_selection_name = "", remove_attr = FALSE, retain_attr = FALSE, max_cols, max_rows, drop_unused_filter_levels = FALSE, start_row, start_col, ...) 
{ - if(!stack_data) { - if(missing(data_name)) data_name <- self$get_data_names() - if(length(data_name) > 1) { - retlist <- list() - for (curr_name in data_name) { - retlist[[curr_name]] = self$get_data_objects(curr_name)$get_data_frame(convert_to_character = convert_to_character, include_hidden_columns = include_hidden_columns, use_current_filter = use_current_filter, use_column_selection = use_column_selection, filter_name = filter_name, column_selection_name = column_selection_name, remove_attr = remove_attr, retain_attr = retain_attr, max_cols = max_cols, max_rows = max_rows, drop_unused_filter_levels = drop_unused_filter_levels, start_row = start_row, start_col = start_col) - } - return(retlist) - } - else return(self$get_data_objects(data_name)$get_data_frame(convert_to_character = convert_to_character, include_hidden_columns = include_hidden_columns, use_current_filter = use_current_filter, use_column_selection = use_column_selection, filter_name = filter_name, column_selection_name = column_selection_name, remove_attr = remove_attr, retain_attr = retain_attr, max_cols = max_cols, max_rows = max_rows, drop_unused_filter_levels = drop_unused_filter_levels, start_row = start_row, start_col = start_col)) - } - else { - if(missing(data_name)) stop("data to be stacked is missing") - if(!data_name %in% names(private$.data_sheets)) stop(paste(data_name, "not found.")) - return(self$get_data_objects(data_name)$get_data_frame(include_hidden_columns = include_hidden_columns, use_current_filter = use_current_filter, use_column_selection = use_column_selection, filter_name = filter_name, stack_data = TRUE, ...)) - } -} -) - -DataBook$set("public", "get_variables_metadata", function(data_name, data_type = "all", convert_to_character = FALSE, property, column, error_if_no_property = TRUE, direct_from_attributes = FALSE, use_column_selection = TRUE) { - if(missing(data_name)) { - retlist <- list() - for (curr_obj in private$.data_sheets) { - 
retlist[[curr_obj$get_metadata(data_name_label)]] = curr_obj$get_variables_metadata(data_type = data_type, convert_to_character = convert_to_character, property = property, column = column, error_if_no_property = error_if_no_property, direct_from_attributes = direct_from_attributes, use_column_selection = use_column_selection) - } - return(retlist) - } - else return(self$get_data_objects(data_name)$get_variables_metadata(data_type = data_type, convert_to_character = convert_to_character, property = property, column = column, error_if_no_property = error_if_no_property, direct_from_attributes = direct_from_attributes, use_column_selection = use_column_selection)) -} -) - -DataBook$set("public", "get_column_data_types", function(data_name, columns) { - return(self$get_data_objects(data_name)$get_column_data_types(columns = columns)) -} -) - -DataBook$set("public", "get_column_labels", function(data_name, columns) { - return(self$get_data_objects(data_name)$get_column_labels(columns = columns)) -} -) - -DataBook$set("public", "get_data_frame_label", function(data_name, use_current_filter = FALSE) { - self$get_data_objects(data_name)$get_data_frame_label(use_current_filter) -} -) - -DataBook$set("public", "get_data_frame_metadata", function(data_name, label, include_calculated = TRUE, excluded_not_for_display = TRUE) { - return(self$get_data_objects(data_name)$get_metadata(label = label, include_calculated = include_calculated, excluded_not_for_display = excluded_not_for_display)) -} -) - -DataBook$set("public", "get_combined_metadata", function(convert_to_character = FALSE) { - retlist <- data.frame() - i = 1 - for (curr_obj in private$.data_sheets) { - templist = curr_obj$get_metadata() - for (j in seq_along(templist)) { - if (is.list(templist[[j]]) || length(templist[[j]]) > 1) { - if (length(templist[[j]]) > 0) { - templist[[j]] <- - paste(names(templist[[j]]), " = ", templist[[j]], collapse = ", ") - } else { - next - } - } - retlist[i, names(templist[j])] = 
templist[[j]] - } - if(all(c(data_name_label, label_label, row_count_label, column_count_label, - data_type_label, is_calculated_label, is_hidden_label, is_linkable, key_label) %in% names(retlist))){ - retlist <- retlist[ ,c(c(data_name_label, label_label, row_count_label, column_count_label, data_type_label, - is_calculated_label, is_hidden_label, is_linkable, key_label), - sort(setdiff(names(retlist), c(data_name_label,label_label, row_count_label, column_count_label, - data_type_label, is_calculated_label,is_hidden_label,is_linkable, key_label))))] - } - else if(data_name_label %in% names(retlist)) retlist <- retlist[ ,c(data_name_label, sort(setdiff(names(retlist), data_name_label)))] - i = i + 1 - } - if(convert_to_character) return(convert_to_character_matrix(retlist, FALSE)) - else return(retlist) -} -) - -DataBook$set("public", "get_metadata", function(name, ...) { - if(missing(name)) return(private$.metadata) - if(!is.character(name)) stop("name must be a character") - if(!name %in% names(private$.metadata)) stop(paste(name, "not found in metadata")) - return(private$.metadata[[name]]) -} -) - -DataBook$set("public", "get_data_names", function(as_list = FALSE, include, exclude, excluded_items, include_hidden = TRUE, ...) 
{ - ret <- names(private$.data_sheets) - if(!include_hidden) { - ret <- ret[sapply(ret, function(x) !isTRUE(self$get_data_objects(x)$get_metadata(label = is_hidden_label)))] - } - if(as_list) return(list(data_names = ret)) - else return(ret) -} -) - -DataBook$set("public", "get_data_changed", function(data_name) { - if(missing(data_name)) { - if(self$data_objects_changed) return (TRUE) - for(curr_obj in private$.data_sheets) { - if(curr_obj$data_changed) return(TRUE) - } - return(FALSE) - } - else { - return(self$get_data_objects(data_name)$data_changed) - } -} -) - -DataBook$set("public", "get_variables_metadata_changed", function(data_name) { - if(missing(data_name)) { - if(private$.data_sheets_changed) return(TRUE) - return(any(sapply(private$.data_sheets, function(x) x$variables_metadata_changed))) - } - else { - return(self$get_data_objects(data_name)$variables_metadata_changed) - } -} -) - -DataBook$set("public", "get_metadata_changed", function(data_name) { - if(missing(data_name)) { - if(private$.data_sheets_changed) return(TRUE) - for(curr_obj in private$.data_sheets) { - if(curr_obj$metadata_changed) return(TRUE) - } - return(FALSE) - } - else { - return(self$get_data_objects(data_name)$metadata_changed) - } -} -) - -DataBook$set("public", "get_calculations", function(data_name) { - return(self$get_data_objects(data_name)$get_calculations()) -} -) - -DataBook$set("public", "get_calculation_names", function(data_name, as_list = FALSE, excluded_items = c()) { - return(self$get_data_objects(data_name)$get_calculation_names(as_list = as_list, excluded_items = excluded_items)) -} -) - - -DataBook$set("public", "get_scalars", function(data_name) { - if(is.null(data_name) || identical(data_name, overall_label)) { - out <- private$.scalars[self$get_scalar_names(data_name = data_name)] - }else { - out <- self$get_data_objects(data_name)$get_scalars() - } - return(out) - -} -) - -DataBook$set("public", "get_scalar_names", function(data_name, - as_list = FALSE, - 
excluded_items = c(),...) { - if (is.null(data_name) || identical(data_name, overall_label)) { - out <- - get_data_book_scalar_names( - scalar_list = private$.scalars, - as_list = as_list, - list_label = overall_label - ) - } else{ - out <- - self$get_data_objects(data_name)$get_scalar_names(as_list = as_list, excluded_items = excluded_items) - } - - return(out) -}) - -DataBook$set("public", "get_scalar_value", function(data_name, scalar_name) { - self$get_data_objects(data_name)$get_scalar_value(scalar_name) -} -) - -DataBook$set("public", "add_scalar", function(data_name, scalar_name = "", scalar_value) { - if (is.null(data_name) || identical(data_name, overall_label)) { - if (missing(scalar_name)) - scalar_name <- next_default_item("scalar", names(private$.scalars)) - if (scalar_name %in% names(private$.scalars)) - warning("A scalar called ", - scalar_name, - " already exists. It will be replaced.") - - #add the scalar - private$.scalars[[scalar_name]] <- scalar_value - } else{ - self$get_data_objects(data_name)$add_scalar(scalar_name, scalar_value) - } -}) - -DataBook$set("public", "dataframe_count", function() { - return(length(private$.data_sheets)) -} -) - -DataBook$set("public", "set_data_frames_changed", function(data_name = "", new_val) { - if(data_name == "") { - for(curr_obj in private$.data_sheets) { - curr_obj$data_changed <- new_val - } - } - else self$get_data_objects(data_name)$set_data_changed(new_val) -} -) - -DataBook$set("public", "set_variables_metadata_changed", function(data_name = "", new_val) { - if(data_name == "") { - for(curr_obj in private$.data_sheets) { - curr_obj$variables_metadata_changed <- new_val - } - } - else self$get_data_objects(data_name)$set_variables_metadata_changed(new_val) -} -) - -DataBook$set("public", "set_metadata_changed", function(data_name = "", new_val) { - if(data_name == "") { - for(curr_obj in private$.data_sheets) { - curr_obj$set_metadata_changed(new_val) - } - } - else 
self$get_data_objects(data_name)$set_metadata_changed(new_val) -} -) - -DataBook$set("public", "add_columns_to_data", function(data_name, col_name = "", col_data, use_col_name_as_prefix = FALSE, hidden = FALSE, before, adjacent_column = "", num_cols, require_correct_length = TRUE, keep_existing_position = TRUE) { - self$get_data_objects(data_name)$add_columns_to_data(col_name, col_data, use_col_name_as_prefix = use_col_name_as_prefix, hidden = hidden, before = before, adjacent_column = adjacent_column, num_cols = num_cols, require_correct_length = require_correct_length, keep_existing_position = keep_existing_position) -} -) - -DataBook$set("public", "get_columns_from_data", function(data_name, col_names, from_stacked_data = FALSE, force_as_data_frame = FALSE, use_current_filter = TRUE, remove_labels = FALSE, drop_unused_filter_levels = FALSE) { - if(missing(data_name)) stop("data_name is required") - if(!from_stacked_data) { - if(!data_name %in% names(private$.data_sheets)) stop(data_name, "not found") - self$get_data_objects(data_name)$get_columns_from_data(col_names, force_as_data_frame, use_current_filter = use_current_filter, remove_labels = remove_labels, drop_unused_filter_levels = drop_unused_filter_levels) - } - else { - if(!exists(data_name)) stop(paste(data_name, "not found.")) - if(!all(sapply(col_names, function(x) x %in% names(data_name)))) stop("Not all column names were found in data") - if(length(col_names)==1 && !force_as_data_frame) return (data_name[[col_names]]) - else return(data_name[col_names]) - } -} -) - - -#todo. 
deprecate -#see issue #7808 comments for more details -DataBook$set("public", "create_graph_data_book", function() { - .graph_data_book <- DataBook$new() - df_names <- self$get_data_names() - dfs <- vector("list", length(df_names)) - names(dfs) <- df_names - for (i in seq_along(dfs)) { - dfs[[i]] <- data.frame() - } - .graph_data_book$import_data(data_tables = dfs) - assign(".graph_data_book", .graph_data_book, envir = .GlobalEnv) -} -) - -#see comments in issue #7808. -DataBook$set("public", "add_object", function(data_name = NULL, - object_name = NULL, - object_type_label, - object_format, - object) { - if(is.null(data_name) || identical(data_name, overall_label)) { - if(is.null(object_name)){ - object_name <- next_default_item("object", names(private$.objects)) - } - - #notify user - if(object_name %in% names(private$.objects)){ - message(paste("An object called", object_name, "already exists. It will be replaced.")) - } - - #add the object - private$.objects[[object_name]] <- list(object_type_label = object_type_label, object_format = object_format, object = object) - } else{ - self$get_data_objects(data_name)$add_object(object_name = object_name, object_type_label = object_type_label, object_format = object_format, object = object) - } - - #todo. once get_last_object_data is refactored, then this block can be removed - #if its a graph. set it as last graph contents - if(identical(object_type_label, graph_label)){ - private$.last_graph <- c(data_name, object_name) - } - -} -) - -#see comments in issue #7808. -DataBook$set("public", "get_object_names", function(data_name = NULL, - object_type_label = NULL, - as_list = FALSE, ...) 
{ - - if(is.null(data_name) || identical(data_name, overall_label)){ - out <- - get_data_book_output_object_names( - output_object_list = private$.objects, - object_type_label = object_type_label, - as_list = as_list, - list_label = overall_label) - }else{ - out <- - self$get_data_objects(data_name)$get_object_names( - object_type_label = object_type_label, - as_list = as_list) - } - - return(out) - -} -) - -#returns a list of objects -#see issue #7808 comments for more details -DataBook$set("public", "get_objects", function(data_name = NULL, - object_type_label = NULL) { - if(is.null(data_name) || identical(data_name, overall_label)) { - out <- private$.objects[self$get_object_names(data_name = data_name, object_type_label = object_type_label)] - }else { - out <- self$get_data_objects(data_name)$get_objects(object_type_label = object_type_label) - } - return(out) - -} -) - -#returns NULL if object is not found -#see issue #7808 comments for more details -DataBook$set("public", "get_object", function(data_name = NULL, object_name) { - out <- NULL - if(is.null(data_name) || identical(data_name, overall_label)) { - out <- private$.objects[[object_name]] - }else { - out <- self$get_data_objects(data_name)$get_object(object_name = object_name) - } - return(out) -} -) - -DataBook$set("public", "get_object_data", function(data_name = NULL, object_name, as_file = FALSE) { - out <- self$get_object(data_name = data_name, object_name = object_name) - if(is.null(out)){ - return(NULL) - }else if(as_file){ - out <- view_object_data(object = out$object, object_format = out$object_format) - }else{ - out <- out$object - } - return(out) -} -) - -#returns object data from the object_names character vector -DataBook$set("public", "get_objects_data", function(data_name = NULL, object_names = NULL, as_files = FALSE) { - out <- list() - if(is.null(object_names)){ - objects_list <- self$get_objects(data_name = data_name) - out <- self$get_objects_data(data_name = data_name, object_names 
= names(objects_list) ) - }else{ - for(object_name in object_names){ - object_data <- self$get_object_data(data_name = data_name, object_name = object_name, as_file = as_files) - if(!is.null(object_data)){ - out[[object_name]] <- object_data - } - } - } - - return(out) -} -) - -#todo. require data name? then do a way with private$.last_graph -#and just get it from the objects list? -DataBook$set("public", "get_last_object_data", function(object_type_label, as_file = TRUE) { - out <- NULL - #currently this function is only applicable to graphs. Implement for other objects like models, tables, summaries - if(identical(object_type_label, graph_label)){ - if(!is.null(private$.last_graph) && length(private$.last_graph) == 2) { - out <- self$get_object_data(data_name = private$.last_graph[1], object_name = private$.last_graph[2], as_file = as_file) - } - } - return(out) -} -) - - - -DataBook$set("public", "rename_object", function(data_name, object_name, new_name, object_type = "object") { - if(missing(data_name) || data_name == overall_label) { - if(!object_name %in% names(private$.objects)) stop(object_name, " not found in overall objects list") - if(new_name %in% names(private$.objects)) stop(new_name, " is already an object name. 
Cannot rename ", object_name, " to ", new_name) - names(private$.objects)[names(private$.objects) == object_name] <- new_name - } - else self$get_data_objects(data_name)$rename_object(object_name = object_name, new_name = new_name, object_type = object_type) -} -) - -DataBook$set("public", "delete_objects", function(data_name, object_names, object_type = "object") { - if(missing(data_name) || data_name == overall_label) { - if(!all(object_names %in% names(private$.objects))) stop("Not all object_names found in overall objects list") - } - else self$get_data_objects(data_name)$delete_objects(object_names = object_names, object_type = object_type) -} -) - -DataBook$set("public", "reorder_objects", function(data_name, new_order) { - if(missing(data_name) || data_name == overall_label) { - if(length(new_order) != length(private$.objects) || !setequal(new_order, names(private$.objects))) stop("new_order must be a permutation of the current object names.") - self$set_objects(private$.objects[new_order]) - } - else self$get_data_objects(data_name)$reorder_objects(new_order = new_order) -} -) - -DataBook$set("public", "get_from_object", function(data_name, object_name, value1, value2, value3) { - if(missing(data_name) || missing(object_name)) stop("data_name and object_name must both be specified.") - curr_object = self$get_objects(data_name = data_name, object_name = object_name) - if(missing(value1)) { - if(!missing(value2) || !missing(value3)) warning("value1 is missing so value2 and value3 will be ignored.") - return(curr_object[]) - } - if(!value1 %in% names(curr_object)) stop(value1, " not found in ", object_name) - if(missing(value2)) { - if(!missing(value3)) warning("value2 is missing so value3 will be ignored.") - return(curr_object[[value1]]) - } - else { - if(!value2 %in% names(curr_object[[value1]])) stop(paste0(value2, " not found in ", object_name,"[[\"",value1,"\"]]")) - if(missing(value3)) return(curr_object[[value1]][[value2]]) - else { - if(!value3 %in% 
names(curr_object[[value1]][[value2]])) stop(value3, " not found in ", object_name,"[[\"",value1,"\"]]","[[\"",value2,"\"]]") - return(curr_object[[value1]][[value2]][[value3]]) - } - } -} -) - - - - -# Filters ----------------------------------------------------------------- - -DataBook$set("public", "add_filter", function(data_name, filter, filter_name = "", replace = TRUE, set_as_current_filter = FALSE, na.rm = TRUE, is_no_filter = FALSE, and_or = "&", inner_not = FALSE, outer_not = FALSE) { - if(missing(filter)) stop("filter is required") - self$get_data_objects(data_name)$add_filter(filter, filter_name, replace, set_as_current_filter, na.rm = na.rm, is_no_filter = is_no_filter, and_or = and_or, inner_not = inner_not, outer_not = outer_not) -} -) - -DataBook$set("public","add_filter_as_levels", function(data_name, filter_levels, column){ - self$get_data_objects(data_name)$add_filter_as_levels(filter_levels, column) -}) - - -DataBook$set("public", "current_filter", function(data_name) { - return(self$get_data_objects(data_name)$current_filter) -} -) - -DataBook$set("public", "set_current_filter", function(data_name, filter_name = "") { - self$get_data_objects(data_name)$set_current_filter(filter_name) -} -) - -DataBook$set("public", "get_filter", function(data_name, filter_name) { - return(self$get_data_objects(data_name)$get_filter(filter_name)) -} -) - -DataBook$set("public", "get_filter_as_logical", function(data_name, filter_name) { - return(self$get_data_objects(data_name)$get_filter_as_logical(filter_name)) -} -) - -DataBook$set("public", "get_current_filter", function(data_name) { - self$get_data_objects(data_name)$get_current_filter() -} -) - -DataBook$set("public", "get_filter_row_names", function(data_name, filter_name) { - row_names <- row.names(self$get_data_frame(data_name, convert_to_character = FALSE, stack_data = FALSE, - include_hidden_columns = TRUE, use_current_filter = TRUE, filter_name = filter_name, - remove_attr = FALSE, retain_attr = 
FALSE, drop_unused_filter_levels = FALSE)) - - return(row_names) -} -) - -DataBook$set("public", "get_current_filter_name", function(data_name) { - self$get_data_objects(data_name)$get_current_filter()$name -} -) - -DataBook$set("public", "get_filter_names", function(data_name, as_list = FALSE, include = list(), exclude = list(), excluded_items = c()) { - if(missing(data_name)) { - #TODO what to do with excluded_items in this case - return(lapply(self$get_data_objects(), function(x) x$get_filter_names(include = include, exclude = exclude))) - } - else { - return(self$get_data_objects(data_name)$get_filter_names(as_list = as_list, include = include, exclude = exclude, excluded_items = excluded_items)) - } -} -) - -DataBook$set("public", "remove_current_filter", function(data_name) { - self$get_data_objects(data_name)$remove_current_filter() -} -) - -DataBook$set("public", "filter_applied", function(data_name) { - self$get_data_objects(data_name)$filter_applied() -} -) - -DataBook$set("public", "filter_string", function(data_name, filter_name) { - self$get_data_objects(data_name)$filter_string(filter_name) -} -) - -DataBook$set("public", "get_filter_as_instat_calculation", function(data_name, filter_name) { - self$get_data_objects(data_name)$get_filter_as_instat_calculation(filter_name) -} -) - - -# Column Selections ------------------------------------------------------- - -DataBook$set("public", "add_column_selection", function(data_name, column_selection, name = "", replace = TRUE, set_as_current = FALSE, is_everything = FALSE, and_or = "|") { - self$get_data_objects(data_name)$add_column_selection(column_selection = column_selection, name = name, replace = replace, set_as_current = set_as_current, is_everything = is_everything, and_or = and_or) -} -) - -DataBook$set("public", "current_column_selection", function(data_name) { - return(self$get_data_objects(data_name)$current_column_selection) -} -) - -DataBook$set("public", "set_current_column_selection", 
function(data_name, name = "") { - self$get_data_objects(data_name)$set_current_column_selection(name) -} -) - -DataBook$set("public", "get_column_selection", function(data_name, name) { - return(self$get_data_objects(data_name)$get_column_selection(name)) -} -) - -DataBook$set("public", "get_column_selection_column_names", function(data_name, filter_name) { - return(self$get_data_objects(data_name)$get_filter_as_logical(filter_name)) -} -) - -DataBook$set("public", "get_column_selected_column_names", function(data_name, column_selection_name = "") { - return(self$get_data_objects(data_name)$get_column_selected_column_names(column_selection_name)) -} -) - -DataBook$set("public", "get_current_column_selection", function(data_name) { - self$get_data_objects(data_name)$get_current_column_selection() -} -) - -DataBook$set("public", "get_current_column_selection_name", function(data_name) { - self$get_data_objects(data_name)$get_current_column_selection()$name -} -) - -DataBook$set("public", "get_column_selection_names", function(data_name, as_list = FALSE, include = list(), exclude = list(), excluded_items = c()) { - if(missing(data_name)) { - #TODO what to do with excluded_items in this case - return(lapply(self$get_data_objects(), function(x) x$get_column_selection_names(include = include, exclude = exclude))) - } - else { - return(self$get_data_objects(data_name)$get_column_selection_names(as_list = as_list, include = include, exclude = exclude, excluded_items = excluded_items)) - } -} -) - -DataBook$set("public", "remove_current_column_selection", function(data_name) { - self$get_data_objects(data_name)$remove_current_column_selection() -} -) - -DataBook$set("public", "column_selection_applied", function(data_name) { - self$get_data_objects(data_name)$column_selection_applied() -} -) - -DataBook$set("public", "replace_value_in_data", function(data_name, col_names, rows, old_value, old_is_missing = FALSE, start_value = NA, end_value = NA, new_value, new_is_missing = 
FALSE, closed_start_value = TRUE, closed_end_value = TRUE, locf = FALSE, from_last = FALSE) { - self$get_data_objects(data_name)$replace_value_in_data(col_names, rows, old_value, old_is_missing, start_value, end_value, new_value, new_is_missing, closed_start_value, closed_end_value, locf, from_last) -} -) - -DataBook$set("public", "paste_from_clipboard", function(data_name, col_names, start_row_pos = 1, first_clip_row_is_header = TRUE, clip_board_text) { - self$get_data_objects(data_name)$paste_from_clipboard(col_names, start_row_pos, first_clip_row_is_header, clip_board_text) -} -) - -DataBook$set("public", "rename_column_in_data", function(data_name, column_name = NULL, new_val = NULL, label = "", type = "single", .fn, .cols = everything(), new_column_names_df, new_labels_df, ...) { - self$get_data_objects(data_name)$rename_column_in_data(column_name, new_val, label, type, .fn, .cols, new_column_names_df, new_labels_df, ...) - self$update_links_rename_column(data_name = data_name, old_column_name = column_name, new_column_name = new_val) -}) - -DataBook$set("public", "frequency_tables", function(data_name, x_col_names, y_col_name, n_column_factors = 1, store_results = TRUE, drop = TRUE, na.rm = FALSE, summary_name = NA, include_margins = FALSE, return_output = TRUE, treat_columns_as_factor = FALSE, page_by = "default", as_html = TRUE, signif_fig = 2, na_display = "", na_level_display = "NA", weights = NULL, caption = NULL, result_names = NULL, percentage_type = "none", perc_total_columns = NULL, perc_total_factors = c(), perc_total_filter = NULL, perc_decimal = FALSE, margin_name = "(All)", additional_filter, ...) 
{ - for(i in seq_along(x_col_names)) { - cat(x_col_names[i], "by", y_col_name, "\n") - print(data_book$summary_table(data_name = data_name, summaries = count_label, factors=c(x_col_names[i], y_col_name), n_column_factors = n_column_factors, store_results = store_results, drop = drop, na.rm = na.rm, summary_name = summary_name, include_margins = include_margins, return_output = return_output, treat_columns_as_factor = treat_columns_as_factor, page_by = page_by, as_html = as_html, signif_fig = signif_fig, na_display = na_display, na_level_display = na_level_display, weights = weights, caption = caption, result_names = result_names, percentage_type = percentage_type, perc_total_columns = perc_total_columns, perc_total_factors = perc_total_factors, perc_total_filter = perc_total_filter, perc_decimal = perc_decimal, margin_name = margin_name, additional_filter = additional_filter, ... = ...)) - cat("\n") - } -} -) - -DataBook$set("public", "anova_tables", function(data_name, x_col_names, y_col_name, signif.stars = FALSE, sign_level = FALSE, means = FALSE) { - self$get_data_objects(data_name)$anova_tables(x_col_names = x_col_names, y_col_name = y_col_name, signif.stars = signif.stars, sign_level = sign_level, means = means) -} -) - -DataBook$set("public", "cor", function(data_name, x_col_names, y_col_name, use = "everything", method = c("pearson", "kendall", "spearman")) { - self$get_data_objects(data_name)$cor(x_col_names = x_col_names, y_col_name = y_col_name, use = use, method = method) -} -) - -DataBook$set("public", "remove_columns_in_data", function(data_name, cols, allow_delete_all = FALSE) { - self$get_data_objects(data_name)$remove_columns_in_data(cols = cols, allow_delete_all = allow_delete_all) -} -) - -DataBook$set("public", "remove_rows_in_data", function(data_name, row_names) { - self$get_data_objects(data_name)$remove_rows_in_data(row_names = row_names) -} -) - -DataBook$set("public", "get_next_default_column_name", function(data_name, prefix) { - 
if(missing(data_name)) { - out = list() - for(curr_obj in private$.data_sheets) { - out[[curr_obj$get_metadata(data_name_label)]] = curr_obj$get_next_default_column_name(prefix) - } - return(out) - } - if(!is.character(data_name)) stop("data_name must be of type character") - if(!data_name %in% names(private$.data_sheets)) stop("dataframe: ", data_name, " not found") - return(self$get_data_objects(data_name)$get_next_default_column_name(prefix)) -} -) - -DataBook$set("public", "get_column_names", function(data_name, as_list = FALSE, include = list(), exclude = list(), excluded_items = c(), max_no, use_current_column_selection = TRUE) { - if(missing(data_name)) { - #TODO what to do with excluded items in this case? - return(lapply(self$get_data_objects(), function(x) x$get_column_names(include = include, exclude = exclude, max_no = max_no, use_current_column_selection = use_current_column_selection))) - } - else { - return(self$get_data_objects(data_name)$get_column_names(as_list, include, exclude, excluded_items = excluded_items, max_no = max_no, use_current_column_selection = use_current_column_selection)) - } -} -) - -DataBook$set("public", "reorder_columns_in_data", function(data_name, col_order){ - self$get_data_objects(data_name)$reorder_columns_in_data(col_order = col_order) -} -) - -#TODO Think how to use row_data argument -DataBook$set("public", "insert_row_in_data", function(data_name, start_row, row_data = c(), number_rows = 1, before = FALSE) { - self$get_data_objects(data_name)$insert_row_in_data(start_row = start_row, row_data = row_data, number_rows = number_rows, before = before) -} -) - -DataBook$set("public", "get_data_frame_length", function(data_name, use_current_filter = FALSE) { - self$get_data_objects(data_name)$get_data_frame_length(use_current_filter) -} -) - -DataBook$set("public", "get_next_default_dataframe_name", function(prefix, include_index = TRUE, start_index = 1) { - next_default_item(prefix = prefix, existing_names = 
names(private$.data_sheets), include_index = include_index, start_index = start_index) -} -) - -DataBook$set("public", "delete_dataframes", function(data_names, delete_graph_book = TRUE) { - # TODO need a set or append - for(name in data_names) { - private$.data_sheets[[name]] <- NULL - self$data_objects_changed <- TRUE - link_names <- c() - for(i in seq_along(private$.links)) { - if(private$.links[[i]]$from_data_frame == name || private$.links[[i]]$to_data_frame == name) { - link_names <- c(link_names, names(private$.links)[i]) - } - } - for(link_name in link_names) { - #TODO Should we be able to disable links instead of deleting? - self$remove_link(link_name) - } - if(!is.null(private$.last_graph) && private$.last_graph[1] %in% data_names) private$.last_graph <- NULL - } - if (delete_graph_book && exists(".graph_data_book")) .graph_data_book$delete_dataframes(data_names = data_names, delete_graph_book = FALSE) -} -) - -DataBook$set("public", "remove_link", function(link_name) { - if(!link_name %in% names(private$.links)) stop(link_name, " not found.") - private$.links[[link_name]] <- NULL - cat("Link removed:", link_name) -} -) - -DataBook$set("public", "get_column_factor_levels", function(data_name,col_name = "") { - self$get_data_objects(data_name)$get_column_factor_levels(col_name) -} -) - -DataBook$set("public", "get_factor_data_frame", function(data_name, col_name = "", include_levels = TRUE, include_NA_level = FALSE) { - self$get_data_objects(data_name)$get_factor_data_frame(col_name = col_name, include_levels = include_levels, include_NA_level = include_NA_level) -} -) - -DataBook$set("public", "sort_dataframe", function(data_name, col_names = c(), decreasing = FALSE, na.last = TRUE, by_row_names = FALSE, row_names_as_numeric = TRUE) { - self$get_data_objects(data_name)$sort_dataframe(col_names = col_names, decreasing = decreasing, na.last = na.last, by_row_names = by_row_names, row_names_as_numeric = row_names_as_numeric) -} -) - -DataBook$set("public", 
"rename_dataframe", function(data_name, new_value = "", label = "") { - data_obj <- self$get_data_objects(data_name) - if(data_name != new_value) { - if(tolower(new_value) %in% tolower(names(private$.data_sheets)[-which(names(private$.data_sheets) == data_name)])) stop("Cannot rename data frame since ", new_value, " is an existing data frame.") - names(private$.data_sheets)[names(private$.data_sheets) == data_name] <- new_value - data_obj$append_to_metadata(data_name_label, new_value) - self$update_links_rename_data_frame(data_name, new_value) - } - if(label != "") { - data_obj$append_to_metadata(property = "label" , new_val = label) - } - data_obj$set_data_changed(TRUE) - data_obj$set_metadata_changed(TRUE) - data_obj$set_variables_metadata_changed(TRUE) - if (exists(".graph_data_book")) .graph_data_book$rename_dataframe(data_name = data_name, new_value = new_value, label = label) -} -) - -DataBook$set("public", "convert_column_to_type", function(data_name, col_names = c(), to_type, factor_values = NULL, set_digits, set_decimals = FALSE, keep_attr = TRUE, ignore_labels = FALSE, keep.labels = TRUE) { - self$get_data_objects(data_name)$convert_column_to_type(col_names = col_names, to_type = to_type, factor_values = factor_values, set_digits = set_digits,set_decimals = set_decimals, keep_attr = keep_attr, ignore_labels = ignore_labels, keep.labels = keep.labels) -} -) - -DataBook$set("public", "append_to_variables_metadata", function(data_name, col_names, property, new_val = "") { - self$get_data_objects(data_name)$append_to_variables_metadata(col_names, property, new_val) -} -) - -DataBook$set("public", "append_to_dataframe_metadata", function(data_name, property, new_val = "") { - self$get_data_objects(data_name)$append_to_metadata(property, new_val) -} -) - -DataBook$set("public", "append_to_metadata", function(property, new_val = "", allow_override_special = FALSE) { - if(missing(property)) stop("property and new_val arguments must be specified.") - - 
if(!is.character(property)) stop("property must be of type character") - if(!allow_override_special && property %in% c("class")) message("Cannot override property: ", property, ". Specify allow_override_special = TRUE to replace this property.") - else { - attr(self, property) <- new_val - self$metadata_changed <- TRUE - self$append_to_changes(list(Added_metadata, property)) - } -} -) - -DataBook$set("public", "add_metadata_field", function(data_name, property, new_val = "") { - if(missing(property)) stop("property and new_val arguments must be specified.") - if(data_name == overall_label) { - invisible(sapply(self$get_data_objects(), function(x) x$append_to_metadata(property, new_val))) - } - else invisible(sapply(self$get_data_objects(data_name, as_list = TRUE), function(x) x$append_to_variables_metadata(property = property, new_val = new_val))) -} -) - -DataBook$set("public", "reorder_dataframes", function(data_frames_order) { - if(length(data_frames_order) != length(names(private$.data_sheets))) stop("number data frames to order should be equal to number of dataframes in the object") - if(!setequal(data_frames_order,names(private$.data_sheets))) stop("data_frames_order must be a permutation of the dataframe names.") - - self$set_data_objects(private$.data_sheets[data_frames_order]) - self$data_objects_changed <- TRUE -} -) - -DataBook$set("public", "copy_columns", function(data_name, col_names = "", copy_to_clipboard = FALSE) { - if(copy_to_clipboard){ - col_data_obj <- self$get_columns_from_data(data_name = data_name, col_names = col_names, force_as_data_frame = TRUE) - self$copy_to_clipboard(content = col_data_obj) - }else{ - self$get_data_objects(data_name)$copy_columns(col_names = col_names) - } - -} -) - -DataBook$set("public", "drop_unused_factor_levels", function(data_name, col_name) { - self$get_data_objects(data_name)$drop_unused_factor_levels(col_name = col_name) -} -) - -DataBook$set("public", "set_factor_levels", function(data_name, col_name, 
new_labels, new_levels, set_new_labels = TRUE) { - self$get_data_objects(data_name)$set_factor_levels(col_name = col_name, new_labels = new_labels, new_levels = new_levels, set_new_labels = set_new_labels) -} -) - -DataBook$set("public", "edit_factor_level", function(data_name, col_name,old_level, new_level) { - self$get_data_objects(data_name)$edit_factor_level(col_name = col_name, old_level = old_level, new_level = new_level) -} -) - -DataBook$set("public", "set_factor_reference_level", function(data_name, col_name, new_ref_level) { - self$get_data_objects(data_name)$set_factor_reference_level(col_name = col_name, new_ref_level = new_ref_level) -} -) - -DataBook$set("public", "get_column_count", function(data_name, use_column_selection = FALSE) { - return(self$get_data_objects(data_name)$get_column_count(use_column_selection)) -} -) - -DataBook$set("public", "reorder_factor_levels", function(data_name, col_name, new_level_names) { - self$get_data_objects(data_name)$reorder_factor_levels(col_name = col_name, new_level_names = new_level_names) -} -) - -DataBook$set("public","get_data_type", function(data_name, col_name) { - self$get_data_objects(data_name)$get_data_type(col_name = col_name) -} -) - -DataBook$set("public","copy_data_frame", function(data_name, new_name, label = "", copy_to_clipboard = FALSE) { - if(copy_to_clipboard){ - self$copy_to_clipboard(content = self$get_data_frame(data_name)) - }else{ - if(new_name %in% names(private$.data_sheets)) stop("Cannot copy data frame since ", new_name, " is an existing data frame.") - curr_obj <- self$get_data_objects(data_name)$clone(deep = TRUE) - - if(missing(new_name)) new_name <- next_default_item(data_name, self$get_data_names()) - self$append_data_object(new_name, curr_obj) - new_data_obj <- self$get_data_objects(new_name) - new_data_obj$data_changed <- TRUE - new_data_obj$set_data_changed(TRUE) - if(label != "") { - new_data_obj$append_to_metadata(property = "label" , new_val = label) - 
new_data_obj$set_metadata_changed(TRUE) - } - } -} -) - -DataBook$set("public","copy_col_metadata_to_clipboard", function(data_name, property_names) { - if(missing(property_names)){ - self$copy_to_clipboard(content = self$get_variables_metadata(data_name = data_name)) - }else{ - self$copy_to_clipboard(content = self$get_variables_metadata(data_name = data_name, property = property_names)) - } -} -) - -DataBook$set("public","copy_data_frame_metadata_to_clipboard", function(data_name, property_names) { - if(missing(property_names)){ - self$copy_to_clipboard(content = self$get_data_frame_metadata(data_name = data_name)) - }else{ - self$copy_to_clipboard(content = self$get_data_frame_metadata(data_name = data_name, label = property_names)) - } -} -) - -DataBook$set("public","copy_to_clipboard", function(content) { - clipr::write_clip(content = content) -} -) - -DataBook$set("public","set_hidden_columns", function(data_name, col_names = c()) { - self$get_data_objects(data_name)$set_hidden_columns(col_names = col_names) -} -) - -DataBook$set("public","unhide_all_columns", function(data_name) { - if(missing(data_name)) invisible(sapply(self$get_data_objects(), function(obj) obj$unhide_all_columns())) - else self$get_data_objects(data_name)$unhide_all_columns() -} -) - -DataBook$set("public","set_hidden_data_frames", function(data_names = c()) { - invisible(sapply(data_names, function(x) self$append_to_dataframe_metadata(data_name = x, property = is_hidden_label, new_val = TRUE))) - unhide_data_names <- setdiff(self$get_data_names(), data_names) - invisible(sapply(unhide_data_names, function(x) self$append_to_dataframe_metadata(data_name = x, property = is_hidden_label, new_val = FALSE))) -} -) - -DataBook$set("public","get_hidden_data_frames", function() { - all_data_names <- names(private$.data_sheets) - visible_data_names <- all_data_names[sapply(all_data_names, function(x) !isTRUE(self$get_data_objects(x)$get_metadata(label = is_hidden_label)))] - hidden_data_names <- 
setdiff(all_data_names, visible_data_names) - return(hidden_data_names) -} -) - -DataBook$set("public","set_row_names", function(data_name, row_names) { - self$get_data_objects(data_name)$set_row_names(row_names = row_names) -} -) - -DataBook$set("public","get_row_names", function(data_name) { - self$get_data_objects(data_name)$get_row_names() -} -) - -DataBook$set("public","set_protected_columns", function(data_name, col_names) { - self$get_data_objects(data_name)$set_protected_columns(col_names = col_names) -} -) - -DataBook$set("public","get_metadata_fields", function(data_name, include_overall, as_list = FALSE, include, exclude, excluded_items = c()) { - if(!missing(data_name)) { - if(data_name == overall_label) { - out = names(self$get_combined_metadata()) - if(length(excluded_items) > 0){ - ex_ind = which(out %in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the list of objects") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(as_list) { - lst = list() - lst[[data_name]] <- out - return(lst) - } - else return(out) - } - else return(self$get_data_objects(data_name)$get_variables_metadata_fields(as_list = as_list, include = include, exclude = exclude, excluded_items = excluded_items)) - } - else { - #TODO what to do with excluded_items in this case - out = list() - if(include_overall) out[[overall_label]] <- names(self$get_combined_metadata()) - for(data_obj_name in self$get_data_names()) { - out[[data_obj_name]] <- self$get_data_objects(data_obj_name)$get_variables_metadata_fields(as_list = FALSE, include = include, exclude = exclude) - } - return(out) - } -} -) - -DataBook$set("public","freeze_columns", function(data_name, column) { - self$get_data_objects(data_name)$freeze_columns(column = column) -} -) - -DataBook$set("public","unfreeze_columns", function(data_name) { - self$get_data_objects(data_name)$unfreeze_columns() -} -) - -DataBook$set("public","is_variables_metadata", 
function(data_name, property, column, return_vector = FALSE) { - self$get_data_objects(data_name)$is_variables_metadata(property, column, return_vector) -} -) - -DataBook$set("public","data_frame_exists", function(data_name) { - return(data_name %in% names(private$.data_sheets)) -} -) - -DataBook$set("public","add_key", function(data_name, col_names, key_name) { - self$get_data_objects(data_name)$add_key(col_names, key_name) - names(col_names) <- col_names - self$add_link(data_name, data_name, col_names, keyed_link_label) - invisible(sapply(self$get_data_objects(), function(x) if(!x$is_metadata(is_linkable)) x$append_to_metadata(is_linkable, FALSE))) -} -) - -DataBook$set("public","is_key", function(data_name, col_names) { - self$get_data_objects(data_name)$is_key(col_names) -} -) - -DataBook$set("public","has_key", function(data_name) { - self$get_data_objects(data_name)$has_key() -} -) - -DataBook$set("public","set_enable_disable_undo", function(data_name, disable_undo) { - self$get_data_objects(data_name)$set_enable_disable_undo(disable_undo) -} -) - -DataBook$set("public", "is_undo", function(data_name) { - self$get_data_objects(data_name)$is_undo() -}) - -DataBook$set("public","has_undo_history", function(data_name) { - self$get_data_objects(data_name)$has_undo_history() -} -) - -DataBook$set("public","undo_last_action", function(data_name) { - self$get_data_objects(data_name)$undo_last_action() -} -) - -DataBook$set("public","redo_last_action", function(data_name) { - self$get_data_objects(data_name)$redo_last_action() -} -) - -DataBook$set("public","get_keys", function(data_name, key_name) { - self$get_data_objects(data_name)$get_keys(key_name) -} -) - -# Note: This is a separate functionality to comments as defined in instat_comment.R -# This is intended to be later integrated together. 
# Adds a comment attached to a (data frame, row, column) location.
# Comments are stored in a hidden ".comment" data frame (created on first use)
# keyed by (sheet, row, id). The row is identified by the values of the data
# frame's key columns joined with "__", so a key must exist before commenting.
DataBook$set("public","add_new_comment", function(data_name, row = "", column = "", comment) {
  if (!self$has_key(data_name)) stop("A key must be defined in the data frame to add a comment. Use the Add Key dialog to define a key.")
  # Lazily create the comment store the first time a comment is added.
  if (!".comment" %in% self$get_data_names()) {
    comment_df <- data.frame(sheet = character(0),
                             row = character(0),
                             column = character(0),
                             id = numeric(0),
                             comment = character(0),
                             time_stamp = as.POSIXct(c()))
    self$import_data(data_tables = list(.comment = comment_df))
    self$add_key(".comment", c("sheet", "row", "id"), "key1")
  }
  comment_df <- self$get_data_frame(".comment", use_current_filter = FALSE)
  curr_df <- self$get_data_frame(data_name, use_current_filter = FALSE)
  if(row != ""){
    # Encode the target row as the "__"-joined values of the key columns.
    curr_row <- curr_df[row.names(curr_df) == row, ]
    key <- self$get_keys(data_name)[[1]]
    key_cols <- as.character(key)
    key_vals <- paste(sapply(curr_row[, key_cols], as.character), collapse = "__")
  } else {
    key_vals <- ""
  }
  # ids are sequential per (sheet, row) pair.
  curr_comments <- comment_df[comment_df$sheet == data_name & comment_df$row == key_vals, ]
  new_id <- 1
  if (nrow(curr_comments) > 0) new_id <- max(curr_comments$id) + 1
  comment_df[nrow(comment_df) + 1, ] <- list(sheet = data_name,
                                             row = key_vals,
                                             column = column,
                                             id = new_id,
                                             comment = comment,
                                             time_stamp = Sys.time())
  self$get_data_objects(".comment")$set_data(new_data = comment_df)
}
)

# Delegates to the data object's get_comments for the given comment id.
DataBook$set("public","get_comments", function(data_name, comment_id) {
  self$get_data_objects(data_name)$get_comments(comment_id)
}
)

# Returns a single named link if link_name is given, otherwise all links.
DataBook$set("public","get_links", function(link_name, ...)
{
  if(!missing(link_name)) {
    if(!link_name %in% names(private$.links)) stop(link_name, " not found.")
    return(private$.links[[link_name]])
  }
  else return(private$.links)
}
)

# Delegation wrapper: records structure column groupings on the data object.
DataBook$set("public","set_structure_columns", function(data_name, struc_type_1 = c(), struc_type_2 = c(), struc_type_3 = c()) {
  self$get_data_objects(data_name)$set_structure_columns(struc_type_1, struc_type_2, struc_type_3)
}
)

# Delegation wrapper: marks dependent_cols as depending on columns.
DataBook$set("public","add_dependent_columns", function(data_name, columns, dependent_cols) {
  self$get_data_objects(data_name)$add_dependent_columns(columns, dependent_cols)
}
)

# Delegation wrapper: assigns display colours to columns.
DataBook$set("public","set_column_colours", function(data_name, columns, colours) {
  self$get_data_objects(data_name)$set_column_colours(columns, colours)
}
)

# Delegation wrapper: TRUE if the given columns have colours assigned.
DataBook$set("public","has_colours", function(data_name, columns) {
  self$get_data_objects(data_name)$has_colours(columns)
}
)

# Delegation wrapper: clears all column colours on the data frame.
DataBook$set("public", "remove_column_colours", function(data_name) {
  self$get_data_objects(data_name)$remove_column_colours()
}
)

# Delegation wrapper: colours columns according to a metadata property.
DataBook$set("public","set_column_colours_by_metadata", function(data_name, columns, property) {
  self$get_data_objects(data_name)$set_column_colours_by_metadata(data_name, columns, property)
}
)

# Delegation wrapper: one-variable summary graphs for the given columns.
DataBook$set("public","graph_one_variable", function(data_name, columns, numeric = "geom_boxplot", categorical = "geom_bar", character = "geom_bar", output = "facets", free_scale_axis = FALSE, ncol = NULL, coord_flip = FALSE, ...) {
  # FIX: previously the `character` argument was accepted but never forwarded,
  # so a non-default geom for character columns was silently ignored.
  self$get_data_objects(data_name)$graph_one_variable(columns = columns, numeric = numeric, categorical = categorical, character = character, output = output, free_scale_axis = free_scale_axis, ncol = ncol, coord_flip = coord_flip, ... = ...)
}
)

# Delegation wrapper: builds a date column from year/month/day columns.
DataBook$set("public","make_date_yearmonthday", function(data_name, year, month, day, f_year, f_month, f_day, year_format = "%Y", month_format = "%m") {
  self$get_data_objects(data_name)$make_date_yearmonthday(year = year, month = month, day = day, f_year = f_year, f_month = f_month, f_day = f_day, year_format = year_format, month_format = month_format)
}
)

# Delegation wrapper: builds a date column from year and day-of-year columns.
DataBook$set("public","make_date_yeardoy", function(data_name, year, doy, base, doy_typical_length = "366") {
  self$get_data_objects(data_name)$make_date_yeardoy(year = year, doy = doy, base = base, doy_typical_length = doy_typical_length)
}
)

# Delegation wrapper: sets the contrasts of a factor column.
DataBook$set("public","set_contrasts_of_factor", function(data_name, col_name, new_contrasts, defined_contr_matrix) {
  self$get_data_objects(data_name)$set_contrasts_of_factor(col_name = col_name, new_contrasts = new_contrasts, defined_contr_matrix = defined_contr_matrix)
}
)

# Creates a new data frame whose rows are the levels of `factor`, optionally
# with its contrasts matrix and level frequencies, keys it on the factor and
# links it back to the source data frame. If a linked factor data frame
# already exists it is replaced only when replace = TRUE.
DataBook$set("public","create_factor_data_frame", function(data_name, factor, factor_data_frame_name, include_contrasts = FALSE, replace = FALSE, summary_count = TRUE) {
  curr_data_obj <- self$get_data_objects(data_name)
  if(!factor %in% names(curr_data_obj$get_data_frame())) stop(factor, " not found in the data")
  if(!is.factor(curr_data_obj$get_columns_from_data(factor))) stop(factor, " is not a factor column.")
  if(self$link_exists_from(data_name, factor)) {
    message("Factor data frame already exists.")
    if(replace) {
      message("Current factor data frame will be replaced.")
      factor_named <- factor
      names(factor_named) <- factor
      curr_factor_df_name <- self$get_linked_to_data_name(data_name, factor_named)
      # TODO what if there is more than 1?
      if(length(curr_factor_df_name) > 0) self$delete_dataframes(curr_factor_df_name[1])
    }
    else {
      warning("replace = FALSE so no action will be taken.")
    }
  }

  data_frame_list <- list()
  if(missing(factor_data_frame_name)) factor_data_frame_name <- paste0(data_name, "_", factor)
  factor_data_frame_name <- make.names(factor_data_frame_name)
  factor_data_frame_name <- next_default_item(factor_data_frame_name, self$get_data_names(), include_index = FALSE)

  factor_column <- curr_data_obj$get_columns_from_data(factor)
  factor_data_frame <- data.frame(levels(factor_column))
  names(factor_data_frame) <- factor
  if(include_contrasts) factor_data_frame <- cbind(factor_data_frame, contrasts(factor_column))
  if(summary_count) factor_data_frame <- cbind(factor_data_frame, summary(factor_column))

  # Extra columns are renamed C1..Cn; the frequency column (if any) is last.
  row.names(factor_data_frame) <- 1:nrow(factor_data_frame)
  names(factor_data_frame)[2:ncol(factor_data_frame)] <- paste0("C", 1:(ncol(factor_data_frame)-1))
  if(summary_count) colnames(factor_data_frame)[ncol(factor_data_frame)] <- "Frequencies"
  data_frame_list[[factor_data_frame_name]] <- factor_data_frame
  self$import_data(data_frame_list)
  factor_data_obj <- self$get_data_objects(factor_data_frame_name)
  #TODO We shoud now never call add_key directly from the data object method because it needs to add a link as well at this point to itself
  factor_data_obj$add_key(factor)
  names(factor) <- factor
  self$add_link(from_data_frame = data_name, to_data_frame = factor_data_frame_name, link_pairs = factor, type = keyed_link_label)
  }
)

# Delegation wrapper: splits a date column into the requested components
# (year, month, week, day-of-year, pentad, dekad, quarter, ...).
DataBook$set("public","split_date", function(data_name, col_name = "", year_val = FALSE, year_name = FALSE, leap_year = FALSE, month_val = FALSE, month_abbr = FALSE, month_name = FALSE, week_val = FALSE, week_abbr = FALSE, week_name = FALSE, weekday_val = FALSE, weekday_abbr = FALSE, weekday_name = FALSE, day = FALSE, day_in_month = FALSE, day_in_year = FALSE, day_in_year_366 = FALSE, pentad_val = FALSE, pentad_abbr = FALSE, dekad_val = FALSE, dekad_abbr = FALSE, quarter_val = FALSE, quarter_abbr = FALSE, with_year = FALSE, s_start_month = 1, s_start_day_in_month = 1, days_in_month = FALSE) {
  self$get_data_objects(data_name)$split_date(col_name = col_name , year_val = year_val, year_name = year_name, leap_year = leap_year, month_val = month_val, month_abbr = month_abbr, month_name = month_name, week_val = week_val, week_abbr = week_abbr, week_name = week_name, weekday_val = weekday_val, weekday_abbr = weekday_abbr, weekday_name = weekday_name, day = day, day_in_month = day_in_month, day_in_year = day_in_year, day_in_year_366 = day_in_year_366, pentad_val = pentad_val, pentad_abbr = pentad_abbr, dekad_val = dekad_val, dekad_abbr = dekad_abbr, quarter_val = quarter_val, quarter_abbr = quarter_abbr, with_year = with_year, s_start_month = s_start_month, s_start_day_in_month = s_start_day_in_month, days_in_month = days_in_month) 
}
)

# Imports SST data via convert_SST and links the two resulting data frames
# on (lat, lon). data_names must name both outputs.
DataBook$set("public", "import_SST", function(dataset, data_from = 5, data_names = c()) {
  data_list <- convert_SST(dataset, data_from)
  if(length(data_list) != length(data_names))stop("data_names vector should be of length 2")
  names(data_list) = data_names
  self$import_data(data_tables = data_list)
  self$add_key(data_names[2], c("lat", "lon"))
  self$add_link(from_data_frame = data_names[1], to_data_frame = data_names[2], link_pairs = c(lat = "lat", lon = "lon"), type = keyed_link_label)
}
)

# Delegation wrapper: inventory plot of data availability over time.
DataBook$set("public","make_inventory_plot", function(data_name, date_col, station_col = NULL, year_col = NULL, doy_col = NULL, element_cols = NULL, add_to_data = FALSE, year_doy_plot = FALSE, coord_flip = FALSE, facet_by = NULL, graph_title = "Inventory Plot", graph_subtitle = NULL, graph_caption = NULL, title_size = NULL, subtitle_size = NULL, caption_size = NULL, labelXAxis, labelYAxis, xSize = NULL, ySize = NULL, Xangle = NULL, Yangle = NULL, scale_xdate, fromXAxis = NULL, toXAxis = NULL, byXaxis = NULL, date_ylabels, legend_position = NULL, xlabelsize = NULL, ylabelsize = NULL, scale = NULL, dir = "", row_col_number, nrow = NULL, ncol = NULL, key_colours = c("red", "grey"), display_rain_days = FALSE, facet_xsize = 9, facet_ysize = 9, facet_xangle = 90, facet_yangle = 90, scale_ydate = FALSE, date_ybreaks, step = 1, rain_cats = list(breaks = c(0, 0.85, Inf), labels = c("Dry", "Rain"), key_colours = c("tan3", "blue"))) {
  self$get_data_objects(data_name)$make_inventory_plot(date_col = date_col, station_col = station_col, year_col = year_col, doy_col = doy_col,
                                                       element_cols = element_cols, add_to_data = add_to_data, year_doy_plot = year_doy_plot,
                                                       coord_flip = coord_flip, facet_by = facet_by, graph_title = graph_title, key_colours = key_colours,
                                                       display_rain_days = display_rain_days, rain_cats = rain_cats, graph_subtitle = graph_subtitle,
                                                       graph_caption = graph_caption, title_size = title_size, subtitle_size = subtitle_size,
                                                       caption_size = caption_size, labelXAxis = labelXAxis, labelYAxis = labelYAxis, xSize = xSize,
                                                       ySize = ySize, Xangle = Xangle, Yangle = Yangle, scale_xdate = scale_xdate, fromXAxis = fromXAxis,
                                                       toXAxis = toXAxis, byXaxis = byXaxis, xlabelsize = xlabelsize, scale_ydate = scale_ydate, date_ybreaks = date_ybreaks,
                                                       step = step, ylabelsize = ylabelsize, date_ylabels = date_ylabels, legend_position = legend_position,
                                                       dir = dir, row_col_number = row_col_number, nrow = nrow, ncol = ncol, scale = scale, facet_xsize = facet_xsize,
                                                       facet_ysize = facet_ysize, facet_xangle = facet_xangle, facet_yangle = facet_yangle)
}
)

# Imports NetCDF variables, grouped by their dimension sets: each group of
# variables sharing the same dimensions becomes one data frame. Data frames
# whose dimensions are a subset of another's are linked (subset side keyed).
DataBook$set("public", "import_NetCDF", function(nc, path, name, only_data_vars = TRUE, keep_raw_time = TRUE, include_metadata = TRUE, boundary, lon_points = NULL, lat_points = NULL, id_points = NULL, show_requested_points = TRUE, great_circle_dist = FALSE) {
  if(only_data_vars) {
    all_var_names <- ncdf4.helpers::nc.get.variable.list(nc)
  }
  else {
    all_var_names <- names(nc$var)
  }
  # Partition variables into groups with identical dimension name sets.
  remaining_var_names <- all_var_names
  var_groups <- list()
  dim_groups <- list()
  while(length(remaining_var_names) > 0) {
    grp <- remaining_var_names[1]
    dim_names <- ncdf4.helpers::nc.get.dim.names(nc, remaining_var_names[1])
    dim_groups[[length(dim_groups) + 1]] <- dim_names
    for(curr_var_name in remaining_var_names[-1]) {
      if(setequal(ncdf4.helpers::nc.get.dim.names(nc, curr_var_name), dim_names)) {
        grp <- c(grp, curr_var_name)
      }
    }
    remaining_var_names <- remaining_var_names[-which(remaining_var_names %in% grp)]
    var_groups[[length(var_groups) + 1]] <- grp
  }

  # One data frame per group; suffix names only when there are several groups.
  data_list <- list()
  use_prefix <- (length(seq_along(var_groups)) > 1)
  data_names <- c()
  for(i in seq_along(var_groups)) {
    if(use_prefix) curr_name <- paste0(name, "_", i)
    else curr_name <- name
    if(!missing(boundary)) curr_boundary <- boundary[names(boundary) %in% dim_groups[[i]]]
    else curr_boundary <- NULL
    curr_name <- make.names(curr_name)
    curr_name <- next_default_item(curr_name, self$get_data_names(), include_index = FALSE)
    # path given: read a directory of files; otherwise read the open nc handle.
    if(!missing(path)) data_list[[curr_name]] <- multiple_nc_as_data_frame(path = path, vars = var_groups[[i]], keep_raw_time = keep_raw_time, include_metadata = include_metadata, boundary = curr_boundary, lon_points = lon_points, lat_points = lat_points, id_points = id_points, show_requested_points = show_requested_points, great_circle_dist = great_circle_dist)
    else data_list[[curr_name]] <- nc_as_data_frame(nc = nc, vars = var_groups[[i]], keep_raw_time = keep_raw_time, include_metadata = include_metadata, boundary = curr_boundary, lon_points = lon_points, lat_points = lat_points, id_points = id_points, show_requested_points = show_requested_points, great_circle_dist = great_circle_dist)
    tmp_list <- list()
    tmp_list[[curr_name]] <- data_list[[curr_name]]
    data_names <- c(data_names, curr_name)
    self$import_data(data_tables = tmp_list)
  }
  # Link pairs of imported frames where one's dimensions nest in the other's.
  for(i in seq_along(data_names)) {
    for(j in seq_along(data_names)) {
      if(i != j && !self$link_exists_between(data_names[i], data_names[j]) && all(dim_groups[[i]] %in% dim_groups[[j]])) {
        pairs <- dim_groups[[i]]
        names(pairs) <- pairs
        self$add_link(data_names[j], data_names[i], pairs, keyed_link_label)
      }
    }
  }
}
)

# DataBook$set("public", "import_NetCDF", function(nc_data, main_data_name, loc_data_name, latitude_col_name = "", longitude_col_name = "") {
#   nc_result <- open_NetCDF(nc_data = nc_data, latitude_col_name = latitude_col_name, longitude_col_name = longitude_col_name)
#   if(length(nc_result) != 3)stop("Output from open_NetCDF should be a list of length 3")
#
#   data_list = nc_result[c(1,2)]
#
#   names(data_list) = c(main_data_name, next_default_item(prefix = loc_data_name, existing_names = self$get_data_names(), include_index = FALSE))
#   self$import_data(data_tables = data_list)
#   self$add_key(names(data_list)[2], nc_result[3][[1]])
#   named_char_vec <- nc_result[3][[1]]
#   names(named_char_vec) <- named_char_vec
#   self$add_link(from_data_frame = names(data_list)[1], to_data_frame = names(data_list)[2], link_pairs = named_char_vec, type = keyed_link_label)
# }
# )

# Delegation wrapper: fills gaps in a date column between start and end dates.
DataBook$set("public", "infill_missing_dates", function(data_name, date_name, factors, start_month, start_date, end_date, resort = TRUE) {
  self$get_data_objects(data_name)$infill_missing_dates(date_name = date_name, factor = factors, start_month = start_month, start_date = start_date, end_date = end_date, resort = resort)
}
)

# Delegation wrapper: names of the keys defined on a data frame.
DataBook$set("public", "get_key_names", function(data_name, include_overall = TRUE, include, exclude, include_empty = FALSE, as_list = FALSE, excluded_items = c()) {
  self$get_data_objects(data_name)$get_key_names(include_overall = include_overall, include, exclude, include_empty = include_empty, as_list = as_list, excluded_items = excluded_items)
}
)

# Delegation wrapper: removes a named key from a data frame.
DataBook$set("public", "remove_key", function(data_name, key_name) {
  self$get_data_objects(data_name)$remove_key(key_name = key_name)
}
)

# Attaches climdex output to the data book. If a data frame linked on the
# (station, year[, month]) columns already exists, the output is merged into
# it; otherwise a new data frame is created, keyed and linked. In both paths
# the station/year/month columns of climdex_output are coerced to match the
# classes of the corresponding columns so the link/merge is stable.
DataBook$set("public", "add_climdex_indices", function(data_name, climdex_output, freq = "annual", station, year, month) {
  stopifnot(freq %in% c("annual", "monthly"))
  if (missing(climdex_output)) stop("climdex_output is required.")
  if (missing(year)) stop("year is required.")
  if (freq == "monthly" && missing(month)) stop("month is required for freq = 'monthly'.")

  col_year <- self$get_columns_from_data(data_name = data_name, col_names = year)
  if (!missing(station)) col_station <- self$get_columns_from_data(data_name = data_name, col_names = station)
  if (freq == "monthly") col_month <- self$get_columns_from_data(data_name = data_name, col_names = month)
  links_cols <- year
  if (!missing(station)) links_cols <- c(station, links_cols)
  if (freq == "monthly") links_cols <- c(links_cols, month)
  linked_data_name <- self$get_linked_to_data_name(data_name, links_cols)
  if (length(linked_data_name) == 0) {
    # The classes should be the same if climdex_output comes from climdex() function.
    # If not, try to match the classes so that they are sensibly linked.
    # TODO These checks are repeated and could be extracted out.
    if (!missing(station) && !all(class(col_station) == class(climdex_output[[station]]))) {
      if (is.numeric(col_station)) climdex_output[[station]] <- as.numeric(climdex_output[[station]])
      else if (is.factor(col_station)) climdex_output[[station]] <- make_factor(climdex_output[[station]])
      else if (is.character(col_station)) climdex_output[[station]] <- as.character(climdex_output[[station]])
      else warning("Cannot recognise the class of station column. Link between data frames may be unstable.")
    }
    if (!all(class(col_year) == class(climdex_output[[year]]))) {
      if (is.numeric(col_year)) climdex_output[[year]] <- as.numeric(climdex_output[[year]])
      else if (is.factor(col_year)) climdex_output[[year]] <- make_factor(climdex_output[[year]])
      else if (is.character(col_year)) climdex_output[[year]] <- as.character(climdex_output[[year]])
      else warning("Cannot recognise the class of year column. Link between data frames may be unstable.")
    }
    if (freq == "monthly" && !all(class(col_month) == class(climdex_output[[month]]))) {
      if (is.numeric(col_month)) climdex_output[[month]] <- as.numeric(climdex_output[[month]])
      else if (is.factor(col_month)) {
        lvs <- levels(col_month)
        if (length(lvs) == 12) climdex_output[[month]] <- factor(climdex_output[[month]], labels = lvs, ordered = is.ordered(col_month))
        else {
          warning("month is a factor but does not have 12 levels. Output may not link correctly to data.")
          climdex_output[[month]] <- make_factor(climdex_output[[month]])
        }
      }
      else if (is.character(col_month)) {
        mns <- unique(col_month)
        # Also check English names as month.abb and month.name constants are locale dependent.
        if (length(mns) == 12) {
          if (setequal(mns, month.abb)) climdex_output[[month]] <- month.abb[climdex_output[[month]]]
          else if (setequal(mns, month.name)) climdex_output[[month]] <- month.name[climdex_output[[month]]]
          else if (setequal(mns, month_abb_english)) climdex_output[[month]] <- month_abb_english[climdex_output[[month]]]
          else if (setequal(mns, month_name_english)) climdex_output[[month]] <- month_name_english[climdex_output[[month]]]
          else if (setequal(mns, tolower(month_abb_english))) climdex_output[[month]] <- tolower(month_abb_english)[climdex_output[[month]]]
          else if (setequal(mns, tolower(month_name_english))) climdex_output[[month]] <- tolower(month_name_english)[climdex_output[[month]]]
          else if (setequal(mns, toupper(month_abb_english))) climdex_output[[month]] <- toupper(month_abb_english)[climdex_output[[month]]]
          else if (setequal(mns, toupper(month_name_english))) climdex_output[[month]] <- toupper(month_name_english)[climdex_output[[month]]]
          else warning("Cannot determine format of month column in data. Output may not link correctly to data.")
        } else {
          warning("month does not have 12 unique values. Output may not link correctly to data.")
          climdex_output[[month]] <- as.character(climdex_output[[month]])
        }
      }
    }
    data_list <- list(climdex_output)
    new_data_name <- paste(data_name, "by", paste(links_cols, collapse = "_"), sep = "_")
    new_data_name <- next_default_item(prefix = new_data_name , existing_names = self$get_data_names(), include_index = FALSE)
    names(data_list) <- new_data_name
    self$import_data(data_tables = data_list)
    self$add_key(new_data_name, links_cols)
    key_list <- as.list(links_cols)
    names(key_list) <- links_cols
    self$add_link(from_data_frame = data_name, to_data_frame = new_data_name, link_pairs = key_list, type = keyed_link_label)
  } else {
    # TODO what if there are multiple linked data frames?
    linked_data_name <- linked_data_name[1]
    year_col_name_linked <- self$get_equivalent_columns(from_data_name = data_name, to_data_name = linked_data_name, columns = year)
    by <- year
    names(by) <- year_col_name_linked
    if (!missing(station)) {
      station_col_name_linked <- self$get_equivalent_columns(from_data_name = data_name, to_data_name = linked_data_name, columns = station)
      linked_station_data <- self$get_columns_from_data(data_name = linked_data_name, col_names = station_col_name_linked)
      by <- c(station, by)
      names(by)[1] <- station_col_name_linked
    }
    if (freq == "monthly") {
      month_col_name_linked <- self$get_equivalent_columns(from_data_name = data_name, to_data_name = linked_data_name, columns = month)
      linked_month_data <- self$get_columns_from_data(data_name = linked_data_name, col_names = month_col_name_linked)
      by <- c(by, month)
      # FIX: was names(by)[3], which errors when station is missing and `by`
      # only has 2 elements; the month entry is always the last one appended.
      names(by)[length(by)] <- month_col_name_linked
    }
    linked_year_data <- self$get_columns_from_data(data_name = linked_data_name, col_names = year_col_name_linked)
    if (!missing(station) && !all(class(linked_station_data) == class(climdex_output[[station]]))) {
      if (is.numeric(linked_station_data)) climdex_output[[station]] <- as.numeric(climdex_output[[station]])
      else if (is.factor(linked_station_data)) climdex_output[[station]] <- make_factor(climdex_output[[station]])
      else if (is.character(linked_station_data)) climdex_output[[station]] <- as.character(climdex_output[[station]])
    }
    if (!all(class(linked_year_data) == class(climdex_output[[year]]))) {
      if (is.numeric(linked_year_data)) climdex_output[[year]] <- as.numeric(climdex_output[[year]])
      else if (is.factor(linked_year_data)) climdex_output[[year]] <- make_factor(climdex_output[[year]])
      else if (is.character(linked_year_data)) climdex_output[[year]] <- as.character(climdex_output[[year]])
    }
    if (freq == "monthly" && !all(class(linked_month_data) == class(climdex_output[[month]]))) {
      if (is.numeric(linked_month_data)) climdex_output[[month]] <- as.numeric(climdex_output[[month]])
      else if (is.factor(linked_month_data)) {
        lvs <- levels(linked_month_data)
        # FIX: was climdex_output[[year]] <- factor(...), a copy-paste bug that
        # clobbered the year column; the parallel branch above assigns month.
        # NOTE(review): unlike that branch this one does not pass
        # ordered = is.ordered(...) -- confirm whether that is intentional.
        if (length(lvs) == 12) climdex_output[[month]] <- factor(climdex_output[[month]], labels = lvs)
        else {
          warning("month is a factor but does not have 12 levels. Output may not link correctly to data.")
          climdex_output[[month]] <- make_factor(climdex_output[[month]])
        }
      }
      else if (is.character(linked_month_data)) {
        mns <- unique(linked_month_data)
        # Also check English names as month.abb and month.name are locale dependent.
        if (length(mns) == 12) {
          if (setequal(mns, month.abb)) climdex_output[[month]] <- month.abb[climdex_output[[month]]]
          else if (setequal(mns, month.name)) climdex_output[[month]] <- month.name[climdex_output[[month]]]
          else if (setequal(mns, month_abb_english)) climdex_output[[month]] <- month_abb_english[climdex_output[[month]]]
          else if (setequal(mns, month_name_english)) climdex_output[[month]] <- month_name_english[climdex_output[[month]]]
          else if (setequal(mns, tolower(month_abb_english))) climdex_output[[month]] <- tolower(month_abb_english)[climdex_output[[month]]]
          else if (setequal(mns, tolower(month_name_english))) climdex_output[[month]] <- tolower(month_name_english)[climdex_output[[month]]]
          else if (setequal(mns, toupper(month_abb_english))) climdex_output[[month]] <- toupper(month_abb_english)[climdex_output[[month]]]
          else if (setequal(mns, toupper(month_name_english))) climdex_output[[month]] <- toupper(month_name_english)[climdex_output[[month]]]
          else warning("Cannot determine format of month column in data. Output may not link correctly to data.")
        } else {
          warning("month does not have 12 unique values. Output may not link correctly to data.")
          climdex_output[[month]] <- as.character(climdex_output[[month]])
        }
      }
    }
    # TODO could make this a try/catch and then if merging fails put data in new data frame
    self$merge_data(data_name = linked_data_name, new_data = climdex_output, by = by)
  }
}
)

# Delegation wrapper: TRUE if the metadata property `str` exists.
DataBook$set("public", "is_metadata", function(data_name, str) {
  self$get_data_objects(data_name)$is_metadata(str = str)
}
)

# Delegation wrapper: resolves a climatic column name for the data frame.
DataBook$set("public", "get_climatic_column_name", function(data_name, col_name) {
  self$get_data_objects(data_name)$get_climatic_column_name(col_name = col_name)
}
)

# Delegation wrapper: merges new_data into the named data frame.
DataBook$set("public", "merge_data", function(data_name, new_data, by = NULL, type = "left", match = "all") {
  self$get_data_objects(data_name)$merge_data(new_data = new_data, by = by, type = type, match = match)
}
)

# Names of data frames flagged with truthy corruption metadata.
DataBook$set("public", "get_corruption_data_names", function() {
  corruption_names <- c()
  for(curr_name in self$get_data_names()) {
    if(self$get_data_objects(curr_name)$is_metadata(corruption_data_label) && self$get_data_objects(curr_name)$get_metadata(corruption_data_label)) {
      corruption_names <- c(corruption_names, curr_name)
    }
  }
  return(corruption_names)
}
)

# Names of data frames whose corruption metadata equals the contract level.
DataBook$set("public", "get_corruption_contract_data_names", function() {
  corruption_names <- c()
  for(curr_name in self$get_data_names()) {
    if(self$get_data_objects(curr_name)$is_metadata(corruption_data_label) && self$get_data_objects(curr_name)$get_metadata(corruption_data_label) == corruption_contract_level_label) {
      corruption_names <- c(corruption_names, curr_name)
    }
  }
  return(corruption_names)
}
)

# Runs `query` against the current database connection and returns the first
# column of the result (optionally wrapped in a named list). Empty list when
# no connection is active.
DataBook$set("public", "get_database_variable_names", function(query, data_name, include_overall = TRUE, include, exclude, include_empty = FALSE, as_list = FALSE, excluded_items = c()) {
  if(self$has_database_connection()) {
    temp_data <- DBI::dbGetQuery(self$get_database_connection(), query)
    if(as_list) {
      out <- list()
      out[["database"]] <- temp_data[[1]]
      return(out)
    }
    else return(temp_data[[1]])
  }
  else return(list())
}
)

# Dimension-variable names of a NetCDF file ("" when no file is given).
DataBook$set("public", "get_nc_variable_names", function(file = "", as_list = FALSE, ...) {
  if(file == "") {
    vars <- ""
  }
  else {
    nc_file <- nc_open(file)
    vars <- names(nc_file$dim)
  }
  if(as_list) {
    out <- list()
    out[["dim variables"]] <- vars
    return(out)
  }
  else return(vars)
}
)

# TRUE when a database connection is stored on this data book.
DataBook$set("public", "has_database_connection", function() {
  return(!is.null(self$get_database_connection()))
}
)

# Opens a DBI connection, prompting interactively for the password.
DataBook$set("public", "database_connect", function(dbname, user, host, port, drv = RMySQL::MySQL()) {
  #launches an input box prompt for entering password.
  #done this way so that password characters are not displayed in the output window
  password <- getPass::getPass(paste0(user, " password:"))
  if(length(password) > 0){
    out <- DBI::dbConnect(drv = drv, dbname = dbname, user = user, password = password, host = host, port = port)
    if(!is.null(out)) {
      self$set_database_connection(out)
    }
  }
}
)

# Accessor for the stored DBI connection (NULL when not connected).
DataBook$set("public", "get_database_connection", function() {
  return(private$.database_connection)
}
)

# Stores a DBI connection on the data book.
DataBook$set("public", "set_database_connection", function(dbi_connection) {
  private$.database_connection <- dbi_connection
}
)

# Closes and clears the stored DBI connection, if any.
DataBook$set("public", "database_disconnect", function() {
  if(!is.null(self$get_database_connection())) {
    DBI::dbDisconnect(private$.database_connection)
    self$set_database_connection(NULL)
  }
}
)

#Gets the row count of the table.
DataBook$set("public", "get_db_table_row_count", function(tableName, query_condition = NULL) {
  con <- self$get_database_connection()
  if(is.null(con)){
    stop("No database connection")
  }

  if(is.null(query_condition)){
    query_condition <- ""
  }

  out <- DBI::dbGetQuery(con, paste0("SELECT COUNT(*) as result FROM ",tableName," ", query_condition, ";" ))
  return(out$result)

})

#Imports Climsoft metadata. 
-DataBook$set("public", "import_climsoft_metadata", function(import_stations = FALSE, import_elements = FALSE, import_flags = FALSE) { - - if(!import_stations && !import_elements){ - stop("No metadata selected for import") - } - - con <- self$get_database_connection() - if(is.null(con)){ - stop("No database connection") - } - - #imports metadata - #-------------------------------- - data_list <- list() - - if(import_stations){ - # TODO.(22/03/2023) 2 fields have been intentionally left out because they are yet to be released to Climsoft users. Namely; wsi and gtsWSI - # include them once the new Climsoft release has been supplied to users - stations_df <- DBI::dbGetQuery(con, "SELECT stationId AS station_id, stationName AS station_name, wmoid, icaoid, latitude, longitude, elevation, qualifier, geoLocationMethod AS geo_location_method, geoLocationAccuracy AS geo_location_accuracy, openingDatetime AS opening_date_time, closingDatetime AS closing_date_time, wacaSelection AS waca_selection, cptSelection AS cpt_selection, stationOperational AS station_operational, drainageBasin AS drainage_basin, country AS country, authority, adminRegion AS admin_region_1, adminRegion2 AS admin_region_2, adminRegion3 AS admin_region_3, adminRegion4 AS admin_region_4 FROM station;") - - columns_to_convert <- c("station_id","station_name","qualifier", "station_operational", "drainage_basin", "country", "authority", "admin_region_1", "admin_region_2", "admin_region_3", "admin_region_4") - stations_df[columns_to_convert] <- lapply(stations_df[columns_to_convert], as.factor) - - stations_df_name <- next_default_item("stations_metadata", self$get_data_names(), include_index = FALSE) - data_list[[stations_df_name]] <- stations_df - } - - if(import_elements){ - elements_df <- DBI::dbGetQuery(con, "SELECT elementId AS element_id, elementName AS element_name, abbreviation, description, elementtype AS element_type, upperLimit AS upper_limit , lowerLimit AS lower_limit, units FROM obselement;") - 
- columns_to_convert <- c("element_id","element_name","abbreviation","element_type") - elements_df[columns_to_convert] <- lapply(elements_df[columns_to_convert], as.factor) - - elements_df_name <- next_default_item("elements_metadata", self$get_data_names(), include_index = FALSE) - data_list[[elements_df_name]] <- elements_df - } - - if(import_flags){ - flags_df <- DBI::dbGetQuery(con, "SELECT characterSymbol AS flag_name, description FROM flags;") - - flags_df$flag_name <- as.factor(flags_df$flag_name) - - flags_df_name <- next_default_item("flags_metadata", self$get_data_names(), include_index = FALSE) - data_list[[flags_df_name]] <- flags_df - } - - # Import the data frames into the data book - self$import_data(data_tables = data_list) - -}) - -#imports data from Climsoft observation tables; initial or final. -#imports selected stations and elements metadata -DataBook$set("public", "import_climsoft_data", function(tableName, - station_filter_column, stations = c(), - element_filter_column, elements = c(), - qc_status = -1, start_date = NULL, end_date = NULL, unstack_data = FALSE, - include_element_id = FALSE, include_element_name = FALSE, - include_acquisition_type = FALSE, include_level = FALSE, include_entry_form = FALSE, include_captured_by = FALSE, - include_qc_status = FALSE, include_qc_log = FALSE, include_flag = FALSE, - import_selected_stations_metadata = FALSE, import_selected_elements_metadata = FALSE) { - #connection and parameter checks - #-------------------------------- - con <- self$get_database_connection() - if(is.null(con)){ - stop("No database connection") - } - - if(missing(tableName) || missing(station_filter_column) || missing(element_filter_column) || length(stations) == 0 || length(elements) == 0){ - stop("Missing parameters. 
tableName, station_filter_column, element_filter_column, stations and elements must be supplied") - } - - if (!is.null(start_date) && !lubridate::is.Date(start_date) ) { - stop("start_date must be of type Date.") - } - - if (!is.null(end_date) && !lubridate::is.Date(end_date) ) { - stop("start_date must be of type Date.") - } - #-------------------------------- - - #selects - #-------------------------------- - - sql_select<- paste0(tableName,".recordedFrom AS station_id",", station.stationName AS station_name") - - if(include_element_id){ - sql_select <-paste0(sql_select, ", ", tableName,".describedBy AS element_id") - } - - sql_select <-paste0(sql_select,", obselement.abbreviation AS element_abbrv") - - if(include_element_name){ - sql_select <-paste0(sql_select,", obselement.elementName AS element_name") - } - - if(include_acquisition_type){ - sql_select <-paste0(sql_select,", ", tableName,".acquisitionType"," AS acquisition_type") - } - - if(include_level){ - sql_select <-paste0(sql_select,", ", tableName,".obsLevel"," AS level") - } - - if(include_entry_form){ - sql_select <-paste0(sql_select,", ", tableName,".dataForm"," AS entry_form") - } - - if(include_captured_by){ - sql_select <-paste0(sql_select,", ", tableName,".capturedBy"," AS captured_by") - } - - if(include_qc_status){ - sql_select <-paste0(sql_select,", ", tableName,".qcStatus"," AS qc_status") - } - - sql_select <-paste0(sql_select,", ", tableName,".obsDatetime AS date_time") - sql_select <-paste0(sql_select,", DATE(", tableName,".obsDatetime) AS date") - - if(include_qc_log){ - sql_select <-paste0(sql_select,", ", tableName,".qcTypeLog"," AS qc_log") - } - - if(include_flag){ - sql_select <-paste0(sql_select,", ", tableName,".flag"," AS flag") - } - - sql_select <-paste0(sql_select,", ", tableName,".obsValue AS value") - - sql_select<- paste0("SELECT ", sql_select, " FROM ", tableName, - " INNER JOIN station ON ", tableName, ".recordedFrom = station.stationId", - " INNER JOIN obselement ON 
",tableName,".describedBy = obselement.elementId") - #-------------------------------- - - #filters - #-------------------------------- - stations <- gsub("'", "''", stations) # Escape any apostrophes - elements <- gsub("'", "''", elements) # Escape any apostrophes - sql_stations_filter <- paste0(" station.", station_filter_column, " IN ", paste0("(", paste0("'", stations, "'", collapse = ", "), ")")) - sql_elements_filter <- paste0(" obselement.", element_filter_column, " IN ", paste0("(", paste0("'", elements, "'", collapse = ", "), ")")) - - sql_filter <- sql_stations_filter - sql_filter <- paste0(sql_filter," AND ",sql_elements_filter) - - if(qc_status>-1){ - sql_filter <- paste0(sql_filter," AND qcStatus = ", qc_status) - } - - if (!is.null(start_date)) { - sql_filter = paste0(sql_filter," AND obsDatetime >= ", sQuote(format(start_date, format = "%Y-%m-%d"))) - } - - if (!is.null(end_date)) { - sql_filter <- paste0(sql_filter," AND obsDatetime <=", sQuote(format(end_date, format = "%Y-%m-%d"))) - } - - sql_filter<- paste0(" WHERE ",sql_filter) - #-------------------------------- - - #order by - #-------------------------------- - sql_order_by <- paste0(" ORDER BY ",tableName,".recordedFrom, ",tableName, ".describedBy, ",tableName, ".obsDatetime",";") - #-------------------------------- - - # Data list to store all the imported data frames - data_list <- list() - - #import metadata - #-------------------------------- - - if(import_selected_stations_metadata){ - stations_metadata_name <- next_default_item("stations_metadata", self$get_data_names(), include_index = FALSE) - data_list[[stations_metadata_name]] <- DBI::dbGetQuery(con, paste0("SELECT * FROM station WHERE ", sql_stations_filter)) - } - - if(import_selected_elements_metadata){ - elements_metadata_name <- next_default_item("elements_metadata", self$get_data_names(), include_index = FALSE) - data_list[[elements_metadata_name]] <- DBI::dbGetQuery(con, paste0("SELECT * FROM obselement WHERE ", 
sql_elements_filter)) - } - - #-------------------------------- - - # import and transform observations data data - # -------------------------------- - - # Get observations data from database - observations_df <- DBI::dbGetQuery(con, paste0(sql_select, sql_filter, sql_order_by)) - - # Convert station name and abbreviation columns to factor - columns_to_convert <- c("station_id", "station_name", "element_abbrv") - observations_df[columns_to_convert] <- lapply(observations_df[columns_to_convert], as.factor) - - # Convert the date_time column to POSIXct (date-time) format - observations_df$date_time <- as.POSIXct(observations_df$date_time, format = "%Y-%m-%d %H:%M:%S") - - # convert the date column to date format - observations_df$date <- as.Date(x = observations_df$date) - - if(include_element_id){ - observations_df$element_id <- as.factor(observations_df$element_id) - } - - if(include_element_name){ - observations_df$element_name <- as.factor(observations_df$element_name) - } - - if(include_qc_status){ - observations_df$qc_status <- as.factor(observations_df$qc_status) - } - - if(include_acquisition_type){ - observations_df$acquisition_type <- as.factor(observations_df$acquisition_type) - } - - if(include_level){ - observations_df$level <- as.factor(observations_df$level) - } - - if(include_flag){ - observations_df$flag <- as.factor(observations_df$flag) - } - - if(include_entry_form){ - observations_df$entry_form <- as.factor(observations_df$entry_form) - } - - #-------------------------------- - - # Add observations data to list of data to be imported - # -------------------------------- - observations_data_name <- next_default_item("observations_data", self$get_data_names(), include_index = FALSE) - data_list[[observations_data_name]] <- observations_df - - if(unstack_data){ - observations_unstacked_data_name <- next_default_item("observations_unstacked_data", self$get_data_names(), include_index = FALSE) - data_list[[observations_unstacked_data_name]] <- 
tidyr::pivot_wider(data = observations_df, names_from=element_abbrv, values_from=value) - } - - # Import list of data frames to data book - self$import_data(data_tables = data_list) - -}) - -DataBook$set("public", "import_from_iri", function(download_from, data_file, data_frame_name, location_data_name, path, X1, X2 = NA, Y1, Y2 = NA, get_area_point = "area"){ - - data_list <- import_from_iri(download_from = download_from, data_file = data_file, path = path, X1 = X1, X2 = X2, Y1 = Y1, Y2 = Y2, get_area_point = get_area_point) - names(data_list) = c(next_default_item(prefix = data_frame_name , existing_names = self$get_data_names(), include_index = FALSE), next_default_item(prefix = location_data_name , existing_names = self$get_data_names(), include_index = FALSE)) - self$import_data(data_tables = data_list) - loc_col_names <- names(data_list[[2]]) - self$add_key(location_data_name, loc_col_names) - names(loc_col_names) <- loc_col_names - self$add_link(from_data_frame = names(data_list)[1], to_data_frame = names(data_list)[2], link_pairs = loc_col_names, type = keyed_link_label) -} -) - -DataBook$set("public", "export_workspace", function(data_names, file, include_graphs = TRUE, include_models = TRUE, include_metadata = TRUE) { - e <- new.env() - for(temp_name in data_names) { - e[[temp_name]] <- self$get_data_frame(temp_name, use_current_filter = FALSE) - if(include_graphs) { - graphs <- self$get_objects(data_name = temp_name, object_type_label = "graph") - graph_names <- names(graphs) - for(i in seq_along(graphs)) { - e[[paste(temp_name, graph_names[i], sep = "_")]] <- graphs[[i]] - } - } - if(include_models) { - models <- self$get_objects(data_name = temp_name, object_type_label = "model") - model_names <- names(models) - for(i in seq_along(models)) { - e[[paste(temp_name, model_names[i], sep = "_")]] <- models[[i]] - } - } - if(include_metadata) { - var_metadata <- self$get_variables_metadata(temp_name) - e[[paste(temp_name, "variables_metadata", sep = "_")]] 
<- var_metadata - } - } - save(list = ls(all.names = TRUE, envir = e), envir = e, file = file) -} -) - -DataBook$set("public", "set_links", function(new_links) { - private$.links <- new_links -} -) - -DataBook$set("public","display_daily_graph", function(data_name, date_col = NULL, station_col = NULL, year_col = NULL, doy_col = NULL, climatic_element = NULL, upper_limit = 100, bar_colour = "blue", rug_colour = "red") { - self$get_data_objects(data_name)$display_daily_graph(date_col = date_col,station_col = station_col, year_col = year_col, doy_col = doy_col, climatic_element = climatic_element, rug_colour = rug_colour, bar_colour = bar_colour, upper_limit = upper_limit) -} -) - -DataBook$set("public","create_variable_set", function(data_name, set_name, columns) { - self$get_data_objects(data_name)$create_variable_set(set_name = set_name, columns = columns) -} -) - -DataBook$set("public","update_variable_set", function(data_name, set_name, columns, new_set_name) { - self$get_data_objects(data_name)$update_variable_set(set_name = set_name, columns = columns, new_set_name = new_set_name) -} -) - -DataBook$set("public","delete_variable_sets", function(data_name, set_names) { - self$get_data_objects(data_name)$delete_variable_sets(set_names = set_names) -} -) - -DataBook$set("public","get_variable_sets_names", function(data_name, include_overall = TRUE, include, exclude, include_empty = FALSE, as_list = FALSE, excluded_items = c()) { - self$get_data_objects(data_name)$get_variable_sets_names(include_overall = include_overall, include = include, exclude = exclude, include_empty = include_empty, as_list = as_list, excluded_items = excluded_items) -} -) - -DataBook$set("public","get_variable_sets", function(data_name, set_names, force_as_list = FALSE) { - self$get_data_objects(data_name)$get_variable_sets(set_names = set_names, force_as_list = force_as_list) -} -) - -DataBook$set("public", "crops_definitions", function(data_name, year, station, rain, day, rain_totals, 
plant_days, plant_lengths, start_check = c("both", "yes", "no"), season_data_name, start_day, end_day, return_crops_table = TRUE, definition_props = TRUE){ - # Run checks - is_station <- !missing(station) - start_check <- match.arg(start_check) - - if(missing(year)) stop("Year column must be specified.") - if(!is_station) by <- year - else by <- c(year, station) - if(missing(season_data_name)) season_data_name <- data_name - if(season_data_name != data_name) { - season_by <- self$get_equivalent_columns(from_data_name = data_name, columns = by, to_data_name = season_data_name) - if(is.null(season_by)) stop("The data frames specified must be linked by the year/station columns.") - } - year_col <- self$get_columns_from_data(data_name, year) - unique_year <- na.omit(unique(year_col)) - - # creating our combinations - # if there's a station, we only want to consider the year-station combinations that actually exist - if(is_station) { - expanded_df <- unique(self$get_data_frame(data_name) %>% dplyr::select(year, station)) - } else { - expanded_df <- unique(self$get_data_frame(data_name) %>% dplyr::select(year)) - } - - # Set names - plant_day_name <- "plant_day" - plant_length_name <- "plant_length" - rain_total_name <- "rain_total" - - # Create all combinations of X and Y - condition_combinations <- expand.grid(rain_totals, plant_lengths, plant_days) - names_list <- c(rain_total_name, plant_length_name, plant_day_name) - condition_combinations <- setNames(condition_combinations, names_list) - - # Expand the df with xy_combinations - df <- merge(expanded_df, condition_combinations, by = NULL) - daily_data <- self$get_data_frame(data_name) - - if(season_data_name != data_name) { - join_by <- by - names(join_by) <- season_by - season_data <- self$get_data_frame(season_data_name) - vars <- c(season_by, start_day, end_day) - season_data <- season_data %>% dplyr::select(!!! 
rlang::syms(vars)) - df <- dplyr::left_join(df, season_data, by = join_by) - } else { - col_names <- c(by, start_day, end_day) - season_data <- daily_data %>% - dplyr::select(!!! rlang::syms(col_names)) %>% - dplyr::group_by(!!! rlang::syms(by)) %>% - dplyr::summarise(!! rlang::sym(start_day) := dplyr::first(!! rlang::sym(start_day)), - !! rlang::sym(end_day) := dplyr::first(!! rlang::sym(end_day))) - df <- dplyr::left_join(df, season_data, by = by) - } - - ## Onto the calculation - proportion_df <- NULL - crops_def_table <- NULL - i <- 1 - - calculate_rain_condition <- function(data){ - data <- data %>% dplyr::select(-rain_total_name) %>% unique() - - for (i in 1:nrow(data)) { - # Create a condition to filter the daily data based on the year, day, and plant day/length - ind <- daily_data[[year]] == data[[year]][i] & - daily_data[[day]] >= data[[plant_day_name]][i] & - daily_data[[day]] < (data[[plant_day_name]][i] + data[[plant_length_name]][i]) - - if (is_station) { - ind <- ind & (daily_data[[station]] == data[[station]][i]) - } - - # Filter the daily data based on the condition - rain_values <- daily_data[[rain]][ind] - - # Calculate the sum of rain values and check conditions - sum_rain <- sum(rain_values, na.rm = TRUE) - - # Set as NA for certain cases: This anyNA is fixed later when we have the total_rainfall value of interest - if (anyNA(rain_values)) sum_rain <- -1 * sum_rain # as a way to tag the sum_rain value for later, we set as -ve value. - #&& sum_rain < data[[rain_total_name]][i]){ - if (length(rain_values) + 1 < data[[plant_length_name]][i]) sum_rain <- NA - if (all(is.na(rain_values))) sum_rain <- NA - - # Assign the calculated sum to the respective row in the result dataframe - data[["rain_total_actual"]][i] <- sum_rain - } - return(data) -} - - # run by plant day and plant_length - for (day_i in plant_days){ - for (length_i in plant_lengths){ - - # for each plant length and plant day combination, calculate the total rainfall in that period. 
- filtered_data_1 <- df %>% filter(plant_day == day_i) %>% filter(plant_length == length_i) - - # Now run to get the rain conditions - filtered_data_1 <- calculate_rain_condition(data = filtered_data_1) - - # so: filtered_data_1 contains the total rainfall that occurred in that period for all the plant_day and plant_length combinations - # we now split by our different rain_total_actual conditions. - # we do this here to avoid calculating it multiple times. - for (rain_i in rain_totals){ - filtered_data <- filtered_data_1 %>% dplyr::mutate(rain_total = rain_i) - - # take the rows < 0 and run a check. We want to check - # if (anyNA(rain_values) && sum_rain < data[[rain_total_name]][i]) { sum_rain <- NA - # we do this here because we want to avoid running rain_total in calculate_rain_condition for efficiency purposes. - filtered_data <- filtered_data %>% - dplyr::mutate(rain_total_actual = ifelse(rain_total_actual < 0, ifelse(-1*rain_total_actual < rain_total, NA, -1*rain_total_actual), rain_total_actual)) - - if (!missing(station)) filtered_data <- filtered_data %>% dplyr::group_by(.data[[station]], .data[[year]]) - else filtered_data <- filtered_data %>% dplyr::group_by(.data[[year]]) - - filtered_data <- filtered_data %>% - # first add a column (T/F) that states that it is in the rainfall period or not. 
- dplyr::mutate(plant_day_cond = .data[[start_day]] <= plant_day, - length_cond = plant_day + plant_length <= .data[[end_day]], - rain_cond = rain_i <= rain_total_actual) %>% - dplyr::ungroup() - - if (start_check == "both"){ - - filtered_data <- filtered_data %>% - dplyr::mutate( - overall_cond_with_start = plant_day_cond & length_cond & rain_cond, - overall_cond_no_start = length_cond & rain_cond) - if (!missing(station)) filtered_data <- filtered_data %>% dplyr::group_by(.data[[station]]) - proportion_data <- filtered_data %>% - dplyr::summarise(prop_success_with_start = sum(overall_cond_with_start, na.rm = TRUE)/length(na.omit(overall_cond_with_start)), - prop_success_no_start = sum(overall_cond_no_start, na.rm = TRUE)/length(na.omit(overall_cond_no_start))) - } else { - filtered_data <- filtered_data %>% - dplyr::mutate( - overall_cond = case_when( - start_check == "yes" ~ plant_day_cond & length_cond & rain_cond, - start_check == "no" ~ TRUE & length_cond & rain_cond) - ) - if (!missing(station)) filtered_data <- filtered_data %>% dplyr::group_by(.data[[station]]) - proportion_data <- filtered_data %>% - dplyr::summarise(prop_success = sum(overall_cond, na.rm = TRUE)/length(na.omit(overall_cond))) - } - - if (return_crops_table){ - crops_def_table[[i]] <- filtered_data %>% dplyr::mutate(rain_total = rain_i, - plant_length = length_i, - plant_day = day_i) - } - if (definition_props){ - proportion_df[[i]] <- proportion_data %>% dplyr::mutate(rain_total = rain_i, - plant_length = length_i, - plant_day = day_i) - } - i <- i + 1 - } - } - } - - if (!missing(station)) column_order <- c(station, plant_day_name, plant_length_name, rain_total_name) - else column_order <- c(plant_day_name, plant_length_name, rain_total_name) - - if (return_crops_table){ - # here we get crop_def and import it as a new DF - crops_def_table <- dplyr::bind_rows(crops_def_table) %>% dplyr::select(c(all_of(column_order), everything())) %>% 
dplyr::arrange(dplyr::across(dplyr::all_of(column_order))) - crops_name <- "crop_def" - crops_name <- next_default_item(prefix = crops_name, existing_names = self$get_data_names(), include_index = FALSE) - data_tables <- list(crops_def_table) - names(data_tables) <- crops_name - if(season_data_name != data_name) { - crops_by <- season_by - names(crops_by) <- by - self$add_link(crops_name, season_data_name, crops_by, keyed_link_label) - } - self$import_data(data_tables = data_tables) - } - if (definition_props){ - prop_data_frame <- dplyr::bind_rows(proportion_df) %>% dplyr::select(c(all_of(column_order), everything())) %>% dplyr::arrange(dplyr::across(dplyr::all_of(column_order))) - - prop_name <- "crop_prop" - prop_name <- next_default_item(prefix = prop_name, existing_names = self$get_data_names(), include_index = FALSE) - data_tables <- list(prop_data_frame) - names(data_tables) <- prop_name - self$import_data(data_tables = data_tables) - - # Add Link - if (return_crops_table){ - if (!missing(station)){ - self$add_link(from_data_frame = crops_name, to_data_frame = prop_name, link_pairs=c(station = station, rain_total = rain_total_name, plant_length = plant_length_name, plant_day = plant_day_name), type="keyed_link") - } else { - self$add_link(from_data_frame = crops_name, to_data_frame = prop_name, link_pairs=c(rain_total = rain_total_name, plant_length = plant_length_name, plant_day = plant_day_name), type="keyed_link") - } - } - } -} -) - -#' Converting grid (wide) format daily climatic data into tidy (long format) data -#' @param x Input data frame -#' @param format Either "years", "months" or "days" to indicate what the stacking columns represent -#' @param day the name of the column containing day of month values (if format != "days") -#' @param month the name of the column containing month values (if format != "months") -#' @param year the name of the column containing year values (required if format != "years") -#' @param stack_years when format = "years" 
stack_years specifies the years. Must be same length as stack_cols -#' If not specified, the function will try to determine the years using the format "Xyyyy" where "X" is any character and "yyyy" is the year. -#' @param stack_cols a character vector of columns to stack -#' if format == "days" 31 columns (in order) for each day of the month are expected, or 62 with alternate value/flag columns -#' if format == "months" 12 columns (in order) for each month are expected -#' if format == "years" any number of year columns can be given. These should be named with format "Xyyyy" -#' where "X" is any character and "yyyy" is the year -#' @param station (optional) when format = "days" or "months" the name of a station column can be given -#' when the data is for multiple stations -#' @param element (optional) when format = "days" or "months" the name of an element column can be given -#' when the data is for multiple elements -#' @param element_name (optional) if data is for single element, element_name is the name of the column containing -#' the values. Default is "value". Ignored if element not missing. -#' @param ignore_invalid If TRUE, rows with non missing element values on invalid dates e.g. 31 Sep or 29 Feb in non leap years, will be removed. -#' If FALSE (the default) an error will be given with details of where the values occur. -#' Strongly recommended to first run with FALSE and then TRUE after examining or correcting any issues. -#' @param silent If TRUE, detailed output, such as rows with non missing element values on invalid dates or duplicate values will be suppressed. -#' @param unstack_elements If TRUE, when there are multiple elements there will be one column for each element (unstacked), otherwise there will be an element -#' column and a value column. This also applies to flag columns if included. -#' @param new_name Name for the new data frame. 
-#' @export -#' @examples -#' yearcols <- data.frame(month = rep(1:12, times = c(31,29,31,30,31,30,31,31,30,31,30,31)), -#' day = c(1:31,1:29,1:31,1:30,1:31,1:30,1:31,1:31,1:30,1:31,1:30,1:31), -#' X2000 = rnorm(366), X2001 = rnorm(366), X2002 = rnorm(366), X2003 = rnorm(366)) -#' yearcols[60,4:6] <- NA -#' tidy_climatic_data(x = yearcols, format = "years", stack_cols = c("X2000", "X2001", "X2002", "X2003"), element_name = "tmin") - -DataBook$set("public","tidy_climatic_data", function(x, format, stack_cols, day, month, year, stack_years, station, element, element_name="value", ignore_invalid = FALSE, silent = FALSE, unstack_elements = TRUE, new_name) { - - if(!format %in% c("days", "months", "years")) stop("format must be either 'days', 'months' or 'years'") - if(!all(stack_cols %in% names(x))) stop("Some of the stack_cols were not found in x.") - if(!missing(day) && !day %in% names(x)) stop("day column not found in x.") - if(!missing(month) && !month %in% names(x)) stop("month column not found in x.") - if(!missing(year) && !year %in% names(x)) stop("year column not found in x.") - if(!missing(station) && !station %in% names(x)) stop("station column not found in x.") - if(!missing(element) && !element %in% names(x)) stop("element column not found in x.") - # Default to FALSE and updated if format == "days" - flags <- FALSE - - # check day column is valid (if specified) - if(!missing(day)) { - day_data <- x[[day]] - if(anyNA(day_data)) stop("day column contains: ", sum(is.na(day_data)), " missing values") - if(!is.numeric(day_data)) stop("day column must be numeric") - invalid_day <- (day_data < 1 | day_data > 31 | (day_data %% 1) != 0) - if(any(invalid_day)) { - invalid_values <- unique(day_data[invalid_day]) - stop("day column contains the following invalid values: ", paste(invalid_values, collapse = ",")) - } - } - - # check month column is valid (if specified) - if(!missing(month)) { - month_data <- x[[month]] - # Initialise no month format - month_format <- 
"" - if(anyNA(month_data)) stop("month column contains: ", sum(is.na(month_data)), " missing values") - if(is.numeric(month_data)) { - invalid_month <- (month_data < 1 | month_data > 12 | (month_data %% 1) != 0) - if(any(invalid_month)) { - invalid_values <- unique(month_data[invalid_month]) - stop("month column contains the following invalid values: ", paste(invalid_values, collapse = ",")) - } - # Month format will be used in as.Date() - month_format <- "%m" - } - # This case is for numeric months but stored as character e.g. c("1", "2") - else if(all(!is.na(as.numeric(month_data)))) { - if(all(as.numeric(month_data) %in% 1:12)) { - month_format <- "%m" - # This ensures format is correct and removes any spaces etc. e.g. "1 " -> 1 - x[[month]] <- as.numeric(month_data) - } - } - else { - # Convert to title case to match month.name and month.abb - month_data_title <- stringr::str_to_title(month_data) - if(all(month_data_title %in% month.abb)) month_format <- "%b" - else if(all(month_data_title %in% month.name)) month_format <- "%B" - if(month_format == "") { - invalid_short <- unique(month_data[!month_data_title %in% month.abb]) - invalid_long <- unique(month_data[!month_data_title %in% month.name]) - if(length(invalid_short) < 12) { - stop("Some month values were not unrecognised.\nIf specifying short names the following are invalid: ", paste(invalid_short, collapse = ", "), "\nAlternatively use a numeric month column.") - } - else if(length(invalid_long) < 12) { - stop("Some month values were not unrecognised.\nIf specifying full names the following are invalid: ", paste(invalid_long, collapse = ", "), "\nAlternatively use a numeric month column.") - } - else stop("No values in the month column were recognised.\nUse either\n short names: ", paste(month.abb, collapse = ", "), "\nfull names: ", paste(month.name, collapse = ", "), "\nor numbers 1 to 12.") - } - # Put title case months into the data as this will be needed to make the date column - x[[month]] <- 
month_data_title - } - } - - # check year column is valid (if specified) - if(!missing(year)) { - year_data <- x[[year]] - if(anyNA(year_data)) stop("year column contains: ", sum(is.na(year_data)), " missing values") - year_format <- "" - if(!is.numeric(year_data)) { - if(all(!is.na(as.numeric(year_data)))) { - x[[year]] <- as.numeric(year_data) - year_data <- x[[year]] - } - else stop("Cannot recognise years from year column. Try using a numeric year column.") - } - if(all(stringr::str_length(year_data) == 4)) year_format <- "%Y" - else if(all(stringr::str_length(year_data) == 2)) year_format <- "%y" - else stop("Inconsistent values found in year column. Year column must be column of four digit years or column of two digit years") - } - - if(format == "days") { - ndays <- 31 - # month column required in this case - if(missing(month)) stop("month column is required when format == 'days'") - - # year column required in this case - if(missing(year)) stop("year column is required when format == 'days'") - - # stack column checks - if(length(stack_cols) != ndays && length(stack_cols) != 2 * ndays) stop("You have specified: ", length(stack_cols), " stack columns\nThere must be exactly ", ndays, " or ", 2 * ndays, " stack columns when format == 'days'") - - # TRUE if flag columns are included - flags <- length(stack_cols) == 2 * ndays - if(flags) { - # We assume that value/flag columns alternate and are in correct order i.e. c(value1, flag1, value2, flag2, ..., value31, flag31) - val_col_names <- stack_cols[seq(1, 2 * ndays - 1, 2)] - flag_col_names <- stack_cols[seq(2, 2 * ndays, 2)] - # TODO This should be a more global function - if(!all(sapply(x[, val_col_names], function(col) is.numeric(col) || (is.logical(col) && all(is.na(col)))))) stop("Every other column must be numeric to represent values (starting with the first columns). 
\nThe following value columns are not numeric: ", paste(stack_cols[!sapply(x[, val_col_names], is.numeric)], collapse = ",")) - # Name of flag column - flag_name <- "flag" - } - else { - if(!all(sapply(x[, stack_cols], function(col) is.numeric(col) || (is.logical(col) && all(is.na(col)))))) stop("All stack_cols must be numeric\nThe following stack_cols are not numeric: ", paste(stack_cols[!sapply(x[, stack_cols], is.numeric)], collapse = ",")) - } - - # This ensures all other columns are dropped - y <- data.frame(year = x[[year]], month = x[[month]], x[ , stack_cols]) - if(!missing(station)) y$station <- x[[station]] - if(!missing(element)) y$element <- x[[element]] - # In case element_name is the name of an existing column in y - if(element_name %in% names(y)) element_name <- next_default_item(prefix = element_name, existing_names = names(y)) - if(flags) { - # renaming the stack_cols with a consistent pattern makes it possible for pivot_longer to stack both sets of columns together and construct the day column correctly - # This assumes stack_cols are in the correct order i.e. c(value1, flag1, value2, flag2, ..., value31, flag31) - new_stack_cols <- paste(c("value", "flag"), rep(1:ndays, each = 2), sep = "_") - names(y)[names(y) %in% stack_cols] <- new_stack_cols - # ".value" is a special sentinel used in names_to to ensure names of value columns come from the names of cols. See ?pivot_longer values_to section for details. - y <- tidyr::pivot_longer(y, cols = tidyselect::all_of(new_stack_cols), names_to = c(".value", "day"), names_sep = "_") - } - else { - # renaming the stack_cols so that the day column can be constructed correctly - # This assumes stack_cols are in the correct order i.e. 1 - 31 - new_stack_cols <- paste0("day", 1:ndays) - names(y)[names(y) %in% stack_cols] <- new_stack_cols - y <- tidyr::pivot_longer(y, cols = tidyselect::all_of(new_stack_cols), names_to = "day", values_to = element_name) - # extract day number from e.g. 
"day10" - y$day <- substr(y$day, 4, 5) - } - - y$date <- as.Date(paste(y$year, y$month, y$day), format = paste(year_format, month_format, "%d")) - } - else if(format == "months") { - if(!all(sapply(x[, stack_cols], function(col) is.numeric(col) || (is.logical(col) && all(is.na(col)))))) stop("All stack_cols must be numeric\nThe following stack_cols are not numeric: ", paste(stack_cols[!sapply(x[, stack_cols], is.numeric)], collapse = ",")) - - # month column required in this case - if(missing(day)) stop("day column is required when format == 'months'") - - # year column required in this case - if(missing(year)) stop("year column is required when format == 'months'") - - # stack column checks - if(length(stack_cols) != 12) stop("You have specified: ", length(stack_cols), " stack columns\nThere must be exactly 12 stack columns when format == 'months'") - - # This ensures all other columns are dropped - y <- data.frame(year = x[[year]], day = x[[day]], x[ , stack_cols]) - if(!missing(station)) y$station <- x[[station]] - if(!missing(element)) y$element <- x[[element]] - # In case element_name is the name of an existing column in y - if(element_name %in% names(y)) element_name <- next_default_item(prefix = element_name, existing_names = names(y)) - # renaming the stack_cols so that the day column can be constructed correctly - # This assumes stack_cols are in the correct order i.e. 1 - 12 - new_stack_cols <- paste0("month", 1:12) - names(y)[names(y) %in% stack_cols] <- new_stack_cols - y <- tidyr::pivot_longer(y, cols = tidyselect::all_of(new_stack_cols), names_to = "month", values_to = element_name) - # extract month number from e.g. 
"month10" - y$month <- substr(y$month, 6, 7) - - y$date <- as.Date(paste(y$year, y$month, y$day), format = paste(year_format, "%m", "%d")) - } - else if(format == "years") { - if(!all(sapply(x[, stack_cols], function(col) is.numeric(col) || (is.logical(col) && all(is.na(col)))))) stop("All stack_cols must be numeric\nThe following stack_cols are not numeric: ", paste(stack_cols[!sapply(x[, stack_cols], is.numeric)], collapse = ",")) - - by_cols <- c() - if(!missing(station)) by_cols <- c(by_cols, station) - if(!missing(element)) by_cols <- c(by_cols, element) - - if(length(by_cols) > 0) { - group_lengths <- x %>% dplyr::group_by(!!! rlang::syms(by_cols)) %>% dplyr::summarise(n = n()) - if(any(group_lengths$n != 366)) stop("data must have exactly 366 rows per station per element when format = 'years'") - } - else if(nrow(x) != 366) stop("data must have exactly 366 rows for a single station and element when format = 'years'") - - if(!missing(stack_years) && length(year_list) != length(stack_cols)) stop("stack_years must be the same length as stack_cols") - - # stack_years allows to specify the years represented by stack_cols. - # If this is blank, attempt to infer stack_years by assuming stack_cols are in the format c("X1990", "X1991", ...) - if(missing(stack_years)) { - # Remove first character and convert to numeric - stack_years <- as.numeric(stringr::str_sub(stack_cols, 2)) - invalid_ind <- is.na(stack_years) | stringr::str_length(stack_years) != 4 - if(any(invalid_ind)) { - cat("Unrecognised year columns:", paste(stack_years[invalid_ind], collapse = ", ")) - stop("Cannot determine year of some columns. 
Year columns must be named with format 'Xyyyy' where X is any character and yyyy is the year.") - } - } - x$doy <- 1:366 - # This ensures all other columns are dropped - y <- data.frame(doy = x$doy, x[ , stack_cols]) - if(!missing(station)) y$station <- x[[station]] - if(!missing(element)) y$element <- x[[element]] - # In case element_name is the name of an existing column in y - if(element_name %in% names(y)) element_name <- next_default_item(prefix = element_name, existing_names = names(y)) - y <- tidyr::pivot_longer(y, cols = tidyselect::all_of(stack_cols), names_to = "year", values_to = element_name) - - # This assumes stack_cols and stack_years are in the same order - y$year <- plyr::mapvalues(y$year, stack_cols, stack_years) - - # Replacing day 60 with 0 for non-leap years. This will result in NA dates. - y$doy[(!lubridate::leap_year(as.numeric(y$year))) & y$doy == 60] <- 0 - y$doy[(!lubridate::leap_year(as.numeric(y$year))) & y$doy > 60] <- y$doy[(!lubridate::leap_year(as.numeric(y$year))) & y$doy > 60] - 1 - y$date <- as.Date(paste(y$year, y$doy), format = paste("%Y", "%j")) - # Put day 0 back as 60. Needed in error displaying only. - y$doy[y$doy == 0] <- 60 - } - - continue <- TRUE - # check if there are any non missing values on missing dates - # this is a problem as missing dates are invalid dates so should not have values - invalid_ind <- is.na(y$date) & !is.na(y[[element_name]]) - if(sum(invalid_ind) > 0) { - cat("There are:", sum(invalid_ind), "measurement values on invalid dates.\n") - if(!silent) { - cat("\n*** Invalid dates ***\n\n") - invalid_data <- dplyr::filter(y, invalid_ind) - if(format == "days" || format == "months") { - invalid_data_display <- invalid_data %>% dplyr::select(year, month, day) - } - else { - invalid_data_display <- invalid_data %>% dplyr::select(year, doy) - } - # Also make a data.frame (instead of tibble) so that display will show all rows. 
- if(!missing(station)) { - invalid_data_display <- data.frame(station = invalid_data$station, invalid_data_display) - } - if(!missing(element)) { - invalid_data_display <- data.frame(element = invalid_data$element, invalid_data_display) - } - invalid_data_display <- data.frame(invalid_data_display) - invalid_data_display[[element_name]] <- invalid_data[[element_name]] - print(invalid_data_display, row.names = FALSE) - } - if(ignore_invalid) cat("Warning: These rows have been removed.\n") - else { - # This should be a stop but then detailed output can't be displayed by R-Instat - cat("There are:", sum(invalid_ind), "measurement values on invalid dates. Correct these or specify ignore_invalid = TRUE to ignore them. See output for more details.\n") - continue <- FALSE - } - } - - # This should have been a stop above but then detailed output can't be displayed by R-Instat - if(!continue) return() - - # Standard format of slowest varying structure variables first (station then element then date) followed by measurements - if(!missing(station)) z <- data.frame(station = forcats::as_factor(y$station), date = y$date) - else z <- data.frame(date = y$date) - if(!missing(element)) z$element <- y$element - z[[element_name]] <- y[[element_name]] - if(flags) z[[flag_name]] <- y[[flag_name]] - - # Initialise id columns used for sorting data - id_cols <- c() - if(!missing(station)) id_cols <- c(id_cols, "station") - - z <- dplyr::filter(z, !is.na(date)) - - # If data contains multiple elements, optionally unstack the element column - if(!missing(element)) { - if(unstack_elements) { - # pivot_wider allows unstacking multiple column sets, used when flags included. 
- values_from <- c(element_name) - if(flags) values_from <- c(values_from, flag_name) - # first check for unique combinations to ensure no duplicates - z_dup <- duplicated(z %>% dplyr::select(-tidyselect::all_of(values_from))) - if(any(z_dup > 0)) { - # This should be a stop but then detailed output can't be displayed by R-Instat - cat("\nError: Cannot tidy data as some elements have multiple values on the same date. Check and resolve duplicates first.\n") - z_check <- z %>% filter(z_dup > 0) - if(!silent) { - cat("\n*** Duplicates ***\n\n") - print(z_check, row.names = FALSE) - } - continue <- FALSE - } - else z <- tidyr::pivot_wider(z, names_from = element, values_from = tidyselect::all_of(values_from)) - } - # If not unstacking then need to sort by element column - else id_cols <- c(id_cols, "element") - } - - # This should have been a stop above but then detailed output can't be displayed by R-Instat - if(!continue) return() - - # Add this last to ensure date varies fastest - id_cols <- c(id_cols, "date") - # TODO Find a better way to do this. Update if there could be more the 3 id cols. 
- if(length(id_cols) == 1) { - z <- z %>% dplyr::arrange(.data[[id_cols[1]]]) - } - else if(length(id_cols) == 2) { - z <- z %>% dplyr::arrange(.data[[id_cols[1]]], .data[[id_cols[2]]]) - } - else if(length(id_cols) == 3) { - z <- z %>% dplyr::arrange(.data[[id_cols[1]]], .data[[id_cols[2]]], .data[[id_cols[3]]]) - } - if(missing(new_name) || new_name == "") new_name <- next_default_item("data", existing_names = self$get_data_names()) - data_list <- list(z) - names(data_list) <- new_name - self$import_data(data_tables=data_list) -} -) - -DataBook$set("public","get_geometry", function(data) { - if(missing(data)) stop("data_name is required") - else if("sf" %in% class(data)) return(attr(data, "sf_column")) - else if("geometry" %in% colnames(data)) return("geometry") - else return("") -} -) -DataBook$set("public","package_check", function(package) { - out <- list() - av_packs <- available.packages() - av_packs <- data.frame(av_packs) - if(package %in% rownames(installed.packages())) { - out[[1]] <- 1 - v_machine <- as.character(packageVersion(package)) - v_web <- as.character(av_packs[av_packs$Package == package, "Version"]) - out[[2]] <- compareVersion(v_machine, v_web) - out[[3]] <- v_machine - out[[4]] <- v_web - return(out) - } - else { - #check if the package name is typed right - if(package %in% av_packs) { - out[[1]] <- 2 - return(out) - } - else { - #wrong spelling check you spelling - out[[1]] <- 0 - return(out) - } - } -} -) - -DataBook$set("public", "download_from_IRI", function(source, data, path = tempdir(), min_lon, max_lon, min_lat, max_lat, min_date, max_date, name, download_type = "Point", import = TRUE) { - init_URL <- "https://iridl.ldeo.columbia.edu/SOURCES/" - dim_x <- "X" - dim_y <- "Y" - dim_t <- "T" - if (source == "UCSB_CHIRPS") { - prexyaddress <- paste0(init_URL, ".UCSB/.CHIRPS/.v2p0") - if (data == "daily_improved_global_0p25_prcp") { - extension <- ".daily-improved/.global/.0p25/.prcp" - } # 1 Jan 1981 to 31 Jul 2020 - else if (data == 
"daily_improved_global_0p05_prcp") { - extension <- ".daily-improved/.global/.0p05/.prcp" - } # 1 Jan 1981 to 31 Jul 2020 - else if (data == "dekad_prcp") { - extension <- ".dekad/.prcp" - } # (days since 1960-01-01) ordered [ (1-10 Jan 1981) (11-20 Jan 1981) (21-31 Jan 1981) ... (21-31 Aug 2020)] - else if (data == "monthly_global_prcp") { - extension <- ".monthly/.global/.precipitation" - } # grid: /T (months since 1960-01-01) ordered (Jan 1981) to (Jul 2020) by 1.0 N= 475 pts :grid - else { - stop("Data file does not exist for CHIRPS V2P0 data") - } - } else if (source == "TAMSAT_v3.0") { - dim_x <- "lon" - dim_y <- "lat" - prexyaddress <- paste0(init_URL, ".Reading/.Meteorology/.TAMSAT/.TARCAT/.v3p0") - if (data == "daily_rfe") { - dim_t <- "time" - extension <- ".daily/.rfe" - } # grid: /time (julian_day) ordered (1 Jan 1983) to (10 Sep 2020) by 1.0 N= 13768 pts :grid - else if (data == "dekadal_rfe") { - extension <- ".dekadal/.rfe" - } # grid: /T (days since 1960-01-01) ordered [ (1-10 Jan 1983) (11-20 Jan 1983) (21-31 Jan 1983) ... 
(1-10 Sep 2020)] N= 1357 pts :grid - else if (data == "monthly_rfe") { - dim_t <- "time" - extension <- ".monthly/.rfe" - } # grid: /time (months since 1960-01-01) ordered (Jan 1983) to (Aug 2020) by 1.0 N= 452 pts :grid - else if (data == "monthly_rfe_calc") { - dim_t <- "time" - extension <- ".monthly/.rfe_calc" - } # grid: /time (months since 1960-01-01) ordered (Feb 1983) to (Sep 2020) by 1.0 N= 452 pts :grid - else { - stop("Data file does not exist for TAMSAT_v3.0 data") - } - } else if (source == "TAMSAT_v3.1") { - prexyaddress <- paste0(init_URL, ".Reading/.Meteorology/.TAMSAT/.TARCAT/.v3p1") - if (data == "daily_rfe") { - extension <- ".daily/.rfe" - } # grid: /T (julian_day) ordered (1 Jan 1983) to (10 Sep 2020) by 1.0 N= 13768 pts :grid - else if (data == "daily_rfe_filled") { - extension <- ".daily/.rfe_filled" - } # grid: /T (julian_day) ordered (1 Jan 1983) to (10 Sep 2020) by 1.0 N= 13768 pts :grid - else if (data == "dekadal_rfe") { - extension <- ".dekadal/.rfe" - } # grid: /T (days since 1960-01-01) ordered [ (1-10 Jan 1983) (11-20 Jan 1983) (21-31 Jan 1983) ... (1-10 Sep 2020)] N= 1357 pts :grid - else if (data == "dekadal_rfe_filled") { - extension <- ".dekadal/.rfe_filled" - } # grid: /T (days since 1960-01-01) ordered [ (1-10 Jan 1983) (11-20 Jan 1983) (21-31 Jan 1983) ... 
(1-10 Sep 2020)] N= 1357 pts :grid - else if (data == "monthly_rfe") { - extension <- ".monthly/.rfe" - } # grid: /T (months since 1960-01-01) ordered (Jan 1983) to (Aug 2020) by 1.0 N= 452 pts :grid - else if (data == "monthly_rfe_filled") { - extension <- ".monthly/.rfe_filled" - } # grid: /T (months since 1960-01-01) ordered (Jan 1983) to (Aug 2020) by 1.0 N= 452 pts :grid - else { - stop("Data file does not exist for TAMSAT_v3.1 data") - } - } else if (source == "NOAA") { - prexyaddress <- paste0(init_URL, ".NOAA/.NCEP/.CPC/.FEWS/.Africa") - if (data == "daily_rfev2_est_prcp") { - extension <- ".DAILY/.RFEv2/.est_prcp" - } # (days since 2000-10-31 12:00:00) ordered (31 Oct 2000) to (12 Sep 2020) - else if (data == "10day_rfev2_est_prcp") { - extension <- ".TEN-DAY/.RFEv2/.est_prcp" - } # grid: /T (days since 1960-01-01) ordered [ (1-10 Dec 1999) (11-20 Dec 1999) (21-31 Dec 1999) ... (1-10 Sep 2020)] N= 748 pts :grid - else if (data == "daily_est_prcp") { - extension <- ".DAILY/.ARC2/.daily/.est_prcp" - } # (days since 1960-01-01 12:00:00) ordered (1 Jan 1983) to (12 Sep 2020) - else if (data == "monthly_est_prcp") { - extension <- ".DAILY/.ARC2/.monthly/.est_prcp" - } # (months since 1960-01-01) ordered (Jan 1983) to (Aug 2020) - else { - stop("Data file does not exist for NOAA data") - } - } else if (source == "NOAA_CMORPH_DAILY" || source == "NOAA_CMORPH_3HOURLY" || source == "NOAA_CMORPH_DAILY_CALCULATED") { - if (source == "NOAA_CMORPH_DAILY") { - prexyaddress <- paste0(init_URL, ".NOAA/.NCEP/.CPC/.CMORPH/.daily") - } - else if (source == "NOAA_CMORPH_3HOURLY") { - prexyaddress <- paste0(init_URL, ".NOAA/.NCEP/.CPC/.CMORPH/.3-hourly") - } - else if (source == "NOAA_CMORPH_DAILY_CALCULATED") { - prexyaddress <- paste0(init_URL, ".NOAA/.NCEP/.CPC/.CMORPH/.daily_calculated") - } - if (data == "mean_microwave_only_est_prcp") { - extension <- ".mean/.microwave-only/.comb" - } - else if (data == "mean_morphed_est_prcp") { - extension <- ".mean/.morphed/.cmorph" - 
} - else if (data == "orignames_mean_microwave_only_est_prcp") { - extension <- ".orignames/.mean/.microwave-only/.comb" - } - else if (data == "orignames_mean_morphed_est_prcp") { - extension <- ".orignames/.mean/.morphed/.cmorph" - } - else if (data == "renamed102015_mean_microwave_only_est_prcp") { - extension <- ".renamed102015/.mean/.microwave-only/.comb" - } - else if (data == "renamed102015_mean_morphed_est_prcp") { - extension <- ".renamed102015/.mean/.morphed/.cmorph" - } - else { - stop("Data file does not exist for NOAA CMORPH data") - } - } else if (source == "NASA") { - prexyaddress <- paste0(init_URL, ".NASA/.GES-DAAC/.TRMM_L3/.TRMM_3B42/.v7") - if (data == "daily_prcp") { - extension <- ".daily/.precipitation" - } # (days since 1998-01-01 00:00:00) ordered (1 Jan 1998) to (31 May 2015) - else if (data == "3_hourly_prcp") { - extension <- ".three-hourly/.precipitation" - } # (days since 1998-01-01 00:00:00) ordered (2230 31 Dec 1997 - 0130 1 Jan 1998) to (2230 30 May 2015 - 0130 31 May 2015) - else { - stop("Data file does not exist for NASA TRMM 3B42 data") - } - } else { - stop("Source not specified correctly.") - } - prexyaddress <- paste(prexyaddress, extension, sep = "/") - if (download_type == "Area") { - URL <- add_xy_area_range(path = prexyaddress, min_lon = min_lon, min_lat = min_lat, max_lon = max_lon, max_lat = max_lat, dim_x = dim_x, dim_y = dim_y) - } - else if (download_type == "Point") { - URL <- add_xy_point_range(path = prexyaddress, min_lon = min_lon, min_lat = min_lat, dim_x = dim_x, dim_y = dim_y) - } - if (!missing(min_date) & !missing(max_date)) { - URL <- URL %>% add_t_range(min_date = min_date, max_date = max_date, dim_t = dim_t) - } - URL <- URL %>% add_nc() - file_name <- tempfile(pattern = tolower(source), tmpdir = path, fileext = ".nc") - result <- download.file(url = URL, destfile = file_name, method = "libcurl", mode = "wb", cacheOK = FALSE) - if (import && result == 0) { - nc <- ncdf4::nc_open(filename = file_name) - 
self$import_NetCDF(nc = nc, name = name) - ncdf4::nc_close(nc = nc) - } else if (result != 0) { - stop("No file downloaded please check your internet connection") - } - if (missing(path)) { - file.remove(file_name) - } -}) - -DataBook$set("public", "patch_climate_element", function(data_name, date_col_name = "", var = "", vars = c(), max_mean_bias = NA, max_stdev_bias = NA, time_interval = "month", column_name, station_col_name = station_col_name) { - self$get_data_objects(data_name)$patch_climate_element(date_col_name = date_col_name, var = var, vars = vars, max_mean_bias = max_mean_bias, max_stdev_bias = max_stdev_bias, time_interval = time_interval, column_name = column_name, station_col_name = station_col_name) -}) - -DataBook$set("public", "visualize_element_na", function(data_name, element_col_name, element_col_name_imputed, station_col_name, x_axis_labels_col_name, ncol = 2, type = "distribution", xlab = NULL, ylab = NULL, legend = TRUE, orientation = "horizontal", interval_size = interval_size, x_with_truth = NULL, measure = "percent") { - self$get_data_objects(data_name)$visualize_element_na(element_col_name = element_col_name, element_col_name_imputed = element_col_name_imputed, station_col_name = station_col_name, x_axis_labels_col_name = x_axis_labels_col_name, ncol = ncol, type = type, xlab = xlab, ylab = ylab, legend = legend, orientation = orientation, interval_size = interval_size, x_with_truth = x_with_truth, measure = measure) -}) - -DataBook$set("public", "get_data_entry_data", function(data_name, station, date, elements, view_variables, station_name, type, start_date, end_date) { - self$get_data_objects(data_name)$get_data_entry_data(station = station, date = date, elements = elements, view_variables = view_variables, station_name = station_name, type = type, start_date = start_date, end_date = end_date) -}) - -DataBook$set("public", "save_data_entry_data", function(data_name, new_data, rows_changed, comments_list = list(), add_flags = FALSE, 
...) { - if(!missing(comments_list)){ - for (i in seq_along(comments_list)) { - com <- comments_list[[i]] - if(!("row" %in% names(com))){ - com[["row"]] <- "" - } - if(!("column" %in% names(com))){ - com[["column"]] <- "" - } - if(length(comments_list) > 0) cat("Comments added:", length(comments_list), "\n") - self$add_new_comment(data_name = data_name, row = com$row, column = com$column, comment = com$comment) - } - } - self$get_data_objects(data_name)$save_data_entry_data(new_data = new_data, rows_changed = rows_changed, add_flags = add_flags) -} -) - -DataBook$set("public", "import_from_cds", function(user, dataset, elements, start_date, end_date, lon, lat, path, import = FALSE, new_name) { - all_dates <- seq(start_date, end_date, by = 1) - all_periods <- unique(paste(lubridate::year(all_dates), sprintf("%02d", lubridate::month(all_dates)), sep = "-")) - area <- c(lat[2], lon[1], lat[1], lon[2]) - is_win <- Sys.info()['sysname'] == "Windows" - if (is_win) pb <- winProgressBar(title = "Requesting data from CDS", min = 0, max = length(all_periods)) - nc_files <- vector(mode = "character", length = length(all_periods)) - for (i in seq_along(all_periods)) { - y <- substr(all_periods[i], 1, 4) - m <- substr(all_periods[i], 6, 7) - curr_dates <- all_dates[lubridate::month(all_dates) == as.numeric(m) & lubridate::year(all_dates) == as.numeric(y)] - d <- sprintf("%02d", lubridate::day(curr_dates)) - request <- list( - dataset_short_name = dataset, - product_type = "reanalysis", - variable = elements, - year = y, - month = m, - day = d, - time = c("00:00", "01:00", "02:00", "03:00", "04:00", "05:00", "06:00", "07:00", "08:00", "09:00", "10:00", "11:00", "12:00", "13:00", "14:00", "15:00", "16:00", "17:00", "18:00", "19:00", "20:00", "21:00", "22:00", "23:00"), - format = "netcdf", - area = area, - target = paste0(dataset, "-", paste(elements, collapse = "_"), "-", all_periods[i], ".nc") - ) - info <- paste0("Requesting data for ", all_periods[i], " - ", round(100 * i / 
length(all_periods)), "%") - if (is_win) setWinProgressBar(pb, value = i, title = info, label = info) - ncfile <- ecmwfr::wf_request(user = user, request = request, - transfer = TRUE, path = path, - time_out = 3 * 3600) - if (import) { - nc <- ncdf4::nc_open(filename = ncfile) - self$import_NetCDF(nc = nc, name = new_name) - ncdf4::nc_close(nc = nc) - } - } - if (is_win) close(pb) -}) - -DataBook$set("public", "get_column_climatic_type", function(data_name, col_name, attr_name){ - self$get_data_objects(data_name)$get_column_climatic_type(col_name = col_name, attr_name =attr_name) -}) - -DataBook$set("public", "add_flag_fields", function(data_name, col_names, key_column_names) { - if (!self$has_key(data_name)) { - self$add_key(data_name, key_column_names) - } - self$get_data_objects(data_name)$add_flag_fields(col_names = col_names) -} -) - -DataBook$set("public", "remove_empty", function(data_name, which = c("rows","cols")) { - self$get_data_objects(data_name)$remove_empty(which = which) -}) - -DataBook$set("public", "replace_values_with_NA", function(data_name, row_index, column_index) { - self$get_data_objects(data_name)$replace_values_with_NA(row_index = row_index, column_index = column_index) -}) - -DataBook$set("public","has_labels", function(data_name, col_names) { - self$get_data_objects(data_name)$has_labels(col_names) -} -) - -DataBook$set("public","wrap_or_unwrap_data", function(data_name, col_name, column_data, width, wrap = TRUE) { - # Store the original data type of the column - original_type <- class(column_data) - desired_types <- c("factor", "numeric", "Date", "character", "integer", "list", "double") - if(original_type %in% desired_types){ - # Apply str_replace_all if "\n" is detected in the column_data - if (any(!is.na(stringr::str_detect(column_data, "\n")))) { - column_data <- stringr::str_replace_all(column_data, "\n", " ") - } - - # Apply str_wrap if width is specified - if (!is.null(width) && wrap) { - column_data <- 
stringr::str_wrap(column_data, width = width) - } - curr_data <- self$get_data_frame(data_name=data_name, retain_attr = TRUE) - # Convert back to the original data type if necessary - if (original_type != class(column_data)) { - if (original_type %in% c("factor", "ordered_factor")){ - column_data <- make_factor(column_data) - }else if(original_type == "list"){ - result <- curr_data %>% - dplyr::mutate(list_column = lapply(column_data, convert_to_list)) - column_data <- result$list_column - }else{ column_data <- as(column_data, original_type) } - } - # retain the attributes of the column after wrapping or unwrapping - attributes(column_data) <- attributes(curr_data[[col_name]]) - self$add_columns_to_data(data_name=data_name, col_name=col_name, col_data=column_data, before=FALSE) - } -} -) - -DataBook$set("public", "anova_tables2", function(data_name, x_col_names, y_col_name, total = TRUE, signif.stars = FALSE, sign_level = FALSE, means = FALSE, interaction=FALSE) { - self$get_data_objects(data_name)$anova_tables2(x_col_names = x_col_names, y_col_name = y_col_name, total = total, signif.stars = signif.stars, sign_level = sign_level, means = means, interaction=interaction) -} -) +# removed \ No newline at end of file diff --git a/instat/static/InstatObject/R/stand_alone_functions.R b/instat/static/InstatObject/R/stand_alone_functions.R index 4190f6fbcf7..647b6dce5b4 100644 --- a/instat/static/InstatObject/R/stand_alone_functions.R +++ b/instat/static/InstatObject/R/stand_alone_functions.R @@ -1,3449 +1,4 @@ -get_default_significant_figures <- function(data) { - default_digits <- getOption("digits") - if(is.numeric(data) || is.complex(data)) return(default_digits) - else return(NA) -} - -convert_to_character_matrix <- function(data, format_decimal_places = TRUE, decimal_places, is_scientific = FALSE, return_data_frame = TRUE, na_display = NULL, check.names = TRUE) { - if(nrow(data) == 0) { - out <- matrix(nrow = 0, ncol = ncol(data)) - colnames(out) <- colnames(data) - 
} - else { - out = matrix(nrow = nrow(data), ncol = ncol(data)) - if(!format_decimal_places) decimal_places=rep(NA, ncol(data)) - else if(missing(decimal_places)) decimal_places = sapply(data, get_default_significant_figures) - i = 1 - for (curr_col in colnames(data)) { - #if its a geometry list-column then convert to text using sf package. - #see issue #7165 - if ("sfc" %in% class(data[[i]])) { - out[, i] <- sf::st_as_text(data[[i]]) - } else if (is.na(decimal_places[i])) { - #use as.character() for non numeric column vales because format() adds extra spaces to the text - #which are recognised oddly by the R.Net - out[, i] <- as.character(data[[i]]) - } else { - out[,i] <- format(data[[i]], digits = decimal_places[i], scientific = is_scientific[i]) - } - if (!is.null(na_display)) { - out[is.na(data[[i]]), i] <- na_display - } - i = i + 1 - } - colnames(out) <- colnames(data) - rownames(out) <- rownames(data) - } - if(return_data_frame) out <- data.frame(out, stringsAsFactors = FALSE, check.names = check.names) - return(out) -} - -next_default_item = function(prefix, existing_names = c(), include_index = FALSE, start_index = 1) { - if(!is.character(prefix)) stop("prefix must be of type character") - - if(!include_index) { - if(!prefix %in% existing_names) return(prefix) - } - - item_name_exists = TRUE - start_index = 1 - while(item_name_exists) { - out = paste0(prefix,start_index) - if(!out %in% existing_names) { - item_name_exists = FALSE - } - start_index = start_index + 1 - } - return(out) -} - -import_from_ODK = function(username, form_name, platform) { - if(platform == "kobo") { - url <- "https://kc.kobotoolbox.org/api/v1/data" - } - else if(platform == "ona") { - url <- "https://api.ona.io/api/v1/data" - } - else stop("Unrecognised platform.") - password <- getPass::getPass(paste0(username, " password:")) - if(!missing(username) && !missing(password)) { - has_authentication <- TRUE - user <- httr::authenticate(username, password) - odk_data <- httr::GET(url, 
user) - } - else { - has_authentication <- FALSE - odk_data <- httr::GET(url) - } - - forms <- httr::content(odk_data, "parse") - form_names <- sapply(forms, function(x) x$title) # get_odk_form_names_results <- get_odk_form_names(username, platform) - # form_names <- get_odk_form_names_results[1] - # forms <- get_odk_form_names_results[2] - - if(!form_name %in% form_names) stop(form_name, " not found in available forms:", paste(form_names, collapse = ", ")) - form_num <- which(form_names == form_name) - form_id <- forms[[form_num]]$id - - if(has_authentication) curr_form <- httr::GET(paste0(url,"/", form_id), user) - else curr_form <- httr::GET(paste0(url,"/", form_id)) - - form_data <- httr::content(curr_form, "text") - #TODO Look at how to convert columns that are lists - # maybe use tidyr::unnest - out <- jsonlite::fromJSON(form_data, flatten = TRUE) - return(out) -} - -get_odk_form_names = function(username, platform) { - #TODO This should not be repeated - if(platform == "kobo") { - url <- "https://kc.kobotoolbox.org/api/v1/data" - } - else if(platform == "ona") { - url <- "https://api.ona.io/api/v1/data" - } - else stop("Unrecognised platform.") - password <- getPass::getPass(paste0(username, " password:")) - if(!missing(username) && !missing(password)) { - has_authentication <- TRUE - user <- httr::authenticate(username, password) - odk_data <- httr::GET(url, user) - } - else { - has_authentication <- FALSE - odk_data <- httr::GET(url) - } - - forms <- httr::content(odk_data, "parse") - form_names <- sapply(forms, function(x) x$title) - return(form_names) -} - -convert_SST <- function(datafile, data_from = 5){ - start_year <- get_years_from_data(datafile)[1] - end_year <- get_years_from_data(datafile)[length(get_years_from_data(datafile))] - duration <- get_years_from_data(datafile) - lon <- get_lon_from_data(datafile) - lat <- get_lat_from_data(datafile) - lat_lon_df <- lat_lon_dataframe(datafile) - period <- rep(get_years_from_data(datafile), each = 
(length(lat)*length(lon))) - SST_value <- c() - - for (k in duration){ - year <- matrix(NA, nrow = length(lat), ncol = length(lon)) - for (i in 1:length(lat)){ - for (j in 1:length(lon)){ - dat <- as.numeric(as.character(datafile[data_from+i, j+1])) - year[i,j] <- dat - j = j+1 - } - i = i+1 - } - year = as.data.frame(t(year)) - year = stack(year) - data_from = data_from + length(lat) + 2 - g <- as.numeric(year$values) - SST_value = append(SST_value, g) - } - my_data = cbind(period, lat_lon_df, SST_value) - return(list(my_data, lat_lon_df)) -} - -get_years_from_data <- function(datafile){ - return(na.omit(t(unique(datafile[3,2:ncol(datafile)])))) -} - -get_lat_from_data <- function(datafile){ - return(unique(na.omit(as.numeric(as.character(datafile[5:nrow(datafile),1]))))) -} - -get_lon_from_data <- function(datafile){ - return(na.omit(as.numeric(unique(t(datafile[5,2:ncol(datafile)]))))) -} - -lat_lon_dataframe <- function(datafile){ - latitude <- get_lat_from_data(datafile) - longitude <- get_lon_from_data(datafile) - lat <- rep(latitude, each = length(longitude)) - lon <- rep(longitude, length(latitude)) - lat_lon <- as.data.frame(cbind(lat, lon)) - station <- c() - for (j in 1:nrow(lat_lon)){ - if(lat_lon[j,1]>=0){ - station = append(station, paste(paste("latN", lat_lon[j,1], sep = ""), paste("lon", lat_lon[j,2], sep = ""), sep = "_")) - } - else{ - station = append(station, paste(paste("latS", abs(lat_lon[j,1]), sep = ""), paste("lon", lat_lon[j,2], sep = ""), sep = "_")) - } - } - return(cbind(lat_lon,station)) -} - -output_CPT <- function(data, lat_lon_data, station_latlondata, latitude, longitude, station, year, element, long.data = TRUE, na_code = -999) { - - if(missing(data)) stop("data should be provided") - if(missing(station)) stop("station must be provided") - if(missing(year) || missing(latitude) || missing(longitude)) stop("year, latitude and longiude must be provided") - - station_label <- "STN" - lat_lon_labels <- c("LAT", "LON") - - 
if(missing(lat_lon_data)) { - if(long.data) { - data <- data %>% dplyr::select(!!! quos(station, year, element, latitude, longitude)) - names(data)[1] <- "station" - names(data)[2] <- "year" - names(data)[3] <- "element" - names(data)[4] <- "latitude" - names(data)[5] <- "longitude" - - data <- data %>% dplyr::filter(!is.na(station)) - } - else stop("If all data is in one data frame then must have long.data = TRUE") - } - else { - if(missing(station_latlondata)) stop("station must be provided for lat_lon_data") - - if(long.data) { - yearly_data <- data %>% dplyr::select(!!! quos(station, year, element)) - names(yearly_data)[1] <- "station" - names(yearly_data)[2] <- "year" - names(yearly_data)[3] <- "element" - - lat_lon_data <- lat_lon_data %>% dplyr::select(!!! quos(station_latlondata, latitude, longitude)) - names(lat_lon_data)[1] <- "station" - names(lat_lon_data)[2] <- "latitude" - names(lat_lon_data)[3] <- "longitude" - - data <- merge(yearly_data, lat_lon_data, by = "station") - } - else { - stations <- data.frame(data[station]) - year <- data_unstacked %>% dplyr::select(!!! quos(year)) - data <- data.frame(year, stations) - stacked_data <- reshape2::melt(data, id.vars=c("year")) - names(stacked_data)[2] <- "station" - names(stacked_data)[3] <- "element" - - lat_lon_data <- lat_lon_data %>% dplyr::select(!!! 
quos(station_latlondata, latitude, longitude)) - names(lat_lon_data)[1] <- "station" - names(lat_lon_data)[2] <- "latitude" - names(lat_lon_data)[3] <- "longitude" - - data <- merge(stacked_data, lat_lon_data, by = "station") - - } - } - - unstacked_data <- data %>% dplyr::select(station, year, element) %>% tidyr::spread(key = station, value = element) - names(unstacked_data)[1] <- station_label - unstacked_data <- unstacked_data %>% mutate_all(funs(replace(., is.na(.), na_code))) - - lat_lon_data <- data %>% dplyr::group_by(station) %>% dplyr::summarise(latitude = min(latitude, na.rm = TRUE), longitude = min(longitude, na.rm = TRUE)) - if(anyNA(data$latitude) || anyNA(data$longitude)) warning("Missing values in latitude or longitude.") - t_lat_lon_data <- t(lat_lon_data) - station.names <- as.vector(t_lat_lon_data[1, ]) - t_lat_lon_data <- data.frame(t_lat_lon_data, stringsAsFactors = FALSE) - t_lat_lon_data <- t_lat_lon_data %>% dplyr::slice(-1) - names(t_lat_lon_data) <- station.names - row.names(t_lat_lon_data) <- lat_lon_labels - t_lat_lon_data <- tibble::rownames_to_column(t_lat_lon_data, station_label) - - cpt_data <- rbind(t_lat_lon_data, unstacked_data) - return(cpt_data) -} - -yday_366 <- function(date) { - temp_doy <- lubridate::yday(date) - temp_leap <- lubridate::leap_year(date) - temp_doy[(!is.na(temp_doy)) & temp_doy > 59 & (!temp_leap)] <- 1 + temp_doy[(!is.na(temp_doy)) & temp_doy > 59 & (!temp_leap)] - return(temp_doy) -} - -dekade <- function(date) { - temp_dekade <- 3 * (lubridate::month(date)) - 2 + (lubridate::mday(date) > 10) + (lubridate::mday(date) > 20) - return(temp_dekade) -} - -pentad <- function(date){ - temp_pentad <- 6*(lubridate::month(date)) - 5 + (lubridate::mday(date) > 5) + (lubridate::mday(date) > 10) + (lubridate::mday(date) > 15) + (lubridate::mday(date) > 20) + (lubridate::mday(date) > 25) - return(temp_pentad) - } - -nc_get_dim_min_max <- function(nc, dimension, time_as_date = TRUE) { - if(!dimension %in% names(nc$dim)) 
stop(dimension, " not found in file.") - vals <- nc$dim[[dimension]]$vals - dim_axes <- ncdf4.helpers::nc.get.dim.axes(nc) - time_dims <- names(dim_axes[which(dim_axes == "T")]) - if(dimension %in% time_dims && time_as_date) { - time_vals <- c() - try({ - units <- ncdf4::ncatt_get(nc, dimension, "units") - if(units$hasatt && units$value == "julian_day") { - # RDotNet interprets Date class as numeric so character needed to preserve date - time_vals <- as.character(as.Date(vals, origin = structure(-2440588, class = "Date"))) - } - else { - pcict_time <- ncdf4.helpers::nc.get.time.series(nc, time.dim.name = dimension) - posixct_time <- PCICt::as.POSIXct.PCICt(pcict_time) - # RDotNet interprets Date class as numeric so character needed to preserve date - time_vals <- as.character(as.Date(posixct_time)) - } - }) - if(length(time_vals) > 0 && !anyNA(time_vals)) vals <- time_vals - } - bounds <- c(min(vals, na.rm = TRUE), max(vals, na.rm = TRUE)) - return(bounds) -} - -nc_as_data_frame <- function(nc, vars, keep_raw_time = TRUE, include_metadata = TRUE, boundary = NULL, lon_points = NULL, lat_points = NULL, id_points = NULL, show_requested_points = TRUE, great_circle_dist = TRUE) { - if(missing(vars)) vars <- ncdf4.helpers::nc.get.variable.list(nc) - if(sum(is.null(lon_points), is.null(lat_points)) == 1) stop("You must specificy both lon_points and lat_points") - has_points <- (sum(is.null(lon_points), is.null(lat_points)) == 0) - if(has_points && length(lon_points) != length(lat_points)) stop("lon_points and lat_points have unequal lengths.") - if(has_points && !is.null(id_points) && length(id_points) != length(lat_points)) stop("id_points (if specified) must have the same length as lon_points and lat_points.") - dim_names <- ncdf4.helpers::nc.get.dim.names(nc, vars[1]) - dim_values <- list() - requested_points_added <- FALSE - for(dim_name in dim_names) { - #why no wrapper for this in ncdf4.helper? 
- #(as.numeric ensures vectors no not have array class) - dim_values[[dim_name]] <- as.numeric(nc$dim[[dim_name]]$vals) - #This is not recommended but appears in tutorials - #ncdf4::ncvar_get(nc, dim_name) - } - dim_axes <- ncdf4.helpers::nc.get.dim.axes(nc, vars[1]) - if(!is.null(boundary)) { - if(!all(names(boundary) %in% dim_names)) stop("boundary contains dimensions not associated with", vars[1]) - if(anyNA(dim_axes)) { - warning("Cannot subset data when some dimension axes cannot be identified.") - start <- NA - count <- NA - } - else { - start <- c() - count <- c() - for(dim in c("X", "Y", "Z", "T", "S")) { - if(dim %in% dim_axes) { - dim_var <- names(dim_axes)[which(dim_axes == dim)] - curr_dim_values <- dim_values[[dim_var]] - if(dim_var %in% names(boundary) && !(has_points && dim %in% c("X", "Y"))) { - if(dim == "T") { - ind <- integer(0) - try({ - print(dim_var) - units <- ncdf4::ncatt_get(nc, dim_var, "units") - if(units$hasatt && units$value == "julian_day") { - # RDotNet interprets Date class as numeric so character needed to preserve date - time_vals <- as.Date(curr_dim_values, origin = structure(-2440588, class = "Date")) - } - else { - pcict_time <- ncdf4.helpers::nc.get.time.series(nc, time.dim.name = dim_var) - posixct_time <- PCICt::as.POSIXct.PCICt(pcict_time) - time_vals <- as.Date(posixct_time) - } - ind <- which(time_vals >= boundary[[dim_var]][[1]] & time_vals <= boundary[[dim_var]][[2]]) - }) - } - else ind <- which(curr_dim_values >= boundary[[dim_var]][1] & curr_dim_values <= boundary[[dim_var]][2]) - # TODO This is temporary solution for when there is only one value for a dimension and there are rounding difference - if(length(ind) == 0 && length(curr_dim_values) == 1 && round(curr_dim_values, 3) == round(boundary[[dim_var]][1], 3) && round(curr_dim_values, 3) == round(boundary[[dim_var]][2], 3)) ind <- 1 - if(length(ind) == 0) { - stop("No values within the range specified for ", dim_var, ".") - } - else { - start <- c(start, min(ind)) 
- count <- c(count, length(ind)) - dim_values[[dim_var]] <- dim_values[[dim_var]][ind] - } - } - else { - start <- c(start, 1) - count <- c(count, length(curr_dim_values)) - } - } - } - if(length(start) == 0) { - start <- rep(1, length(dim_axes)) - count <- rep(-1, length(dim_axes)) - } - } - } - else { - start <- rep(1, length(dim_axes)) - count <- rep(-1, length(dim_axes)) - } - start_list <- list() - count_list <- list() - dim_values_list <- list() - if(has_points) { - dim_axes <- ncdf4.helpers::nc.get.dim.axes(nc, vars[1]) - x_var <- names(dim_axes)[which(dim_axes == "X")] - y_var <- names(dim_axes)[which(dim_axes == "Y")] - if(length(x_var) == 0 || length(y_var) == 0) stop("Cannot select points because dimensions are not labelled correctly in the nc file. Modify the nc file or remove the points to import all data.") - xs <- dim_values[[x_var]] - ys <- dim_values[[y_var]] - for(i in seq_along(lon_points)) { - curr_start <- start - curr_count <- count - curr_dim_values <- dim_values - xy_possible <- expand.grid(xs, ys) - point_ind <- which.min(sp::spDistsN1(pts = as.matrix(xy_possible), pt = c(lon_points[i], lat_points[i]), longlat = great_circle_dist)) - x_ind <- which(xs == xy_possible[point_ind, 1])[1] - curr_start[1] <- x_ind - curr_count[1] <- 1 - curr_dim_values[[x_var]] <- curr_dim_values[[x_var]][x_ind] - y_ind <- which(ys == xy_possible[point_ind, 2])[1] - curr_start[2] <- y_ind - curr_count[2] <- 1 - curr_dim_values[[y_var]] <- curr_dim_values[[y_var]][y_ind] - if(show_requested_points) { - curr_dim_values[[paste0(x_var, "_point")]] <- lon_points[i] - curr_dim_values[[paste0(y_var, "_point")]] <- lat_points[i] - if(!is.null(id_points)) curr_dim_values[["station"]] <- id_points[i] - requested_points_added <- TRUE - } - - start_list[[i]] <- curr_start - count_list[[i]] <- curr_count - dim_values_list[[i]] <- curr_dim_values - } - } - else { - start_list[[1]] <- start - count_list[[1]] <- count - dim_values_list[[1]] <- dim_values - } - - dim_axes <- 
ncdf4.helpers::nc.get.dim.axes(nc) - time_dims <- names(dim_axes[which(dim_axes == "T" & names(dim_axes) %in% dim_names)]) - var_data_list <- list() - for(i in seq_along(start_list)) { - curr_dim_values <- dim_values_list[[i]] - curr_var_data <- expand.grid(curr_dim_values, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE) - for(j in seq_along(curr_var_data)) { - attr(curr_var_data[[j]], "dim") <- NULL - } - names(curr_var_data) <- names(curr_dim_values) - included_vars <- dim_names - for(var in vars) { - curr_dim_names <- ncdf4.helpers::nc.get.dim.names(nc, var) - if(!setequal(curr_dim_names, dim_names)) { - warning("The dimensions of", var, "do not match the other variables.", var, "will be dropped.") - } - else { - included_vars <- c(included_vars, var) - curr_var_data[[var]] <- as.vector(ncdf4::ncvar_get(nc, var, start = start_list[[i]], count = count_list[[i]])) - } - } - if(length(time_dims) == 1) { - time_var <- time_dims - raw_time_full <- nc$dim[[time_var]]$vals - raw_time <- curr_dim_values[[time_var]] - attr(raw_time, "dim") <- NULL - df_names <- time_var - time_df <- data.frame(raw_time) - names(time_df) <- time_var - try({ - # need to subset this if time var has been subsetted - time_ind <- which(raw_time_full %in% raw_time) - units <- ncdf4::ncatt_get(nc, time_var, "units") - if(units$hasatt && units$value == "julian_day") { - time_df[["date"]] <- as.Date(raw_time, origin = structure(-2440588, class = "Date")) - } - else { - pcict_time <- ncdf4.helpers::nc.get.time.series(nc, time.dim.name = time_var) - pcict_time <- pcict_time[time_ind] - posixct_time <- PCICt::as.POSIXct.PCICt(pcict_time) - time_df[["date"]] <- as.Date(posixct_time) - time_df[["datetime"]] <- posixct_time - } - }) - if(ncol(time_df) > 1) curr_var_data <- dplyr::full_join(curr_var_data, time_df, by = time_var) - if(!keep_raw_time) { - var_data[[time_var]] <- NULL - included_vars <- included_vars[-which(included_vars == time_var)] - } - } - var_data_list[[i]] <- curr_var_data - } - # 
# Following conventions in http://www.unidata.ucar.edu/software/netcdf/docs/attribute_conventions.html - # if(replace_missing) { - # for(inc_var in included_vars) { - # - # numeric_var <- is.numeric(var_data[[inc_var]]) - # integer_var <- is.integer(var_data[[inc_var]]) - # valid_range <- ncdf4::ncatt_get(nc, var, "valid_range") - # valid_min <- ncdf4::ncatt_get(nc, var, "valid_min") - # valid_max <- ncdf4::ncatt_get(nc, var, "valid_max") - # fill_value <- ncdf4::ncatt_get(nc, var, "_FillValue") - # missing_value <- ncdf4::ncatt_get(nc, var, "missing_value") - # - # if(numeric_var && valid_range[[1]]) { - # var_data[[inc_var]][var_data[[inc_var]] < valid_range[[2]][1] | var_data[[inc_var]] > valid_range[[2]][2]] <- NA - # } - # else if(numeric_var && (valid_min[[1]] || valid_max[[1]])) { - # if(valid_min[[1]]) { - # var_data[[inc_var]][var_data[[inc_var]] < valid_min[[2]]] <- NA - # } - # if(valid_max[[2]]) { - # var_data[[inc_var]][var_data[[inc_var]] > valid_max[[2]]] <- NA - # } - # } - # else if(fill_value[[1]]) { - # val <- fill_value[[2]] - # if(numeric_var) { - # # Not sure this is safe if 'integer' types from file do not import as - # # 'integer' types in R. - # if(integer_var) width <- 1 - # else width <- 2 * .Machine$double.xmin - # - # if(val > 0) var_data[[inc_var]][var_data[[inc_var]] > val + width] <- NA - # else var_data[[inc_var]][var_data[[inc_var]] < val - width] <- NA - # } - # else { - # # Should we do this? 
Non numeric not mentioned in convention - # var_data[[inc_var]][var_data[[inc_var]] %in% val] <- NA - # } - # } - # if(missing_value[[1]]) var_data[[inc_var]][var_data[[inc_var]] %in% missing_value[[2]]] <- NA - # } - # } - if(length(var_data_list) > 1) { - var_data <- dplyr::bind_rows(var_data_list) - } - else if(length(var_data_list) == 1) var_data <- var_data_list[[1]] - else var_data_list <- data.frame() - - if(include_metadata) { - for(col_name in included_vars) { - col_attr <- ncdf4::ncatt_get(nc, col_name) - for(i in seq_along(col_attr)) { - attr(var_data[[col_name]], names(col_attr)[i]) <- col_attr[[i]] - } - } - global_attr <- ncdf4::ncatt_get(nc, 0) - for(i in seq_along(global_attr)) { - attr(var_data, names(global_attr)[i]) <- global_attr[[i]] - } - } - return(var_data) -} - -# open_NetCDF <- function(nc_data, latitude_col_name, longitude_col_name, default_names){ -# variables = names(nc_data$var) -# lat_lon_names = names(nc_data$dim) -# #we may need to add latitude_col_name, longitude_col_name to the character vector of valid names -# lat_names = c("lat", "latitude", "LAT", "Lat", "LATITUDE") -# lon_names = c("lon", "longitude", "LON", "Lon", "LONGITUDE") -# time_names = c("time", "TIME", "Time", "period", "Period", "PERIOD") -# if (stringr::str_trim(latitude_col_name) != ""){ -# lat_names <- c(lat_names, latitude_col_name) -# } -# if (str_trim(longitude_col_name) != ""){ -# lon_names <- c(lon_names, longitude_col_name) -# } -# lat_in <- which(lat_lon_names %in% lat_names) -# lat_found <- (length(lat_in) == 1) -# if(lat_found) { -# lat <- as.numeric(ncdf4::ncvar_get(nc_data, lat_lon_names[lat_in])) -# } -# -# lon_in <- which(lat_lon_names %in% lon_names) -# lon_found <- (length(lon_in) == 1) -# if(lon_found) { -# lon <- as.numeric(ncdf4::ncvar_get(nc_data, lat_lon_names[lon_in])) -# } -# -# time_in <- which(lat_lon_names %in% time_names) -# time_found <- (length(time_in) == 1) -# if(time_found) { -# time <- as.numeric(ncdf4::ncvar_get(nc_data, 
lat_lon_names[time_in])) -# } -# -# if(!lon_found || (!lat_found)) stop("Latitude and longitude names could not be recognised.") -# if(!time_found) { -# warning("Time variable could not be found/recognised. Time will be set to 1.") -# time = 1 -# } -# period <- rep(time, each = (length(lat)*length(lon))) -# lat_rep <- rep(lat, each = length(lon)) -# lon_rep <- rep(lon, length(lat)) -# # if (!default_names){ -# # #we need to check if the names are valid -# # new_lat_lon_column_names <- c(latitude_col_name, longitude_col_name) -# # } -# # else{ -# new_lat_lon_column_names <- c(lat_lon_names[lat_in], lat_lon_names[lon_in]) -# #} -# lat_lon <- as.data.frame(cbind(lat_rep, lon_rep)) -# names(lat_lon) = new_lat_lon_column_names -# station = ifelse(lat_rep >= 0 & lon_rep >= 0, paste(paste("N", lat_rep, sep = ""), paste("E", lon_rep, sep = ""), sep = "_"), -# ifelse(lat_rep < 0 & lon_rep >= 0, paste(paste("S", abs(lat_rep), sep = ""), paste("E", lon_rep, sep = ""), sep = "_"), -# ifelse(lat_rep >= 0 & lon_rep < 0, paste(paste("N", lat_rep, sep = ""), paste("W", abs(lon_rep), sep = ""), sep = "_") , -# paste(paste("S", abs(lat_rep), sep = ""), paste("W", abs(lon_rep), sep = ""), sep = "_")))) -# -# lat_lon_df <- cbind(lat_lon, station) -# my_data <- cbind(period, lat_lon_df) -# -# for (current_var in variables){ -# dataset <- ncdf4::ncvar_get(nc_data, current_var) -# if(length(dim(dataset)) == 1) { -# nc_value = dataset -# } -# else if(length(dim(dataset)) == 2) { -# nc_value = as.vector(t(dataset)) -# } -# else if(length(dim(dataset)) == 3) { -# lonIdx <- which(!is.na(lon)) -# latIdx <- which(!is.na(lat)) -# timeIdx <- which(!is.na(time)) -# new_dataset <- dataset[lonIdx, latIdx, timeIdx] -# nc_value = as.vector(new_dataset) -# } -# else { -# stop("The format of the data cannot be recognised") -# } -# my_data = cbind(my_data, nc_value) -# names(my_data)[length(names(my_data))] <- current_var -# } -# return(list(my_data, lat_lon_df, new_lat_lon_column_names)) -# } - 
-multiple_nc_as_data_frame <- function(path, vars, keep_raw_time = TRUE, include_metadata = TRUE, boundary = NULL, lon_points = NULL, lat_points = NULL, id_points = NULL, show_requested_points = TRUE, great_circle_dist = TRUE, id = "id") { - filepaths <- list.files(path = path, pattern="*\\.nc", full.names = TRUE) - filenames <- basename(filepaths) - nc_list <- list() - - n_files <- length(filepaths) - is_win <- Sys.info()['sysname'] == "Windows" - if (is_win) pb <- winProgressBar(title = "Reading files", min = 0, max = n_files) - for(i in seq_along(filepaths)) { - nc <- ncdf4::nc_open(filename = filepaths[i]) - dat <- nc_as_data_frame(nc = nc, vars = vars, keep_raw_time = keep_raw_time, include_metadata = include_metadata, boundary = boundary, lon_points = lon_points, lat_points = lat_points, id_points = id_points, show_requested_points = show_requested_points, great_circle_dist = great_circle_dist) - nc_list[[length(nc_list) + 1]] <- dat - ncdf4::nc_close(nc) - info <- paste0("Reading file ", i, " of ", n_files, " - ", round(100*i/n_files), "%") - if (is_win) setWinProgressBar(pb, value = i, title = info, label = info) - } - if (is_win) close(pb) - names(nc_list) <- tools::file_path_sans_ext(filenames) - merged_data <- dplyr::bind_rows(nc_list, .id = id) - return(merged_data) -} - -import_from_iri <- function(download_from, data_file, path, X1, X2,Y1,Y2, get_area_point){ - if(path == ""){ - gaugelocdir = getwd() - } - else { - if(!dir.exists(path)){ - dir.create(path) - } - gaugelocdir = path - } - - if(download_from == "CHIRPS_V2P0"){ - prexyaddress <- "https://iridl.ldeo.columbia.edu/SOURCES/.UCSB/.CHIRPS/.v2p0" - if(data_file == "daily_0p05") { - extension <- ".daily/.global/.0p05/.prcp" - } - else if(data_file == "daily_0p25") { - extension <- ".daily/.global/.0p25/.prcp" - } - else if(data_file == "daily_improved_0p05") { - extension <- ".daily-improved/.global/.0p05/.prcp" - } - else if(data_file == "daily_improved_0p25") { - extension <- 
".daily-improved/.global/.0p25/.prcp" - } - else if(data_file == "dekad") { - extension <- ".dekad/.prcp" - } - else if(data_file == "monthly_c8113") { - extension <- ".monthly/.global/.c8113/.precipitation" - } - else if(data_file == "monthly_deg1p0") { - extension <- ".monthly/.global/.deg1p0/.precipitation" - } - else if(data_file == "monthly_NMME_deg1p0") { - extension <- ".monthly/.global/.NMME_deg1p0/.precipitation" - } - else if(data_file == "monthly_prcp") { - extension <- ".monthly/.global/.precipitation" - } - - else stop("Data file does not exist for CHIRPS V2P0 data") - #Annual and 2Monthly and 3monthly does not exist for CHIRPS_V2P0 - } - else if(download_from == "TAMSAT") { - prexyaddress <- "http://iridl.ldeo.columbia.edu/home/.remic/.Reading/.Meteorology/.TAMSAT" - if(data_file == "rainfall_estimates") { - extension <- ".TAMSAT-RFE/.rfe" - } - else if(data_file == "reconstructed_rainfall_anomaly") { - extension <- ".TAMSAT-RFE/.rfediff" - } - else if(data_file == "sahel_dry_mask") { - extension <- ".TAMSAT-RFE/.sahel_drymask" - } - else if(data_file == "SPI_1_dekad") { - extension <- ".TAMSAT-RFE/.SPI-rfe_1-dekad_Sahel" - } - #monthly,climatology and TAMSAT RFE 0p1 are yet to be implemented. 
- else stop("Data file does not exist for TAMSAT data") - } - else if(download_from=="NOAA_ARC2") { - prexyaddress<-paste("http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.FEWS/.Africa/.DAILY/.ARC2") - if(data_file == "daily_estimated_prcp") { - extension <- ".daily/.est_prcp" - } - else if(data_file == "monthly_average_estimated_prcp") { - extension <- ".monthly/.est_prcp" - } - else stop("Data file does not exist for NOAA ARC2 data") - } - else if(download_from=="NOAA_RFE2") { - prexyaddress <- "http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.FEWS/.Africa" - if(data_file == "daily_estimated_prcp"){ - extension <- ".DAILY/.RFEv2/.est_prcp" - } - else stop("Data file does not exist for NOAA RFE2 data") - } - else if(download_from=="NOAA_CMORPH_DAILY" || download_from=="NOAA_CMORPH_3HOURLY" || download_from=="NOAA_CMORPH_DAILY_CALCULATED") { - if(download_from=="NOAA_CMORPH_DAILY") { - prexyaddress <- "http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.CMORPH/.daily" - } - else if(download_from == "NOAA_CMORPH_3HOURLY") { - prexyaddress <- "http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.CMORPH/.3-hourly" - } - if(download_from == "NOAA_CMORPH_DAILY_CALCULATED") { - prexyaddress <- "http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP/.CPC/.CMORPH/.daily_calculated" - } - - if(data_file == "mean_microwave_only_est_prcp") { - extension <- ".mean/.microwave-only/.comb" - } - else if(data_file == "mean_morphed_est_prcp") { - extension <- ".mean/.morphed/.cmorph" - } - if(data_file == "orignames_mean_microwave_only_est_prcp") { - extension <- ".orignames/.mean/.microwave-only/.comb" - } - else if(data_file == "orignames_mean_morphed_est_prcp") { - extension <- ".orignames/.mean/.morphed/.cmorph" - } - if(data_file == "renamed102015_mean_microwave_only_est_prcp") { - extension <- ".renamed102015/.mean/.microwave-only/.comb" - } - else if(data_file == "renamed102015_mean_morphed_est_prcp") { - extension <- 
".renamed102015/.mean/.morphed/.cmorph" - } - else stop("Data file does not exist for NOAA CMORPH data") - } - else if(download_from=="NASA_TRMM_3B42") { - prexyaddress <- "https://iridl.ldeo.columbia.edu/SOURCES/.NASA/.GES-DAAC/.TRMM_L3/.TRMM_3B42/.v7" - if(data_file == "daily_estimated_prcp") { - extension <- ".daily/.precipitation" - } - else if(data_file == "3_hourly_estimated_prcp") { - extension <- ".three-hourly/.precipitation" - } - else if(data_file == "3_hourly_pre_gauge_adjusted_infrared_est_prcp") { - extension <- ".three-hourly/.IRprecipitation" - } - else if(data_file == "3_hourly_pre_gauge_adjusted_microwave_est_prcp") { - extension <- ".three-hourly/.HQprecipitation" - } - else stop("Data file does not exist for NASA TRMM 3B42 data") - } - else{ - stop("Source not specified correctly.") - } - - prexyaddress = paste(prexyaddress, extension, sep="/") - #we need to add time range to get the data - if(get_area_point == "area") { - xystuff <- paste("X", X1, X2, "RANGEEDGES/Y", Y1, Y2, "RANGEEDGES", sep = "/") - postxyaddress <- "ngridtable+table-+skipanyNaN+4+-table+.csv" - } - else if(get_area_point == "point") { - xystuff <- paste("X", X1, "VALUES/Y", Y1, "VALUES", sep = "/") - postxyaddress <- "T+exch+table-+text+text+skipanyNaN+-table+.csv" - } - else stop("Unrecognised download type.") - - address <- paste(prexyaddress,xystuff,postxyaddress,sep="/") - - file.name <- paste(gaugelocdir,"tmp_iri.csv", sep="/") - download.file(address, file.name, quiet=FALSE) - dataout <- read.table(paste(gaugelocdir, "tmp_iri.csv", sep="/"), sep = ",", header = TRUE) - if(nrow(dataout) == 0) stop("There is no data for the selected point/area.") - - if(get_area_point == "point") { - Longitude <- rep(X1, nrow(dataout)) - Latitude = rep(Y1, nrow(dataout)) - dataout = cbind(Longitude, Latitude, dataout) - } - - lat_lon_dataframe = unique(dataout[,c(1,2)]) - - file.remove(paste(gaugelocdir,"tmp_iri.csv",sep="/")) - return(list(dataout,lat_lon_dataframe)) -} - 
-is.logical.like <- function(x) { - if(is.logical(x)) return(TRUE) - else if(is.numeric(x)) return(all(na.omit(x) %in% c(1,0))) - else return(FALSE) -} - -is.binary <- function(x) { - if(is.logical(x)) return(TRUE) - else if(is.numeric(x)) return(all(na.omit(x) %in% c(1,0))) - else if(is.factor(x)) return(nlevels(x) == 2) - else return(FALSE) -} - -get_column_attributes <- function(x, drop = c("class", "levels")) { - tmp_attr <- attributes(x) - tmp_attr <- tmp_attr[!names(tmp_attr) %in% drop] - return(tmp_attr) -} - -split_items_in_groups <- function(items, num) { - if(length(items) %% num != 0) stop("The number of items must be divisible by the number of groups") - x <- split(items, rep(1:num, each = length(items)/num)) - return(x) -} - -cancor_coef <- function(object) { - object[c("xcoef", "ycoef")] -} -################### - -# cmsaf Plot.Region script - -# -# This script displays a map of the selected product. -# You can either specify a certain year / month from a data file with several time steps -# or plot one 2D field. 
-# Prepare the your netcdf-files with the R-script "Prep.Data.R" or "Apply.Function.R" -########################################################################################## - -plot.region <- function(lon, lat, product, time, time_point = as.Date("2002-01-01"), add2title = "CM SAF, ", lonmin = NA, lonmax = NA, latmin = NA, latmax = NA, height = 600, width = 600, plot.ano = FALSE, set.col.breaks = FALSE, brk.set = seq(240,310,5), colmin0 = NA, colmax0 = NA, ncol = 14, plotHighRes = FALSE, plotCoastline = TRUE, plotCountries = TRUE, plotRivers = FALSE, contour.thick = 2, plotCities = TRUE, pch.cities = 2, cex.cities = 1, label.cities = TRUE, plotCapitals = 1, cex.label.cities = 0.5, dlat = 0.25, plotOwnLocations = FALSE, loc_lon = c(), loc_lat = c(), loc_name = c(""), label_pos = 1, variable = "Tm", level = 5, CTY.type = 4) { - - # Set the variable name - #varname <- nc$var[[datalev]]$name - varname <- attr(product, "name") - if(is.null(varname)) varname <- attr(product, "longname") - if(is.null(varname)) varname <- "" - - # In HLW and HSH multiple variables are stored in each file - if (varname == "HLW" || varname =="HSH") { - varname <- paste(varname, "_", variable, sep="") - } - - # Retrieve the unit, the missing_data-value, and the title of the data - # unit <- ncatt_get(nc, varname,"units")$value - # missval <- ncatt_get(nc,varname,"_FillValue")$value - unit <- attr(product, "units") - missval <- attr(product, "_FillValue") - - # if (ncatt_get(nc,varname,"title")$hasatt==TRUE) { - # name <- ncatt_get(nc,varname,"title")$value - # } else { - # name <- varname - # } - name <- attr(product, "title") - if(is.null(name)) name <- varname - - # The offset and the scalefactor is required because - # the Fill_Value attribute is not applied by the ncdf-package - # The offset and scalefactor is automatically applied to all data - # if (ncatt_get(nc,varname,"add_offset")$hasatt==TRUE) { - # offset.value <- ncatt_get(nc,varname,"add_offset")$value } - # if 
(ncatt_get(nc,varname,"scale_factor")$hasatt==TRUE) { - # scale.factor <- ncatt_get(nc,varname,"scale_factor")$value } - offset.value <- attr(product, "add_offset") - if(is.null(offset.value)) offset.value <- 0 - scale.factor <- attr(product, "scale_factor") - if(is.null(scale.factor)) scale.factor <- 1 - - time.unit <- attr(time, "units") - - time_ind <- which(time == time_point) - if(length(time_ind) == 0) stop("time_point of ", time_point, " not within range of data.") - lon <- lon[time_ind] - lon <- unique(lon) - lat <- lat[time_ind] - lat <- unique(lat) - product <- product[time_ind] - if(missing(lonmin)) lonmin <- min(lon, na.rm = TRUE) - if(missing(lonmax)) lonmax <- max(lon, na.rm = TRUE) - if(missing(latmin)) latmin <- min(lat, na.rm = TRUE) - if(missing(latmax)) latmax <- max(lat, na.rm = TRUE) - - nx <- length(lon) - ny <- length(lat) - nt <- length(time) - - field <- matrix(product, nrow = nx, ncol = ny) - - na.ind <- which(field == (missval * scale.factor + offset.value)) - field[na.ind] <- NA - - z <- field - zdate <- time_point - - # Set the plot ranges - lonplot=c(lonmin,lonmax) - latplot=c(latmin,latmax) - - # Define the HOAPS2011 data set - HOAPS2011 <- c("PRE","EMP","EVA","LHF","NSH","SWS") - - # Retrieve the name of the variable and the data - datalev <- 1 - if (varname %in% HOAPS2011) datalev <- 2 - if (varname == "CTY") datalev <- CTY.type - - #--------------------------------------------------# - - # Invert the latitude dimension if necessary - if (lat[ny] < lat[1]) { - sort.out <- sort(lat,index.return=TRUE) - lat <- sort.out$x - z <- z[,sort.out$ix] - } - - # Calulate the mean, min, max for the selected region only - lon.reg <- which(lon >= lonmin & lon <= lonmax) - lat.reg <- which(lat >= latmin & lat <= latmax) - z.reg <- z[lon.reg,lat.reg] - - # Set the title of the plot - title <- paste(name," (",unit,"), ",sep="") - if (varname == "HLW" || varname =="HSH") { - title <- paste(varname," (",unit,"), level: ",level.out,", ",sep="") - } - 
#---------------------------------------------------------- - # Set the number of rows and columns of the plot - par(mfrow = c(1,1)) - - colmin <- colmin0 - colmax <- colmax0 - - if (is.na(colmin) && is.na(colmax)) { - colmin <- min(z.reg,na.rm=TRUE) - colmax <- max(z.reg,na.rm=TRUE) - } - - if (set.col.breaks) { - brk <- brk.set - } else { - brk <- seq(colmin,colmax,length.out=ncol+1) - } - - # Set the colors and the color bar for the Difference plots - col.breaks <- brk - ncolor <- length(col.breaks) - at.ticks <- seq(1,ncolor) - names.ticks <- round(col.breaks[at.ticks]) - zlim <- c(1,ncolor) - - colors <- colorRamps::matlab.like(ncolor-1) - if (plot.ano) colors[as.integer(ncolor/2)] <- rgb(1,1,1) - - # Generate the field to be plotted - field.plot <- matrix(ncol=ny,nrow=nx) - for (l in seq(1,ncolor-1) ) { - idx <- which(z >= col.breaks[l] & - z < col.breaks[l+1],arr.ind=TRUE) - field.plot[idx] <- l + 0.1 - } - - if (plotCoastline && plotCountries) (plotCoastline=FALSE) - - # Make the plot including color bar - fields::image.plot(lon,lat,field.plot,xlab="longitude, deg E",ylab="latitude, deg N", - main=paste(title,add2title,zdate,sep=""), - legend.mar = 4, xlim=lonplot, ylim=latplot, zlim=zlim, - nlevel=(ncolor-1), col=colors,lab.breaks=names.ticks) - - if (plotHighRes){ - - data("worldMapEnv", package = "maps") - data("worldHiresMapEnv", package = "mapdata") - data("countriesHigh", package = "rworldxtra") - world <- as(countriesHigh,"SpatialLines") - - # add rivers - if (plotRivers) { - maps::map('rivers', add=TRUE, col="blue") - } - - # add coastline - if (plotCoastline) { - maps::map('worldHires', add=TRUE, interior=F) - } - - # add country borders - if (plotCountries) { - plot(world,add=TRUE) - } - - # add cities - if (plotCities) { - if (label.cities) { - maps::map.cities(pch=pch.cities,cex=cex.cities,capitals=plotCapitals,label=TRUE) - }else{maps::map.cities(pch=pch.cities,cex=cex.cities,capitals=plotCapitals,label=FALSE)} - } - - # add own locations - if 
(plotOwnLocations){ # NOTE: continuation of an `if` split across source lines (tail of a truncated plotting function)
    if (length(loc_lon)==length(loc_lat)&length(loc_lon)==length(loc_name)){
      for (i in 1:length(loc_lon)){
        points(loc_lon[i],loc_lat[i],pch=pch.cities)
        text(loc_lon[i],loc_lat[i],loc_name[i], pos=label_pos)
      }
    }
  }

}else{

  data("worldMapEnv", package = "maps")

  # add rivers
  if (plotRivers) {
    maps::map('rivers', add=TRUE, col="blue")
  }

  # add coastline
  if (plotCoastline) {
    maps::map('world', add=TRUE, interior=F)
  }

  # add country borders
  if (plotCountries) {
    data("countriesLow", package = "rworldmap")
    world <- as(countriesLow,"SpatialLines")
    plot(world,add=TRUE)
  }

  # add cities
  if (plotCities) {
    if (label.cities) {
      maps::map.cities(pch=pch.cities,cex=cex.cities,capitals=plotCapitals,label=TRUE)
    }
    else {
      maps::map.cities(pch=pch.cities,cex=cex.cities,capitals=plotCapitals,label=FALSE)
    }
  }

  # add own locations
  if (plotOwnLocations){
    if (length(loc_lon)==length(loc_lat)&length(loc_lon)==length(loc_name)){
      for (i in 1:length(loc_lon)){
        points(loc_lon[i],loc_lat[i],pch=pch.cities)
        text(loc_lon[i],loc_lat[i],loc_name[i], pos=label_pos)
      }
    }
  }
}

# Draw lines around the plot
axis(1,lwd=1,at=c(lonmin,lonmax),tick=TRUE,lwd.ticks=0,labels=FALSE)
axis(2,lwd=1,at=c(latmin,latmax), tick=TRUE,lwd.ticks=0,labels=FALSE)
axis(3,lwd=1,at=c(lonmin,lonmax),tick=TRUE,lwd.ticks=0,labels=FALSE)
axis(4,lwd=1,at=c(latmin,latmax), tick=TRUE,lwd.ticks=0,labels=FALSE)
}

# Number each element by how many immediately preceding elements it duplicates.
# Returns a vector the same length as col_name: 1 for a fresh value, k for the
# k-th consecutive (near-)identical value. Numeric values are compared within
# +/- tolerance; values contained in `ignore` always restart the count at 1.
# An NA in the running count propagates a restart on the following element.
duplicated_cases <- function(col_name, ignore = NULL, tolerance=0.01) {
  col_name <- as.vector(col_name)
  # Fix: the original crashed on length-0 input (rep(NA, -1)) and on length-1
  # input (2:length(col_name) counts backwards, then col_data1[0] yields a
  # zero-length `if` condition).
  if (length(col_name) == 0) return(numeric(0))
  col_data1 <- c(1, rep(NA, length(col_name) - 1))

  if (length(col_name) > 1) {
    if(is.numeric(col_name)) {
      for(i in 2:length(col_name)) {
        if(!is.na(col_data1[i-1])) {
          col_data1[i] <- ifelse((col_name[i] >= (col_name[i-1] - tolerance)) & (col_name[i] <= (col_name[i-1] + tolerance)) & !(col_name[i] %in% ignore), col_data1[i-1] + 1, 1)
        }
        else {
          # Both branches of the original ifelse() here were 1, so after an NA
          # the count simply restarts.
          col_data1[i] <- 1
        }
      }
    }
    else {
      for(i in 2:length(col_name)) {
        if(!is.na(col_data1[i-1])) {
          col_data1[i] <- ifelse((col_name[i] == col_name[i-1]) & !(col_name[i] %in% ignore), col_data1[i-1] + 1, 1)
        }
        else {
          col_data1[i] <- 1
        }
      }
    }
  }
  return(col_data1)
}

#This is Sam function from issue #4270
# For type = "count": number of OTHER rows identical to each row (0 = unique).
# For type = "index": running 1,2,3,... index within each group of identical
# rows, in original row order. Any other `type` returns NULL invisibly.
duplicated_count_index<-function(x, type = "count"){
  if(type == "count"){
    #make sure x is a dataframe or can be coerced into a dataframe
    x<-data.frame(x)

    #calculate the frequency of each combo. (using plyr:: because the function name is used in other packages so need explicit-ness)
    counts<-plyr::count(x)

    #merge onto dataset. Adding a call to suppressMessages() because join() likes to tell you stuff a bit unneccesarily otherwise.
    x<-suppressMessages(plyr::join(x, counts))

    #return column. Minus 1 so that the number represents number of other matches (i.e. doesn't include itself); so zero for unique, 1 for 1 match etc
    return(x$freq-1)
  }
  if(type == "index"){
    #make sure x is a dataframe or can be coerced into a dataframe
    x<-data.frame(x)

    # Hash each row so multi-column duplicates compare as single strings.
    x$hash<-apply(x, 1, paste, collapse = ";;")
    x$id<-1:nrow(x)

    x<-x[order(x$hash),]
    x$count<-1
    # Fix: the original 2:nrow(x) ran backwards (2, 1) on a one-row input and
    # indexed past the end of the data frame.
    if (nrow(x) > 1) {
      for(i in 2:nrow(x)){
        x$count[i]<-ifelse(x$hash[i]==x$hash[i-1],x$count[i-1]+1,1)
      }
    }
    x<-x[order(x$id),]

    return(x$count)
  }
}

# List installed packages; when with_data = TRUE, keep only packages that ship
# at least one data set (first column of data()$results is the package name).
get_installed_packages_with_data <- function(with_data = TRUE) {
  all_installed_packages <- .packages(all.available = TRUE)
  if (with_data) all_installed_packages <- unique(data(package = all_installed_packages)[["results"]][,1])
  return(all_installed_packages)
}

# Drop unused factor levels from the named columns of dat; non-factor columns
# in `columns` are left untouched. Returns the modified data frame.
drop_unused_levels <- function(dat, columns) {
  for(i in seq_along(columns)) {
    if(is.factor(dat[[columns[i]]])) dat[[columns[i]]] <- droplevels(dat[[columns[i]]])
  }
  return(dat)
}

compare_columns <- function(x, y, use_unique = TRUE, sort_values = TRUE, firstnotsecond = TRUE, secondnotfirst = TRUE, display_intersection = FALSE, display_union = FALSE, display_values = TRUE) {
x_name <- deparse(substitute(x)) # NOTE: body of compare_columns(), whose header is on the previous source line
  y_name <- deparse(substitute(y))
  if(use_unique) {
    x <- unique(x)
    y <- unique(y)
  }
  if(sort_values) {
    x <- sort(x)
    y <- sort(y)
  }
  equal <- setequal(x, y)
  cat(paste0("Columns contain all the same values: ", equal, "\n \n"))
  if(equal) {
    if(display_values) cat(paste0("Values: ", paste0("'", x, "'", collapse = ", "), "\n \n"))
  }
  if(!equal) {
    cat("First column:", x_name, "\n \n")
    cat("Second column:", y_name, "\n \n")
    if(firstnotsecond) {
      cat("Values in first not in second: ")
      setd <- dplyr::setdiff(x, y)
      if(length(setd) != 0) cat(paste0("'", setd, "'", collapse = ", "))
      cat("\n \n")
    }
    if(secondnotfirst) {
      cat("Values in second not in first: ")
      setd <- dplyr::setdiff(y, x)
      if(length(setd) != 0) cat(paste0("'", setd, "'", collapse = ", "))
      cat("\n \n")
    }
    if(display_intersection) {
      cat("Intersection (Values that appear in both columns):")
      inter <- dplyr::intersect(x, y)
      if(length(inter) != 0) cat(paste0("'", inter, "'", collapse = ", "))
      cat("\n \n")
    }
    if(display_union) cat(paste0("Union (Values that appear in either column): ", paste0("'", dplyr::union(x, y), "'", collapse = ", ")))
  }
}

# Running sum of consecutive non-zero values. Each run restarts from
# initial_value; zeros and NAs pass through unchanged (they keep their own
# value from x and break the accumulation for the element AFTER an NA only if
# that element reads out[i-1]).
consecutive_sum <- function(x, initial_value = NA){
  out = x
  # Fix: the original used 1:length(x), which iterates (1, 0) and errors on a
  # zero-length `if` condition when x is empty.
  for(i in seq_along(x)){
    if(!is.na(x[i])){
      if(x[i] != 0){
        if(i > 1){
          out[i]=x[i] + out[i-1]
        } else{
          out[i] = x[i] + initial_value
        }
      }
    }
  }
  return(out)
}

# Largest running sum of consecutive non-zero values (runs start from 0).
max_consecutive_sum <- function(x){
  max(consecutive_sum(x, initial_value = 0))
}

# Hash each element of x (optionally salted) with digest::digest.
# Default algo "crc32" is NOT cryptographic - fine for anonymised IDs,
# not for security-sensitive hashing.
hashed_id <- function(x, salt, algo = "crc32") {
  if (missing(salt)){
    y <- x
  }else{
    y <- paste(x, salt)
  }
  y <- sapply(y, function(X) digest::digest(X, algo = algo))
  as.character(y)

}

# Possible alternative but is slower:
# spells <- function(z) {
#   Reduce(function(x,y) {y = dplyr::if_else(y == 0, 0, x + 1)}, z[-1],
#          init = dplyr::if_else(z[1] == 0, 0, NA_real_), accumulate = TRUE)
# }
# Length of the current "spell": counts consecutive non-zero entries, reset to
# 0 whenever x is 0. initial_value seeds the count carried in from before the
# start of x (NA_real_ = unknown history).
.spells <- function(x, initial_value = NA_real_) {
  y <- mat.or.vec(length(x), 1)
  if(length(x) > 0) {
    y[1] <- dplyr::if_else(x[1] == 0, 0, initial_value + 1)
    if(length(x) > 1) {
      for(i in 2:length(x)) {
        y[i] <- dplyr::if_else(x[i] == 0, 0, y[i-1] + 1)
      }
    }
  }
  return(y)
}

# Convert degrees/minutes/seconds to decimal degrees. dir (optional) gives the
# compass direction per element: "S"/"W" negate, "N"/"E" keep positive, NA in
# dir yields NA. When dir is omitted the sign of dd is used as-is.
convert_to_dec_deg <- function (dd, mm = 0 , ss = 0, dir) {
  if(missing(dd)) stop("dd must be supplied")
  if(!missing(dir)) {
    dir <- toupper(dir)
    if(!all(na.omit(dir) %in% c("E", "W", "N", "S"))) stop("dir must only contain direction letters E, W, N or S")
    if(any(na.omit(dd) < 0)) stop("dd must be positive if dir is supplied")
  }
  if(!all(mm >= 0 & mm <= 60, na.rm = TRUE)) stop("mm must be between 0 and 60")
  if(!all(ss >= 0 & ss <= 60, na.rm = TRUE)) stop("ss must be between 0 and 60")
  # Fix: the original evaluated `dir` unconditionally here, so omitting dir
  # (treated as optional by the missing() checks above) raised
  # 'argument "dir" is missing, with no default'.
  if (missing(dir)) {
    sgn <- 1
  } else {
    sgn <- ifelse(is.na(dir), NA, ifelse(dir %in% c("S", "W"), -1, 1))
  }
  decdeg <- (dd + ((mm * 60) + ss)/3600) * sgn
  return(decdeg)
}

# Expand a 2-digit year to 4 digits: years up to `base` map into 20xx,
# later ones into 19xx (e.g. base = 2020: 19 -> 2019, 21 -> 1921).
convert_yy_to_yyyy <- function (x, base) {
  if(missing(base)) stop("base year must be supplied")
  dplyr::if_else(x+2000 <= base, x+2000, x+1900)
}

# Cache CRAN's package index in the global variable av_packs (used by
# package_check below). Deliberately uses <<- to share the cache.
create_av_packs <- function() {
  av_packs <<- available.packages(repos = "https://cran.rstudio.com/")
  av_packs <<- data.frame(av_packs)
}

# Classify a package's status. out[[1]] codes:
# "1" installed & on CRAN (out[[2]] = compareVersion result, out[[3]]/out[[4]]
#     = local/CRAN versions), "2" on CRAN but not installed,
# "3" installed but not on CRAN, "4" neither, "5" offline.
package_check <- function(package) {
  out <- c()
  if (!pingr::is_online()) out[[1]] <- "5"
  else{
    if(!exists("av_packs")) {
      create_av_packs()
    }
    #CHECK the Package is a CRAN package
    if (package %in% av_packs$Package){
      #PACKAGE IS INSTALLED
      if (package %in% rownames(installed.packages())){
        out[[1]] <- "1"
        v_machine <- as.character(packageVersion(package))
        v_web <- as.character(av_packs[av_packs$Package == package, "Version"])
        out[[2]] <- compareVersion(v_machine, v_web)
        out[[3]] <- v_machine
        out[[4]] <- v_web
      }
      else out[[1]] <- "2"
    }
    else{
      #PACKAGE IS INSTALLED BUT NOT IN THE CRAN REPO
      if (package %in% rownames(installed.packages())) out[[1]] <- "3"
      #PACKAGE IS NOT INSTALLED AND NOT IN THE CRAN REPO
      else out[[1]] <- "4"
    }

  }
  return(out)
}

in_top_n <-
function(x, n = 10, wt, fun = sum) { # NOTE: body of in_top_n, assigned on the previous source line
  # TRUE for elements of x whose value is among the n most frequent
  # (or, with wt supplied, among the n with the largest fun(wt)).
  dat <- data.frame(x = x)
  if(!missing(wt)) {
    dat$wt <- wt
    dat <- dat %>%
      dplyr::group_by(x) %>%
      dplyr::summarise(fq = as.function(fun)(na.omit(wt))) %>% dplyr::arrange(-fq)
  }
  else dat <- dat %>% dplyr::count(x, sort = TRUE, name = "fq")
  # NOTE(review): if there are fewer than n distinct values, dat$x[1:n]
  # contains NAs; %in% ignores them, so the result is still correct.
  return(x %in% dat$x[1:n])
}

# sample() wrapper that is safe for length-0 (returns NA) and length-1 input
# (returns x itself, avoiding sample()'s 1:x scalar surprise; `size` is
# deliberately ignored in that case).
summary_sample <- function(x, size, replace = FALSE){
  if(length(x)==0){return(NA)}
  else if(length(x)==1){return(x)}
  else{sample(x = x, size = size, replace = replace)}
}

# Append an IRIDL-style X/Y RANGEEDGES area selection to a data-library URL.
# Negative longitudes become "W", negative latitudes "S".
add_xy_area_range <- function(path, min_lon, max_lon, min_lat, max_lat, dim_x = "X", dim_y = "Y") {
  paste0(
    path, "/", dim_x, "/",
    "(", ifelse(min_lon < 0, paste0(abs(min_lon), "W"), paste0(min_lon, "E")), ")", "/",
    "(", ifelse(max_lon < 0, paste0(abs(max_lon), "W"), paste0(max_lon, "E")), ")", "/",
    "RANGEEDGES", "/",
    dim_y, "/",
    "(", ifelse(min_lat < 0, paste0(abs(min_lat), "S"), paste0(min_lat, "N")), ")", "/",
    "(", ifelse(max_lat < 0, paste0(abs(max_lat), "S"), paste0(max_lat, "N")), ")", "/",
    "RANGEEDGES", "/"
  )
}

# Append an IRIDL-style single-point (VALUES) X/Y selection to a URL.
add_xy_point_range <- function(path, min_lon, min_lat, dim_x = "X", dim_y = "Y") {
  paste0(
    path, "/", dim_x, "/",
    "(", ifelse(min_lon < 0, paste0(abs(min_lon), "W"), paste0(min_lon, "E")), ")", "/",
    "VALUES", "/",
    dim_y, "/",
    "(", ifelse(min_lat < 0, paste0(abs(min_lat), "S"), paste0(min_lat, "N")), ")", "/",
    "VALUES", "/"
  )
}

# Append an IRIDL-style time (T) RANGEEDGES selection; %20 is a URL-encoded
# space between day, month label and year.
add_t_range <- function(path, min_date, max_date, dim_t = "T") {
  paste0(
    path, dim_t, "/",
    "(", lubridate::day(min_date), "%20", lubridate::month(min_date, label = TRUE),
    "%20", lubridate::year(min_date), ")", "/",
    "(", lubridate::day(max_date), "%20", lubridate::month(max_date, label = TRUE),
    "%20", lubridate::year(max_date), ")", "/",
    "RANGEEDGES", "/"
  )
}

# Append the netCDF download suffix to a data-library URL.
# NOTE(review): assumes `path` already ends with "/" - confirm against callers.
add_nc <- function(path) {
  paste0(path, "data.nc")
}

# Build the text of a Fourier-series model formula with n harmonics,
# e.g. "sin(x * 1 * 2 * pi / p) + cos(x * 1 * 2 * pi / p) + ..." -
# returns a string for later parsing, not a numeric result.
fourier_series <- function(x, n, period) {
  p2 <- "2 * pi"
  h <- seq_len(n)
  paste0("sin(", x, " * ", h, " * ", p2, " / ", period, ")", " + ",
         "cos(", x, " * ", h, " * ", p2, " / ", period, ")",
         collapse = " + ")
}


# Summarise missing values per station x element: first/last usable date,
# count and percentage missing, and the number of complete (gap-free) years.
# `date`, `elements`, `stations` are data-masked column references ({{ }}).
# start/end: TRUE = trim to the first/last non-missing observation per
# element; FALSE = use the first/last date of the (grouped) data.
climatic_missing <- function(data, date, elements = ..., stations,
                             start = TRUE, end = FALSE){


  if (missing(date)){
    stop('argument "date" is missing, with no default')
  }

  if (missing(elements)){
    stop('argument "elements" is missing, with no default')
  }

  # stack data: one row per (date, element) so all elements share one pipeline
  data.stack <- data %>%
    tidyr::pivot_longer(cols = c({{ elements }}),
                        names_to = "Element",
                        values_to = "value")

  # sort start/end times

  # set start date: index of the first non-NA value via which.min(is.na(...))
  if (start){
    data.stack <- data.stack %>%
      dplyr::group_by({{ stations }}, Element) %>%
      dplyr::mutate(start = ({{ date }})[which.min(is.na( value ))])

  }else{
    data.stack <- data.stack %>%
      dplyr::group_by({{ stations }}) %>%
      dplyr::mutate(start = dplyr::first( {{ date }} ))
  }

  # set end date: last index holding a non-NA value
  if (end){
    data.stack <- data.stack %>%
      dplyr::group_by({{ stations }}, Element ) %>%
      dplyr::mutate(end = ({{ date }} )[dplyr::last(which(!is.na( value )))])
  }else{
    data.stack <- data.stack %>%
      dplyr::group_by({{ stations }} ) %>%
      dplyr::mutate(end = dplyr::last({{ date }}))
  }

  # number and percentage missing within each station/element's [start, end]
  summary.data <- data.stack %>%
    dplyr::group_by({{ stations }}, Element) %>%
    dplyr::filter(({{ date }}) >= start & ({{ date }}) <= end) %>%
    dplyr::summarise(From = dplyr::first(start),
                     To = dplyr::last(end),
                     Missing = sum(is.na(value)),
                     `%` = round(sum(is.na(value))/n()*100, 1))

  # complete years: a year is "full" when it has zero missing values
  complete.years <- data.stack %>%
    dplyr::group_by({{ stations }}) %>%
    dplyr::filter(({{ date }}) >= start & ({{ date }}) <= end) %>%
    dplyr::group_by(lubridate::year({{ date }}), {{ stations }}, Element) %>%
    dplyr::summarise(count = sum(is.na(value)))
  complete.years <- complete.years %>%
    dplyr::group_by({{ stations }}, Element) %>%
    dplyr::summarise(Full_Years = sum(count == 0))


  # bind together
  summary.data <- merge(summary.data, complete.years)

  if (missing(stations)){
summary.data$stations <- NULL # NOTE: tail of climatic_missing - drop the placeholder column when no stations grouping was given
  }

  return(summary.data)
}



# Detail the runs of missing data per station x element at day, month and/or
# year level. Returns one stacked data frame with From/To/Count/Level rows.
# `date`, `elements`, `stations` are data-masked column references ({{ }});
# order = TRUE sorts runs chronologically, otherwise by element.
# NOTE(review): the `level` parameter is accepted but never used in the body.
climatic_details <- function(data, date, elements = ..., stations,
                             order = TRUE,
                             day = FALSE,
                             month = TRUE,
                             year = FALSE, level = FALSE){


  if (missing(date)){
    stop('argument "date" is missing, with no default')
  }

  if (missing(elements)){
    stop('argument "elements" is missing, with no default')
  }

  i <- 0
  list_tables <- NULL

  # stack data: one row per (date, element)
  data.stack <- data %>%
    tidyr::pivot_longer(cols = c({{ elements }}),
                        names_to = "Element",
                        values_to = "Value") %>%
    dplyr::mutate(Element = make_factor(Element))

  # sort start/end times

  if (!any(day, month, year)){
    warning('At least one of day, month, year need to be selected')
  }

  if (day){
    i = i + 1
    # rleid() numbers runs of identical values, so consecutive NAs share an id
    detail.table.day = data.stack %>%
      dplyr::group_by({{ stations }}, Element) %>%
      dplyr::mutate(element.na = data.table::rleid(Value)) %>%
      dplyr::filter(is.na(Value)) %>%
      dplyr::group_by(element.na, {{ stations }}, Element) %>%
      dplyr::summarise(From = dplyr::first({{ date }}),
                       To = dplyr::last({{ date }}),
                       Count = dplyr::n()) %>%
      mutate(Level = "Day")

    if (order){
      detail.table.day <- detail.table.day %>% dplyr::arrange(From)
    } else {
      detail.table.day <- detail.table.day %>% dplyr::arrange(Element)
    }

    detail.table.day <- detail.table.day %>% dplyr::ungroup() %>% dplyr::select(-c("element.na"))
    list_tables[[i]] <- detail.table.day

  }

  if (month){
    i = i + 1
    detail.table.month <- data.stack %>%
      dplyr::mutate(Date.ym = zoo::as.yearmon({{ date }})) %>%
      dplyr::group_by(Date.ym, {{ stations }}, Element)

    detail.table.month <- detail.table.month %>%
      dplyr::summarise(no = n(),
                       na = sum(is.na(Value)),
                       From = dplyr::first({{ date }}),
                       To = dplyr::last({{ date }})) %>%
      dplyr::mutate(is.complete = ifelse(no == na, 1, 0)) # 0 if all are missing

    # group consecutive fully-missing months into runs via rleid()
    detail.table.month <- detail.table.month %>%
      dplyr::group_by({{ stations }}, Element) %>%
      dplyr::mutate(element.na = data.table::rleid(is.complete)) %>%
      dplyr::filter(is.complete == 1) %>%
      dplyr::group_by(element.na, {{ stations }}, Element) %>%
      dplyr::summarise(From = dplyr::first(From),
                       To = dplyr::last(To),
                       Count = n()) %>%
      mutate(Level = "Month")

    if (order){
      detail.table.month <- detail.table.month %>% dplyr::arrange(From)
    } else {
      detail.table.month <- detail.table.month %>% dplyr::arrange(Element)
    }

    detail.table.month <- detail.table.month %>% dplyr::ungroup() %>% dplyr::select(-c("element.na"))
    list_tables[[i]] <- detail.table.month
  }

  if (year) {
    i = i + 1
    detail.table.year <- data.stack %>%
      dplyr::mutate(Date.y = lubridate::year({{ date }})) %>%
      dplyr::group_by(Date.y, {{ stations }}, Element)

    detail.table.year <- detail.table.year %>%
      dplyr::summarise(no = n(),
                       na = sum(is.na(Value)),
                       From = dplyr::first({{ date }}),
                       To = dplyr::last({{ date }})) %>%
      dplyr::mutate(is.complete = ifelse(no == na, 1, 0)) # 0 if all are missing

    detail.table.year <- detail.table.year %>%
      dplyr::group_by({{ stations }}, Element) %>%
      dplyr::mutate(element.na = data.table::rleid(is.complete)) %>%
      dplyr::filter(is.complete == 1) %>%
      dplyr::group_by(element.na, {{ stations }}, Element) %>%
      dplyr::summarise(From = dplyr::first(From),
                       To = dplyr::last(To),
                       Count = n()) %>%
      mutate(Level = "Year")

    if (order){
      detail.table.year <- detail.table.year %>% dplyr::arrange(From)
    } else {
      detail.table.year <- detail.table.year %>% dplyr::arrange(Element)
    }

    detail.table.year <- detail.table.year %>% dplyr::ungroup() %>% dplyr::select(-c("element.na"))
    list_tables[[i]] <- detail.table.year
  }

  # stack the day/month/year tables into one data frame
  detail.table.all <- plyr::ldply(list_tables, data.frame) %>%
    dplyr::mutate(Level = make_factor(Level))

  return(detail.table.all)

}

# Slope of the simple linear regression of y on x (x coerced to numeric,
# e.g. for Date inputs). Returns the x coefficient only.
slope <- function(y, x) {
  x <- as.numeric(x)
  lm(y ~ x)$coefficients[2]

}

# make_factor is intended to be somewhat equivalent to forcats::as_factor() or base::as.factor().
# It provides default behaviour for converting to factor depending on the data type, similar to forcats::as_factor().
# For "character" and "numeric" types make_factor is consistent with forcats::as_factor() in terms of the order of the factor levels.
# It differs from forcats::as_factor() in two main ways:
# 1. It includes an ordered parameter to allow for creating ordered factors, including converting a factor to an ordered factor (and vice versa).
# 2. It works for any data types (e.g. Dates) whereas forcats::as_factor() is limited to "factor", "character", "logical", "numeric".
# For any other data types, levels are given in order of appearance (the same as for "character").
# Note that this should be used cautiously for other data types and the default behaviour may not be the most sensible.
# If anything other than this default behaviour is required, use factor().
make_factor <- function(x, ordered = is.ordered(x)) {
  if (is.factor(x)) {
    # Already a factor: only toggle the "ordered" class as requested.
    if (ordered != is.ordered(x)) {
      if (ordered) class(x) <- c("ordered", class(x))
      else class(x) <- class(x)[class(x) != "ordered"]
    }
    x
  } else if (is.numeric(x)) {
    factor(x, ordered = ordered)
  } else if (is.logical(x)) {
    factor(x, levels = c("FALSE", "TRUE"), ordered = ordered)
  } else if (is.character(x)) {
    # Levels in order of appearance, matching forcats::as_factor().
    factor(x, levels = unique(x), ordered = ordered)
  } else {
    factor(x, levels = as.character(unique(x)), ordered = ordered)
  }
}



# wwr_export function is meant to reshape data into formats required by WMO for submission of climatic data
# this gives Yearly data records with monthly and annual data for a particular year:
# One fixed-width .txt file is written per station into `folder`; each of the
# climate-element arguments is optional and produces a numbered section.
# `data` holds the yearly/monthly records; `station_data` holds the station
# metadata, joined via data[[link]] == station_data[[station_link]].
wwr_export <- function(data, year, month, mean_station_pressure, mean_sea_level_pressure,
                       mean_temp, total_precip, mean_max_temp, mean_min_temp, mean_rel_hum, link, link_by,
                       station_data, wmo_number, latitude, longitude, country_name, station_name,
                       height_station, height_barometer, wigos_identifier, folder) {

  stopifnot(link_by %in% c("wmo_number", "station_name"))
  if (any(nchar(station_data[[year]]) != 4)) stop("year must be a 4 digit number.")
  if (!missing(wmo_number)) {
    # Convert to character first so factor codes are not misread as numbers.
    if (is.factor(station_data[[wmo_number]])) station_data[[wmo_number]] <- as.character(station_data[[wmo_number]])
    if (any(is.na(as.numeric(station_data[[wmo_number]])))) stop("wmo_number must not contain missing values and must be a number.")
    if (any(nchar(as.character(station_data[[wmo_number]])) > 5, na.rm = TRUE)) stop("wmo_number must be no more than 5 digits.")
  }

  if (link_by == "wmo_number") {
    station_link <- wmo_number
  } else station_link <- station_name
  if (!all(unique(data[[link]]) %in% station_data[[station_link]])) {
    # NOTE(review): this message indexes station_data[[wmo_number]] even when
    # linking by station_name, and reports positions rather than names -
    # looks like it should use station_link; confirm before relying on it.
    stop("station_data is missing information for the following stations - found in the data:",
         paste(which(!unique(data[[link]]) %in% station_data[[wmo_number]]), collapse = ", "))
  }
  if (!missing(wmo_number)) {
    # Zero-pad WMO numbers to 5 digits; blanks for missing.
    station_data[[wmo_number]] <- as.numeric(station_data[[wmo_number]])
    station_data[[wmo_number]] <- ifelse(is.na(station_data[[wmo_number]]),
                                         "", sprintf("%05d", station_data[[wmo_number]]))
  } else {
    # Placeholder column so the header lines below can index it uniformly.
    wmo_number <- ".wmo_number"
    station_data[[wmo_number]] <- ""
  }
  station_data[[latitude]] <- dd_to_dms(station_data[[latitude]], lat = TRUE)
  station_data[[longitude]] <- dd_to_dms(station_data[[longitude]], lat = FALSE)
  if (!missing(height_station)) {
    station_data[[height_station]] <- ifelse(is.na(station_data[[height_station]]),
                                             "", round(station_data[[height_station]]))
  } else {
    height_station <- ".height_station"
    station_data[[height_station]] <- ""
  }
  if (!missing(height_barometer)) {
    station_data[[height_barometer]] <- ifelse(is.na(station_data[[height_barometer]]),
                                               "", round(station_data[[height_barometer]], 1))
  } else {
    height_barometer <- ".height_barometer"
    station_data[[height_barometer]] <- ""
  }
  if (missing(wigos_identifier)) {
    wigos_identifier <- ".wigos_identifier"
    station_data[[wigos_identifier]] <- ""
  }

  # For each supplied element: df_N_means = annual mean/sum per station-year,
  # df_N = station-year rows pivoted to one 6-char column per month.
  # NOTE(review): summary_mean()/summary_sum() are project helpers defined
  # elsewhere - presumably NA-aware mean/sum; confirm in the package.
  if (!missing(mean_station_pressure)) {
    df_2_means <- data %>%
      dplyr::group_by(!!! rlang::syms(c(link, year))) %>%
      dplyr::summarise(mean = sprintf("%6s", round(summary_mean(.data[[mean_station_pressure]], na.rm = TRUE), 1)), .groups = "keep")
    data[[mean_station_pressure]] <- ifelse(is.na(data[[mean_station_pressure]]),
                                            "", round(data[[mean_station_pressure]], 1))
    data[[mean_station_pressure]] <- sprintf("%6s", data[[mean_station_pressure]])
    df_2 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(mean_station_pressure),
                                        values_fill = strrep(" ", 6))
  }
  if (!missing(mean_sea_level_pressure)) {
    df_3_means <- data %>%
      dplyr::group_by(!!! rlang::syms(c(link, year))) %>%
      dplyr::summarise(mean = sprintf("%6s", round(summary_mean(.data[[mean_sea_level_pressure]], na.rm = TRUE), 1)), .groups = "keep")
    data[[mean_sea_level_pressure]] <- ifelse(is.na(data[[mean_sea_level_pressure]]),
                                              "", round(data[[mean_sea_level_pressure]], 1))
    data[[mean_sea_level_pressure]] <- sprintf("%6s", data[[mean_sea_level_pressure]])
    df_3 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(mean_sea_level_pressure),
                                        values_fill = strrep(" ", 6))
  }
  if (!missing(mean_temp)) {
    df_4_means <- data %>%
      dplyr::group_by(!!! rlang::syms(c(link, year))) %>%
      dplyr::summarise(mean = sprintf("%6s", round(summary_mean(.data[[mean_temp]], na.rm = TRUE), 1)), .groups = "keep")
    data[[mean_temp]] <- ifelse(is.na(data[[mean_temp]]),
                                "", round(data[[mean_temp]], 1))
    data[[mean_temp]] <- sprintf("%6s", data[[mean_temp]])
    df_4 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(mean_temp),
                                        values_fill = strrep(" ", 6))
  }
  if (!missing(total_precip)) {
    # Precipitation uses an annual SUM, not a mean; trace amounts
    # (<= 0.05 mm) are reported as 0.
    df_5_means <- data %>%
      dplyr::group_by(!!! rlang::syms(c(link, year))) %>%
      dplyr::summarise(mean = sprintf("%6s", format(summary_sum(.data[[total_precip]], na.rm = TRUE), digits = 1, nsmall = 1)), .groups = "keep")
    data[[total_precip]] <- ifelse(is.na(data[[total_precip]]),
                                   "", ifelse(data[[total_precip]] <= 0.05, 0, format(data[[total_precip]], digits = 1, nsmall = 1)))
    data[[total_precip]] <- sprintf("%6s", data[[total_precip]])
    df_5 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(total_precip),
                                        values_fill = strrep(" ", 6))
  }
  if (!missing(mean_max_temp)) {
    df_6_means <- data %>%
      group_by(!!! rlang::syms(c(link, year))) %>% # NOTE(review): unqualified group_by (dplyr:: elsewhere) - relies on dplyr being attached
      dplyr::summarise(mean = sprintf("%6s", round(summary_mean(.data[[mean_max_temp]], na.rm = TRUE), 1)), .groups = "keep")
    data[[mean_max_temp]] <- ifelse(is.na(data[[mean_max_temp]]),
                                    "", round(data[[mean_max_temp]], 1))
    data[[mean_max_temp]] <- sprintf("%6s", data[[mean_max_temp]])
    df_6 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(mean_max_temp),
                                        values_fill = strrep(" ", 6))
  }
  if (!missing(mean_min_temp)) {
    df_7_means <- data %>%
      dplyr::group_by(!!! rlang::syms(c(link, year))) %>%
      dplyr::summarise(mean = sprintf("%6s", round(summary_mean(.data[[mean_min_temp]], na.rm = TRUE), 1)), .groups = "keep")
    data[[mean_min_temp]] <- ifelse(is.na(data[[mean_min_temp]]),
                                    "", round(data[[mean_min_temp]], 1))
    data[[mean_min_temp]] <- sprintf("%6s", data[[mean_min_temp]])
    df_7 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(mean_min_temp),
                                        values_fill = strrep(" ", 6))
  }
  if (!missing(mean_rel_hum)) {
    if (any(data[[mean_rel_hum]] < 0 | data[[mean_rel_hum]] > 100, na.rm = TRUE)) stop("Mean Relative Humidity must be a percentage between 0 and 100.")
    # NOTE(review): annual mean is rounded to whole percent (0 d.p.) but the
    # monthly values below to 1 d.p. - confirm whether intentional.
    df_8_means <- data %>%
      dplyr::group_by(!!! rlang::syms(c(link, year))) %>%
      dplyr::summarise(mean = sprintf("%6s", round(summary_mean(.data[[mean_rel_hum]], na.rm = TRUE), 0)), .groups = "keep")
    data[[mean_rel_hum]] <- ifelse(is.na(data[[mean_rel_hum]]),
                                   "", round(data[[mean_rel_hum]], 1))
    data[[mean_rel_hum]] <- sprintf("%6s", data[[mean_rel_hum]])
    df_8 <- data %>% tidyr::pivot_wider(id_cols = tidyselect::all_of(c(link, year)),
                                        names_from = tidyselect::all_of(month),
                                        values_from = tidyselect::all_of(mean_rel_hum),
                                        values_fill = strrep(" ", 6))
  }

  month_header <- paste0("Year", " ", paste(c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
                                              "Aug", "Sep", "Oct", "Nov", "Dec", "ANNUAL"),
                                            collapse = strrep(" ", 4)))
  for (i in seq_along(station_data[[station_link]])) {
    # filter data for single station
    curr_df <- data %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
    # lines to be written to txt file
    lines <- c()
    # add header lines (label padding widths align the values in one column)
    lines <- append(lines, paste0("WMO Number:", strrep(" ", 28),
                                  station_data[[wmo_number]][i]))
    lines <- append(lines, paste0("Station Name:", strrep(" ", 26),
                                  station_data[[station_name]][i]))
    lines <- append(lines, paste0("Country Name:", strrep(" ", 26),
                                  station_data[[country_name]][i]))
    lines <- append(lines, paste0("Latitude (DD MM SS N/S):", strrep(" ", 15),
                                  station_data[[latitude]][i]))
    lines <- append(lines, paste0("Longitude (DDD MM SS E/W):", strrep(" ", 13),
                                  station_data[[longitude]][i]))
    lines <- append(lines, paste0("Station Height (whole meters):", strrep(" ", 9),
                                  station_data[[height_station]][i]))
    lines <- append(lines, paste0("Barometer Height (meters, to tenths):", strrep(" ", 2),
                                  station_data[[height_barometer]][i]))
    lines <- append(lines, paste0("WIGOS Station Identifier (WSI):", strrep(" ", 8),
                                  station_data[[wigos_identifier]][i]))
    # Each section: blank line, numbered title, month header, then one row per
    # year (columns 2:14 = Jan..Dec + year) followed by the annual value.
    if (!missing(mean_station_pressure)) {
      lines <- append(lines, "")
      lines <- append(lines, "(2) Mean Station Pressure (precision to tenths of hPa)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_2_tmp <- df_2 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_2_mean_tmp <- df_2_means
      vals <- apply(df_2_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_2_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    if (!missing(mean_sea_level_pressure)) {
      lines <- append(lines, "")
      lines <- append(lines, "(3) Mean Sea Level Pressure (precision to tenths of hPa)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_3_tmp <- df_3 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_3_mean_tmp <- df_3_means
      vals <- apply(df_3_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_3_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    if (!missing(mean_temp)) {
      lines <- append(lines, "")
      lines <- append(lines, "(4) Mean Daily Air Temperature (precision to tenths of degrees Celsius)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_4_tmp <- df_4 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_4_mean_tmp <- df_4_means
      vals <- apply(df_4_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_4_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    if (!missing(total_precip)) {
      lines <- append(lines, "")
      lines <- append(lines, "(5) Total Precipitation (precision to tenths of mm)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_5_tmp <- df_5 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_5_mean_tmp <- df_5_means
      vals <- apply(df_5_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_5_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    if (!missing(mean_max_temp)) {
      lines <- append(lines, "")
      lines <- append(lines, "(6) Mean Daily Maximum Air Temperature (precision to tenths of degree Celsius)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_6_tmp <- df_6 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_6_mean_tmp <- df_6_means
      vals <- apply(df_6_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_6_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    if (!missing(mean_min_temp)) {
      lines <- append(lines, "")
      lines <- append(lines, "(7) Mean Daily Minimum Air Temperature (precision to tenths of degree Celsius)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_7_tmp <- df_7 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_7_mean_tmp <- df_7_means
      vals <- apply(df_7_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_7_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    if (!missing(mean_rel_hum)) {
      lines <- append(lines, "")
      lines <- append(lines, "(8) Mean of the Daily Relative Humidity (whole percent)")
      lines <- append(lines, "")
      lines <- append(lines, month_header)
      lines <- append(lines, "")
      df_8_tmp <- df_8 %>% dplyr::filter(.data[[link]] == station_data[[station_link]][i])
      df_8_mean_tmp <- df_8_means
      vals <- apply(df_8_tmp, 1, function(r) paste0(r[2:14], collapse = " "))
      vals <- paste(vals, df_8_means %>%
                      dplyr::filter(.data[[link]] == station_data[[station_link]][i]) %>%
                      dplyr::pull(mean))
      lines <- append(lines, vals)
    }
    # One file per station, timestamped so repeated exports never overwrite.
    writeLines(lines, paste0(folder, "/", station_data[[station_link]][i], "-", format(Sys.time(), "%Y%m%d_%H%M%S"), ".txt"))
  }
  cat(i, "file(s) created at:", folder)
}

# Format decimal degrees as "DD MM SS dir" (latitude) or "DDD MM SS dir"
# (longitude), as required by the WWR header lines above.
dd_to_dms <- function(x, lat) {
  if (lat) dir <- ifelse(x >= 0, "N", "S")
  else dir <- ifelse(x >= 0, "E", "W")
  x <- abs(x)
  d <- trunc(x)
  m <- trunc((x - d) * 60)
  s <- round((x - d - m/60) * 3600)
  return(paste(sprintf(ifelse(lat, "%02d", "%03d"), d), sprintf("%02d", m), sprintf("%02d", s), dir))
}

# Mean residual life plot(s) via texmex::mrl, one panel per station when
# station_name is given (combined with patchwork), otherwise a single plot.
# umin/umax default to the data range of the element column.
# NOTE(review): in the multi-station branch umin/umax are filled in from the
# FIRST station's data and then reused for all stations (missing() is only
# TRUE on the first iteration) - confirm whether intentional.
plot_mrl <- function(data, station_name, element_name, umin, umax, ncol = 1,
                     xlab = "Threshold", ylab = "Mean excess", fill = "red",
                     col = "black", rug = TRUE, addNexcesses = TRUE, textsize = 4) {
  if (!missing(station_name)) {
    plts <- list()
    station_col <- data[, station_name]
    stations <- unique(station_col)
    for (i in seq_along(stations)) {
      d <- data[station_col == stations[i], ]
      element_col <- d[, element_name]
      if (missing(umin)) {
        umin <- min(element_col, na.rm = TRUE)
      }
      if (missing(umax)) {
        umax <- max(element_col, na.rm = TRUE)
      }
      plts[[i]] <- texmex::mrl(na.exclude(element_col), umin = umin, umax = umax) %>%
        ggplot2::ggplot(xlab = xlab, ylab = ylab, main = stations[i], fill = fill,
                        col = col, rug = rug, addNexcesses = addNexcesses, textsize = textsize
        )
    }
    patchwork::wrap_plots(plts, ncol = ncol)
  }
  else {
    element_col <- data[, element_name]
    if (missing(umin)) {
      umin <- min(element_col, na.rm = TRUE)
    }
    if (missing(umax)) {
      umax <- max(element_col, na.rm = TRUE)
    }
    texmex::mrl(data = na.exclude(element_col), umin = umin, umax = umax) %>%
      ggplot2::ggplot(xlab = xlab, ylab = ylab, fill = fill, col = col, rug = rug,
                      addNexcesses = addNexcesses, textsize = textsize
      )
  }
}

### Constants
month_abb_english <- c("Jan","Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
month_name_english <- c("January", "February", "March", "April", "May", "June", "July",
                        "August", "September", "October", "November", "December")

# factored out code for a multiple indices for a single station.
# Called by climdex().
# Not intended to be used externally.
# Compute the requested climdex.pcic indices for ONE prepared climdexInput
# object `ci` and return them as a data frame with a year column (and a month
# column when freq = "monthly"). `year`/`month` give the output column names.
# The "rnnmm" index column is suffixed with the threshold, e.g. "rnnmm_1".
climdex_single_station <- function(ci, freq = "annual", indices, year, month,
                                   spells.can.span.years = FALSE, gsl.mode = "GSL",
                                   threshold = 1) {
  # Fix: the default was the self-referential `gsl.mode = gsl.mode`, which
  # raises "promise already under evaluation" whenever the caller omits it.
  # "GSL" matches the default used by climdex() below.
  stopifnot(freq %in% c("annual", "monthly"))
  if (freq == "monthly" && missing(month)) stop("month is required for freq = 'monthly'.")
  if (missing(indices)) stop("No indices specified.")
  for (i in seq_along(indices)) {
    vals <- switch(indices[i],
                   "fd" = climdex.pcic::climdex.fd(ci),
                   "su" = climdex.pcic::climdex.su(ci),
                   "id" = climdex.pcic::climdex.id(ci),
                   "tr" = climdex.pcic::climdex.tr(ci),
                   "wsdi" = climdex.pcic::climdex.wsdi(ci, spells.can.span.years = spells.can.span.years),
                   "csdi" = climdex.pcic::climdex.csdi(ci, spells.can.span.years = spells.can.span.years),
                   "gsl" = climdex.pcic::climdex.gsl(ci, gsl.mode = gsl.mode),
                   "txx" = climdex.pcic::climdex.txx(ci, freq = freq),
                   "txn" = climdex.pcic::climdex.txn(ci, freq = freq),
                   "tnx" = climdex.pcic::climdex.tnx(ci, freq = freq),
                   "tnn" = climdex.pcic::climdex.tnn(ci, freq = freq),
                   "tn10p" = climdex.pcic::climdex.tn10p(ci, freq = freq),
                   "tx10p" = climdex.pcic::climdex.tx10p(ci, freq = freq),
                   "tn90p" = climdex.pcic::climdex.tn90p(ci, freq = freq),
                   "tx90p" = climdex.pcic::climdex.tx90p(ci, freq = freq),
                   "dtr" = climdex.pcic::climdex.dtr(ci, freq = freq),
                   "rx1day" = climdex.pcic::climdex.rx1day(ci, freq = freq),
                   "rx5day" = climdex.pcic::climdex.rx5day(ci, freq = freq),
                   "sdii" = climdex.pcic::climdex.sdii(ci),
                   "r10mm" = climdex.pcic::climdex.r10mm(ci),
                   "r20mm" = climdex.pcic::climdex.r20mm(ci),
                   "rnnmm" = climdex.pcic::climdex.rnnmm(ci, threshold = threshold),
                   "cdd" = climdex.pcic::climdex.cdd(ci, spells.can.span.years = spells.can.span.years),
                   "cwd" = climdex.pcic::climdex.cwd(ci, spells.can.span.years = spells.can.span.years),
                   "r95ptot" = climdex.pcic::climdex.r95ptot(ci),
                   "r99ptot" = climdex.pcic::climdex.r99ptot(ci),
                   "prcptot" = climdex.pcic::climdex.prcptot(ci),
                   stop("index name ", indices[i], " not recognised.")
    )
    if (i == 1) {
      # First index establishes the data frame and its time column(s): the
      # names of `vals` are "YYYY" (annual) or "YYYY-MM" (monthly).
      if (freq == "annual") {
        df_ind <- data.frame(names(vals), unname(vals))
        names(df_ind) <- c(year, indices[i])
      } else {
        df_ind <- data.frame(stringr::str_split_fixed(string = names(vals), n = 2, pattern = "-"), vals, row.names = NULL)
        names(df_ind) <- c(year, month, indices[i])
        df_ind[[month]] <- as.numeric(df_ind[[month]])
      }
    }
    else {
      # Subsequent indices are appended as extra columns (same time axis).
      df_ind[[indices[i]]] <- unname(vals)
    }
    if (indices[[i]] == "rnnmm") names(df_ind)[ncol(df_ind)] <- paste(indices[[i]], threshold, sep = "_")
  }
  return(df_ind)
}

climdex <- function(data, station, date, year, month, prec, tmax, tmin, indices, freq = "annual",
                    base.range = c(1961, 1990), n = 5, northern.hemisphere = TRUE,
                    quantiles = NULL, temp.qtiles = c(0.1, 0.9),
                    prec.qtiles = c(0.95, 0.99), max.missing.days = c(annual = 15, monthly = 3),
                    min.base.data.fraction.present = 0.1, spells.can.span.years = FALSE,
                    gsl.mode = "GSL", threshold = 1) {
  stopifnot(freq %in% c("annual", "monthly"))
  if (freq == "monthly" && missing(month)) stop("month is required for freq = 'monthly'.")

  # All indices can be calculated annually. Only some have monthly versions as well.
  year_only_indices <- c("fd", "su", "id", "tr", "wsdi", "csdi", "gsl", "sdii", "r10mm",
                         "r20mm", "rnnmm", "cdd", "cwd", "r95ptot", "r99ptot", "prcptot")
  if (freq == "monthly" && any(indices %in% year_only_indices)) stop("Some indices selected are not available on a monthly frequency.")

  # climdex.pcic only calculates for a single station at a time so need to do individually then dplyr::bind_rows() together.
  if (!missing(station)) {
    stations <- unique(data[[station]])
    # If data[[station]] is a factor this ensure stations are in factor order (and drops levels that don't appear in the data).
- if (is.factor(data[[station]])) stations <- intersect(levels(data[[station]]), stations) - df_list <- vector(mode = "list", length = length(stations)) - for (s in seq_along(stations)) { - df_station <- data %>% dplyr::filter(.data[[station]] == stations[s]) - ci <- climdex.pcic::climdexInput.raw(prec = df_station[[prec]], tmax = df_station[[tmax]], tmin = df_station[[tmin]], - base.range = base.range, northern.hemisphere = northern.hemisphere, - temp.qtiles = temp.qtiles, prec.qtiles = prec.qtiles, - max.missing.days = max.missing.days, - min.base.data.fraction.present = min.base.data.fraction.present, - tmax.dates = PCICt::as.PCICt(x = as.character(df_station[[date]]), cal="gregorian"), - tmin.dates = PCICt::as.PCICt(x = as.character(df_station[[date]]), cal="gregorian"), - prec.dates = PCICt::as.PCICt(x = as.character(df_station[[date]]), cal="gregorian")) - df_list[[stations[s]]] <- climdex_single_station(ci = ci, freq = freq, indices = indices, year = year, month = month, - spells.can.span.years = spells.can.span.years, gsl.mode = gsl.mode, - threshold = threshold) - } - df_out <- dplyr::bind_rows(df_list, .id = station) - } - else { - ci <- climdex.pcic::climdexInput.raw(prec = data[[prec]], tmax = data[[tmax]], tmin = data[[tmin]], - base.range = c(1961, 1990), northern.hemisphere = TRUE, - temp.qtiles = c(0.1, 0.9), prec.qtiles = c(0.95, 0.99), - max.missing.days = c(annual = 15, monthly = 3), - min.base.data.fraction.present=0.1, - tmax.dates = PCICt::as.PCICt(x = as.character(data[[date]]), cal="gregorian"), - tmin.dates = PCICt::as.PCICt(x = as.character(data[[date]]), cal="gregorian"), - prec.dates = PCICt::as.PCICt(x = as.character(data[[date]]), cal="gregorian")) - df_out <- climdex_single_station(ci = ci, freq = freq, indices = indices, year = year, month = month, gsl.mode = gsl.mode) - } - # Make the type of the year/month column(s) the same in the output as in data. 
- if (!missing(station)) { - # TODO This is done in several places and should be extracted as a function. - if (is.numeric(data[[station]])) df_out[[station]] <- as.numeric(df_out[[station]]) - else if (is.factor(data[[station]])) df_out[[station]] <- make_factor(df_out[[station]]) - else if (is.character(data[[station]])) df_out[[station]] <- as.character(df_out[[station]]) - else warning("Cannot recognise the class of station column. Link between data frames may be unstable.") - } - if (is.numeric(data[[year]])) df_out[[year]] <- as.numeric(df_out[[year]]) - else if (is.factor(data[[year]])) df_out[[year]] <- make_factor(df_out[[year]]) - else if (is.character(data[[year]])) df_out[[year]] <- as.character(df_out[[year]]) - if (freq == "monthly") { - if (is.numeric(data[[month]])) df_out[[month]] <- as.numeric(df_out[[month]]) - else if (is.factor(data[[month]])) { - lvs <- levels(data[[month]]) - if (length(lvs) == 12) df_out[[month]] <- factor(df_out[[month]], labels = lvs, ordered = is.ordered(data[[month]])) - else { - warning("month is a factor but does not have 12 levels. Output may not link correctly to data.") - df_out[[month]] <- make_factor(df_out[[month]]) - } - } - else if (is.character(data[[month]])) { - mns <- unique(data[[month]]) - # Also check English names as month.abb and month.name are locale dependent. 
- if (length(mns) == 12) { - if (setequal(mns, month.abb)) df_out[[month]] <- month.abb[df_out[[month]]] - else if (setequal(mns, month.name)) df_out[[month]] <- month.name[df_out[[month]]] - else if (setequal(mns, month_name_english)) df_out[[month]] <- month_abb_english[df_out[[month]]] - else if (setequal(mns, month_name_english)) df_out[[month]] <- month_name_english[df_out[[month]]] - else if (setequal(mns, tolower(month_name_english))) df_out[[month]] <- tolower(month_abb_english)[df_out[[month]]] - else if (setequal(mns, tolower(month_name_english))) df_out[[month]] <- tolower(month_name_english)[df_out[[month]]] - else if (setequal(mns, toupper(month_name_english))) df_out[[month]] <- toupper(month_abb_english)[df_out[[month]]] - else if (setequal(mns, toupper(month_name_english))) df_out[[month]] <- toupper(month_name_english)[df_out[[month]]] - else warning("Cannot determine format of month column in data. Output may not link correctly to data.") - } else { - warning("month does not have 12 unique values. Output may not link correctly to data.") - df_out[[month]] <- as.character(df_out[[month]]) - } - } - } - return(df_out) -} - -spei_input <- function(data, station, year, month, element) { - if (missing(station)) id_cols <- c(year, month) else id_cols <- c(station, year, month) - # SPEI package assumes data is ordered so must be sorted - data_sort <- data %>% dplyr::arrange(!!! rlang::syms(id_cols)) - data <- data_sort - # There should be a better way to check this. - if (!all(data == data_sort, na.rm = TRUE)) stop("data must be sorted by (", paste(id_cols, collapse = ", "), ") for SPEI/SPI to be calculated correctly.") - # Monthly data i.e. one value per month (per station) is required - if (anyDuplicated(data %>% dplyr::select(!!! rlang::syms(id_cols)))) stop("Multiple values per month detected. 
SPEI/SPI requires monthly data.") - if (!missing(station)) { - for (s in unique(data[[station]])) { - df <- data %>% dplyr::filter(.data[[station]] == s) - dates_seq <- seq.Date(from = as.Date(paste(df[[year]][1], as.numeric(df[[month]][1]), 1), format = "%Y %m %d"), - to = as.Date(paste(tail(df[[year]], 1), tail(as.numeric(df[[month]]), 1), 1), format = "%Y %m %d"), - by = "1 month") - if (length(dates_seq) != nrow(df)) stop("Less rows than expected. data has gaps for missing months in '", s, "'. SPEI/SPI requires no date gaps.") - } - } else { - dates_seq <- seq.Date(from = as.Date(paste(data[[year]][1], as.numeric(data[[month]][1]), 1), format = "%Y %m %d"), - to = as.Date(paste(tail(data[[year]], 1), tail(as.numeric(data[[month]]), 1), 1), format = "%Y %m %d"), - by = "1 month") - if (length(dates_seq) != nrow(data)) stop("Less rows than expected. data has gaps for missing months. SPEI/SPI requires no date gaps.") - } - cols <- c(id_cols, element) - start <- c(data[[year]][1], data[[month]][1]) - # If multiple stations, needs to be in "wide" format for SPEI - if (!missing(station)) { - ts_data <- tidyr::pivot_wider(data, id_cols = tidyselect::all_of(c(year, month)), - names_from = tidyselect::all_of(station), values_from = tidyselect::all_of(element), - values_fill = NA) - ts_data <- ts_data %>% dplyr::arrange(!!! rlang::syms(c(year, month))) - # Not sure how to do this using dplyr::select - ts_data[id_cols] <- NULL - ts_data <- ts(as.matrix(ts_data), frequency = 12, start = start) - } else { - ts_data <- ts(as.matrix(data[[element]]), frequency = 12, start = start) - } - ts_data -} - -# This function extracts the SPEI/SPI column from an spei object x. -# It requires the original data in order to return a vector of the correct length by removing NA values introduced when unstacking. -# An alternative to this is to have a single wrapper SPEI/SPI function to handle this. 
-# The advantage of this method is that it doesn't hide the call to SPEI/SPI in R-Instat and is compatible with the existing dialog. -# The wrapper function may be a prefered long-term solution. -spei_output <- function(x, data, station, year, month) { - if (! inherits(x, "spei")) stop("x must be an object of class 'spei'") - vals <- x$fitted - # If is.mts then multiple stations. Need to unstack and merge to ensure correct values obtained. - if (is.mts(vals)) { - df_spei <- as.data.frame(vals) - # ind will be the year in fractions - df_spei$ind <- zoo::index(x$fitted) - # Stack all stations to get back into tidy format. - df_spei <- tidyr::pivot_longer(df_spei, cols = 1:ncol(vals)) - # Integer part is year - df_spei$yy <- trunc(df_spei$ind) - # Remainder is fraction of month. Use round to ensure exact integers for merging. - df_spei$mm <- ((df_spei$ind - df_spei$yy) * 12) + 1 - df_spei$mm <- round(df_spei$mm) - if (!(is.numeric(data[[month]]) | is.factor(data[[month]]))) stop("month must be numeric or factor to ensure SPEI/SPI values are calculated correctly.") - # If factor, this assumes levels are in correct order. - data[[month]] <- as.numeric(data[[month]]) - by <- c("name", "yy", "mm") - names(by) <- c(station, year, month) - # Need to merge to know which NA values are true and which were introduced when unstacking. - df_new <- dplyr::left_join(data, df_spei, by = by) - col <- df_new$value - } else { - # If single station, then no extra missing values were introduced. Data just needs to be made into a vector. - col <- as.vector(vals) - } - col - -} - -# This function has been adapted from extRemes::threshrange.plot(). -# It has been adapted for use in R-Instat and uses ggplot2 graphical system rather than base plot(). -threshold_Plot <- function(x, r, type = c("GP", "PP", "Exponential"), nint = 10, - alpha = 0.05, na.action = na.omit, xlb = "", main = NULL , verbose = FALSE, - ...) 
{ - type <- match.arg(type) - x <- na.action(x) - n <- length(x) - lst_plots <- list() - if (missing(r)) { - r <- quantile(x, probs = c(0.75, 0.99)) - } - u.i <- matrix(seq(from = r[1], to = r[2],length.out = nint), ncol = 1) - thfun <- function(u, x, type, a, verbose, ...) { - fit <- try(extRemes::fevd( - x = x, threshold = u, type = type, verbose = verbose, - ... - ), silent = verbose) - if (verbose) { - print(fit) - } - if (all(class(fit) != "try-error")) { - if (!is.element(type, c("PP", "Exponential"))) { - res <- try(distillery::ci(fit, - type = "parameter", alpha = a, - R = 100, tscale = TRUE, ... - ), silent = verbose) - } else { - res <- try(distillery::ci(fit, - type = "parameter", alpha = a, - R = 100, ... - ), silent = verbose) - } - if (verbose) { - print(res) - } - } - else { - res <- fit - } - if (any(class(res) == "try-error")) { - if (type == "PP") { - res <- matrix(NA, 3, 3) - } else if (type != "Exponential") { - res <- matrix(NA, 2, 3) - } else { - res <- rep(NA, 3) - } - } - return(res) - } - out <- apply(u.i, 1, thfun, - x = x, type = type, a = alpha, - verbose = verbose, ... 
- ) - if (type == "PP") { - rownames(out) <- c( - "low.loc", "low.scale", "low.shape", - "location", "scale", "shape", "up.loc", "up.scale", - "up.shape" - ) - } else if (type != "Exponential") { - rownames(out) <- c( - "low.t.scale", "low.shape", "t.scale", - "shape", "up.t.scale", "up.shape" - ) - } else { - rownames(out) <- c("low.scale", "scale", "up.scale") - } - m1 <- deparse(match.call()) - if (type == "PP") { - yl <- range(c(out[c("low.loc", "location", "up.loc"), ]), finite = TRUE) - lst_plots[[1]] <- ggplot2::qplot( - x = u.i, y = out["location", ], ylim = yl, xlab = xlb, - ylab = "location", geom = c("line", "point"), main = main - ) + - ggplot2::geom_pointrange(mapping = ggplot2::aes(ymin = out["low.loc", ], ymax = out["up.loc", ])) - yl <- range(c(out[c("low.scale", "scale", "up.scale"), ]), finite = TRUE) - lst_plots[[2]] <- ggplot2::qplot( - x = u.i, y = out["scale", ], ylim = yl, xlab = xlb, ylab = "scale", - geom = c("point", "line") - ) + - ggplot2::geom_pointrange(mapping = ggplot2::aes(ymin = out["low.scale", ], ymax = out["up.scale", ])) - yl <- range(c(out[c("low.shape", "shape", "up.shape"), ]), finite = TRUE) - lst_plots[[3]] <- ggplot2::qplot(u.i, out["shape", ], - ylim = yl, xlab = "Threshold", - ylab = "shape", geom = c("point", "line") - ) + - ggplot2::geom_pointrange(mapping = ggplot2::aes(ymin = out["low.shape", ], ymax = out["up.shape", ])) - } - else if (type != "Exponential") { - yl <- range(c(out[c("low.t.scale", "t.scale", "up.t.scale"), ]), finite = TRUE) - lst_plots[[1]] <- ggplot2::qplot( - x = u.i, y = out["t.scale", ], ylim = yl, xlab = xlb, ylab = "reparameterized scale", - geom = c("point", "line"), main = main) + - ggplot2::geom_pointrange(mapping = ggplot2::aes(ymin = out["low.t.scale", ], ymax = out["up.t.scale", ])) - yl <- range(c(out[c("low.shape", "shape", "up.shape"), ]), finite = TRUE) - lst_plots[[2]] <- ggplot2::qplot( - x = u.i, y = out["shape", ], ylim = yl, xlab = "Threshold", - ylab = "shape", geom = 
c("point", "line") - ) + - ggplot2::geom_pointrange(mapping = ggplot2::aes(ymin = out["low.shape", ], ymax = out["up.shape", ])) - } - else { - yl <- range(c(out[c("low.scale", "scale", "up.scale"), - ]), finite = TRUE) - return(ggplot2::qplot( - x = u.i, y = out["scale", ], ylim = yl, xlab = "Threshold", - ylab = "scale", geom = c("point", "line"), main = main - ) + - ggplot2::geom_pointrange(mapping = ggplot2::aes(ymin = out["low.scale", ], ymax = out["up.scale", ]))) - } - patchwork::wrap_plots(lst_plots, ncol = 1) -} - -# This function produces multiple threshold plots for various stations at a time. -plot_multiple_threshold <- function(data, station_col_name, element_col_name, r, type = c("GP", "PP", "Exponential"), nint = 10, - alpha = 0.05, ncol = 1, xlb = "", main = NULL , verbose = FALSE,...) { - if (!missing(station_col_name)) { - plts <- list() - station_col <- data[, station_col_name] - stations <- unique(station_col) - for (i in seq_along(stations)) { - d <- data[station_col == stations[i], ] - element_col <- d[, element_col_name] - plts[[i]] <- threshold_Plot(x = element_col, main = stations[i], r = r, type = type, nint = nint, alpha = alpha, verbose = verbose) - } - patchwork::wrap_plots(plts, ncol = ncol) - } - else { - element_col <- data[, element_col_name] - threshold_Plot(x = element_col, r = r, type = type, nint = nint, alpha = alpha, verbose = verbose) - } -} - - -plot_declustered <- function(data, station_col_name, element_col_name, threshold, r = NULL, xlab = NULL, ylab = NULL, ncol = 1, print_summary = FALSE) { - if (!missing(station_col_name)) { - plts <- list() - station_col <- data[, station_col_name] - stations <- unique(station_col) - for (i in seq_along(stations)) { - station <- stations[i] - d <- data[station_col == station, ] - obj <- texmex::declust(y = na.exclude(d[, element_col_name]), r = r, threshold = threshold) - if (print_summary) { - cat("Station:", paste0("", station, ""), "\n \n") - cat("Threshold", obj$threshold, "\n") - 
cat("Declustering using the intervals method, run length", obj$r, "\n") - cat("Identified", obj$nClusters, "clusters.", "\n") - cat("------------------------------------------------------", "\n \n") - } else { - plts[[i]] <- obj %>% - ggplot2::ggplot(xlab = xlab, ylab = ylab, main = stations[i]) - } - } - if (!print_summary) { - patchwork::wrap_plots(plts, ncol = ncol) - } - } - else { - obj <- texmex::declust(y = na.exclude(data[, element_col_name]), r = r, threshold = threshold) - if (print_summary) { - cat("Threshold", obj$threshold, "\n") - cat("Declustering using the intervals method, run length", obj$r, "\n") - cat("Identified", obj$nClusters, "clusters.", "\n") - } else { - obj %>% ggplot2::ggplot(xlab = xlab, ylab = ylab) - } - } -} - -#This function creates a wrapper around functions from openair package -other_rose_plots <- function(data, type1_col_name, type2_col_name, date_col_name, wd_col_name, ws_col_name, main_method, single_pollutant, multiple_pollutant, ...) { - type <- "default" - if (!missing(type1_col_name) && !missing(type2_col_name)) { - type <- c(type1_col_name, type2_col_name) - } - if (missing(type1_col_name) && !missing(type2_col_name)) { - type <- type2_col_name - } - if (!missing(type1_col_name) && missing(type2_col_name)) { - type <- type1_col_name - } - if (!main_method %in% c("percentile_rose", "polar_plot", "polar_annulus", "polar_cluster", "polar_frequency")) stop("Method must be either percentile_rose, polar_plot, polar_annulus, polar_cluster or polar_frequency.") - if (missing(data)) stop("Data is missing with no default.") - col_names <- colnames(data) - fun_col_names <- c(date_col_name, wd_col_name) - if (!all(fun_col_names %in% col_names)) stop(paste(fun_col_names[!fun_col_names %in% col_names], ","), " column(s) missing in the dataframe.") - if (!"date" %in% col_names) data <- dplyr::rename(data, date = !!date_col_name) - if (!"wd" %in% col_names) data <- dplyr::rename(data, wd = !!wd_col_name) - if (main_method == 
"percentile_rose") { - if (!"ws" %in% col_names) data <- dplyr::rename(data, ws = !!ws_col_name) - openair::percentileRose(mydata = data, type = type, pollutant = multiple_pollutant, ...) - } else if (main_method == "polar_plot") { - openair::polarPlot(mydata = data, type = type, pollutant = multiple_pollutant, ...) - } else if (main_method == "polar_annulus") { - if (!"ws" %in% col_names) data <- dplyr::rename(data, ws = !!ws_col_name) - openair::polarAnnulus(mydata = data, type = type, pollutant = multiple_pollutant, ...) - } else if (main_method == "polar_cluster") { - openair::polarCluster(mydata = data, type = type, pollutant = single_pollutant, ...) - } else if (main_method == "polar_frequency") { - if (!"ws" %in% col_names) data <- dplyr::rename(data, ws = !!ws_col_name) - openair::polarFreq(mydata = data, type = type, pollutant = single_pollutant, ...) - } -} - -#This function creates a wrapper around windRose and pollutionRose functions from openair package -wind_pollution_rose <- function(mydata, date_name, pollutant, type1_col_name, type2_col_name, ...) { - type <- "default" - if (!missing(type1_col_name) && !missing(type2_col_name)) { - type <- c(type1_col_name, type2_col_name) - } - if (missing(type1_col_name) && !missing(type2_col_name)) { - type <- type2_col_name - } - if (!missing(type1_col_name) && missing(type2_col_name)) { - type <- type1_col_name - } - if (!("date" %in% colnames(mydata))) { - mydata <- dplyr::rename(mydata, date = !!date_name) - } - if (missing(pollutant)) { - openair::windRose(mydata = mydata, type = type, ...) - } else { - openair::pollutionRose(mydata = mydata, type = type, pollutant, ...) - } -} - -n_non_numeric <- function(x) { - x <- as.character(x) - sum(is.na(x) != is.na(suppressWarnings(as.numeric(x)))) -} - -# This function creates a wrapper around grDevices::recordPlot() to enable non-ggplot graphs to be saved as recorded_plot objects. -# It also handles graphics devices carefully. 
# Display `x` in a fresh graphics device, capture it with grDevices::recordPlot()
# and return the recorded plot, restoring the previously active device.
record_graph <- function(x) {
  # store current device, which could be png
  d <- grDevices::dev.cur()
  # create a new device, to ensure graph is displayed in a device that recordPlot() can capture
  grDevices::dev.new()
  # store the new device, so it can be turned off later
  d2 <- grDevices::dev.cur()
  # Display graph to be captured by recordPlot()
  x
  y <- grDevices::recordPlot()
  # set device back to current, to ensure code after this runs correctly
  grDevices::dev.set(d)
  # turn off the new graphics device
  grDevices::dev.off(which = d2)
  return(y)
}

# This is a "theme" essentially for slopegraph(): a list of scale/theme tweaks
# that can be added to a ggplot with `+`.
slopegraph_theme <- function(x_text_size = 12) {
  list(ggplot2::scale_x_discrete(position = "top"),
       ggplot2::theme(legend.position = "none",
                      axis.text.y = ggplot2::element_blank(),
                      panel.border = ggplot2::element_blank(),
                      panel.grid.major.y = ggplot2::element_blank(),
                      panel.grid.minor.y = ggplot2::element_blank(),
                      axis.title.x = ggplot2::element_blank(),
                      panel.grid.major.x = ggplot2::element_blank(),
                      axis.text.x.top = ggplot2::element_text(size = x_text_size, face = "bold"),
                      axis.ticks = ggplot2::element_blank()))
}

# Slightly amended the "newggslopegraph" function in the CGPfunctions package.
# Draws a slopegraph of `y` against the (ordered) categories of `x`, one line
# per level of `colour`, with labels at both ends and value labels on points.
slopegraph <- function(data, x, y, colour, data_label = NULL,
                       y_text_size = 3,
                       line_thickness = 1, line_colour = "ByGroup",
                       data_text_size = 2.5, data_text_colour = "black", data_label_padding = 0.05,
                       data_label_line_size = 0, data_label_fill_colour = "white",
                       reverse_x_axis = FALSE,
                       remove_missing = TRUE) {
  if (length(match.call()) <= 4) {
    stop("Not enough arguments passed requires a dataframe, plus at least three variables")
  }
  argList <- as.list(match.call()[-1])
  if (!methods::hasArg(data)) {
    stop("You didn't specify a dataframe to use", call. = FALSE)
  }
  # Column names captured from the unquoted arguments.
  Nx <- deparse(substitute(x))
  Ny <- deparse(substitute(y))
  Ncolour <- deparse(substitute(colour))
  # Value labels default to the y variable when data_label is not supplied.
  if (is.null(argList$data_label)) {
    Ndata_label <- deparse(substitute(y))
  } else {
    Ndata_label <- deparse(substitute(data_label))
  }
  Ndata <- argList$data
  if (!methods::is(data, "data.frame")) {
    stop(paste0("'", Ndata, "' does not appear to be a data frame"))
  }
  if (!Nx %in% names(data)) {
    stop(paste0("'", Nx, "' is not the name of a variable in the dataframe"),
         call. = FALSE)
  }
  if (anyNA(data[[Nx]])) {
    stop(paste0("'", Nx, "' can not have missing data please remove those rows"),
         call. = FALSE)
  }
  if (!Ny %in% names(data)) {
    stop(paste0("'", Ny, "' is not the name of a variable in the dataframe"),
         call. = FALSE)
  }
  if (!Ncolour %in% names(data)) {
    stop(paste0("'", Ncolour, "' is not the name of a variable in the dataframe"),
         call. = FALSE)
  }
  if (!Ndata_label %in% names(data)) {
    stop(paste0("'", Ndata_label, "' is not the name of a variable in the dataframe"),
         call. = FALSE)
  }
  if (anyNA(data[[Ncolour]])) {
    stop(paste0("'", Ncolour, "' can not have missing data please remove those rows"),
         call. = FALSE)
  }
  if (!is.numeric(data[[Ny]])) {
    stop(paste0("Variable '", Ny, "' needs to be numeric"), call. = FALSE)
  }
  # x must be orderable: ordered factor as-is, character as-is, plain factor is
  # converted (with a message), anything else is rejected.
  if (!is.ordered(data[[Nx]])) {
    if (!is.character(data[[Nx]])) {
      if (is.factor(data[[Nx]])) {
        message(paste0("\nConverting '", Nx, "' to an ordered factor\n"))
        data[[Nx]] <- factor(data[[Nx]], ordered = TRUE)
      } else {
        stop(paste0("Variable '", Nx, "' needs to be of class character, factor or ordered"),
             call. = FALSE)
      }
    }
  }
  if (reverse_x_axis) {
    data[[Nx]] <- forcats::fct_rev(data[[Nx]])
  }
  if (length(line_colour) > 1) {
    # Manual colours supplied: recycle if fewer colours than groups.
    if (length(line_colour) < length(unique(data[[Ncolour]]))) {
      message(paste0("\nGiven ", length(line_colour),
                     " colours. Recycling colours because there are ",
                     length(unique(data[[Ncolour]])), " ",
                     Ncolour, "s\n"))
      line_colour <- rep(line_colour, length.out = length(unique(data[[Ncolour]])))
    }
    LineGeom <- list(ggplot2::geom_line(ggplot2::aes(colour = {{colour}}), size = line_thickness),
                     ggplot2::scale_colour_manual(values = line_colour))
  } else {
    if (line_colour == "ByGroup") {
      LineGeom <- list(ggplot2::geom_line(ggplot2::aes(colour = {{colour}},
                                                       alpha = 1), size = line_thickness))
    } else {
      LineGeom <- list(ggplot2::geom_line(ggplot2::aes_(), size = line_thickness,
                                          colour = line_colour))
    }
  }
  if (anyNA(data[[Ny]])) {
    # NOTE(review): remove_missing = TRUE drops whole groups containing any NA,
    # FALSE drops only the NA rows — confirm this is the intended naming.
    if (remove_missing) {
      data <- data %>%
        dplyr::group_by({{colour}}) %>%
        dplyr::filter(!anyNA({{y}})) %>%
        droplevels()
    } else {
      data <- data %>% dplyr::filter(!is.na({{y}}))
    }
  }
  data %>% ggplot2::ggplot(ggplot2::aes(group = {{colour}}, y = {{y}}, x = {{x}})) +
    LineGeom +
    # note: this may conflict with other label in R, in which case we need to rewrite this
    ggrepel::geom_text_repel(data = . %>% dplyr::filter({{x}} == min({{x}})), ggplot2::aes(label = {{colour}}),
                             hjust = "left", box.padding = 0.1, point.padding = 0.1,
                             segment.colour = "gray", segment.alpha = 0.6, fontface = "bold",
                             size = y_text_size, nudge_x = -1.95, direction = "y",
                             force = 0.5, max.iter = 3000) +
    ggrepel::geom_text_repel(data = . %>% dplyr::filter({{x}} == max({{x}})), ggplot2::aes(label = {{colour}}),
                             hjust = "right", box.padding = 0.1, point.padding = 0.1,
                             segment.colour = "gray", segment.alpha = 0.6, fontface = "bold",
                             size = y_text_size, nudge_x = 1.95, direction = "y",
                             force = 0.5, max.iter = 3000) +
    ggplot2::geom_label(ggplot2::aes_string(label = Ndata_label), size = data_text_size, label.padding = grid::unit(data_label_padding, "lines"),
                        label.size = data_label_line_size, colour = data_text_colour, fill = data_label_fill_colour)
}

# Returns a three-letter string representing a specific quarter in a year (e.g. "JFM", "AMJ" etc.).
# `start_month` shifts the definition of the first quarter (e.g. 3 => "MAM").
get_quarter_label <- function(quarter, start_month) {
  if (!start_month %in% 1:12) stop(start_month, " is an invalid start month, must be in range of 1:12")
  if (!all(quarter %in% 1:4)) stop(quarter, " is an invalid quarter, must be in range of 1:4")
  # First letters of the months, repeated so a late start month can wrap around the year.
  mabb <- rep(substr(month.abb, 1, 1), times = 2)[start_month:(11 + start_month)]
  qtr <- sapply(quarter, function(x) {
    start_pos <- 1 + ((x - 1) * 3)
    paste(mabb[start_pos:(start_pos + 2)], collapse = "")
  })
  return(factor(x = qtr, levels = unique(qtr)))
}

# TRUE when x carries a (non-empty) sjlabelled variable label.
is.containVariableLabel <- function(x) {
  return(isTRUE(sjlabelled::get_label(x) != ""))
}

# TRUE when every element of x is the empty string.
is.emptyvariable <- function(x) {
  return(isTRUE(length(x) == sum(x == "")))
}

# TRUE when every element of x is NA.
is.NAvariable <- function(x) {
  return(isTRUE(length(x) == sum(is.na(x))))
}

# TRUE when the factor x has exactly n levels.
# BUGFIX: previously used sum(levels(x)) which always errors (sum of character);
# the intent — per the function name — is the number of levels.
is.levelscount <- function(x, n) {
  return(isTRUE(nlevels(x) == n))
}

# TRUE when x carries value labels (labels_label is a file-level constant).
is.containValueLabel <- function(x) {
  return(labels_label %in% names(attributes(x)))
}

# TRUE when x has value labels that cover only some of its observed values.
is.containPartialValueLabel <- function(x) {
  if (is.containValueLabel(x)) {
    levelCounts <- table(x)
    return(!all(x[!is.na(x)] %in% attr(x, labels_label)) &&
             sum(levelCounts == 0) == 0)
  } else {
    return(FALSE)
  }
}

# Normalise the heterogeneous data sets shipped in the rcorpora package
# (data frames, vectors, matrices and (nested) lists) into a single data frame,
# prepending the corpus description when one is present.
read_corpora <- function(data) {
  data_all <- NULL
  description <- NULL
  # check different data types that are in the rcorpora package
  # first check if it is a data frame outright. If it is, then we just need to return the data
  if (is.data.frame(data)) {
    return(data)
  }
  # If it isn't a data frame, we check each element of the `data` argument
  data_unlist <- NULL
  for (i in seq_along(data)) {
    # first, check for description and metadata
    if (!is.null(names(data[i])) && names(data[i]) == "description") {
      description <- data[i][[1]]
    } else if (!is.null(names(data[i])) && names(data[i]) == "meta") {
      data_unlist[[i]] <- NULL
      # then check if the element is a vector, matrix, data frame, or list.
      # inherits() is used instead of class(.) == / %in% which errors for
      # multi-class objects in R >= 4.2.
    } else if (inherits(data[[i]], c("character", "factor", "logical", "numeric", "integer"))) {
      data_unlist[[i]] <- data.frame(list = data[[i]])
    } else if (inherits(data[[i]], "matrix")) {
      data_unlist[[i]] <- data.frame(list = do.call(paste, c(data.frame(data[[i]]), sep = "-")))
    } else if (inherits(data[[i]], "data.frame")) {
      data_unlist[[i]] <- data.frame(list = data[[i]])
    } else if (inherits(data[[i]], "list")) {
      if (length(data[[i]]) == 0) {
        data_unlist[[i]] <- data.frame(NA)
      } else {
        # unlist the list, to create a data frame with two elements: list name ("rowname") and value
        # if there are nested lists, the "rowname" variable combines the different lists together.
        # We want to separate these into separate variables to make the data more usable and readable.
        # We do this by `str_split_fixed`, and `gsub`.
        # NOTE(review): this unlists the whole `data`, not `data[[i]]` — looks
        # suspicious but is kept as-is; confirm against rcorpora inputs.
        new_data <- tidyr::as_tibble(unlist(data), rownames = "rowname")
        split <- stringr::str_split_fixed(string = new_data$rowname, pattern = stringr::coll(pattern = "."), n = Inf)
        split <- gsub("[0-9]$|[0-9][0-9]$", "", split)
        # add in the separated list to the value variable, and rename the variables
        data_unlist[[i]] <- cbind(data.frame(split), value = new_data$value)
        names(data_unlist[[i]]) <- c(paste0("variable", 1:(length(data_unlist[[i]]) - 1)), "list")
      } # end of ifelse lists
    } # end of list
  } # end of for loop
  names(data_unlist) <- names(data[1:length(data_unlist)])
  data_all <- plyr::ldply(data_unlist, .id = "variable1")

  if (!is.null(description)) {
    return(data.frame(description = description, data_all))
  }
  return(data.frame(data_all))
}

# Bind two data frames
# and remove any duplicates from data frame x that are in data frame y
# x = our data to remove duplicates from
# y = data frame that contains variables in cols argument
# cols = columns to bind into data1
cbind_unique <- function(x, y, cols) {
  x <- x %>% dplyr::select(c(setdiff(names(x), cols)))
  # Value of the final assignment is returned (invisibly), as before.
  x <- dplyr::bind_cols(x = x, y = dplyr::select(y, tidyselect::all_of(cols)))
}

# object is the object to be displayed
# object_format is the display format. If supplied, then returns file name of the object
# if not then it prints the object
view_object_data <- function(object, object_format = NULL) {
  file_name <- ""
  if (identical(object_format, "image")) {
    file_name <- view_graph_object(object)
  } else if (identical(object_format, "text")) {
    file_name <- view_text_object(object)
  } else if (identical(object_format, "html")) {
    file_name <- view_html_object(object)
  } else {
    print(object)
  }
  return(file_name)
}

# Convenience wrapper: display a data-book object using its stored format.
view_object <- function(data_book_object) {
  return(
    view_object_data(
      object = data_book_object$object,
      object_format = data_book_object$object_format
    )
  )
}

# displays the graph object in the set R "viewer".
# If no R viewer is available, it renders the object to a PNG file in the
# temporary folder and returns the file path; otherwise it displays the object
# in the viewer and returns it.
view_graph_object <- function(graph_object) {
  # grobs need an explicit grid.draw(); everything else is print()-ed.
  is_grob <- inherits(graph_object, "grob")

  # When a viewer is set (e.g. inside RStudio) simply show the object.
  # This check is primarily meant to make this function behave the same when run outside R-Instat.
  # TODO. When the print command is called in RStudio a temp file is automatically created;
  # investigate how that can be done in R-Instat.
  # As of 07/09/2022 just return the object. Important for RStudio to display the object.
  if (!is.null(base::getOption("viewer"))) {
    if (is_grob) {
      grid::grid.draw(graph_object)
    }
    return(graph_object)
  }

  # No viewer: render to a uniquely named PNG under tempdir().
  png_path <- tempfile(pattern = "viewgraph", fileext = ".png")
  grDevices::png(file = png_path, width = 4000, height = 4000, res = 500)
  if (is_grob) {
    grid::grid.draw(graph_object)
  } else {
    print(graph_object)
  }
  dev.off() # todo. use graphics.off()? which one is better?

  # todo. should we use the respective packages' "convenience" save functions
  # (ggplot/ggmultiplot/openair/ggsurvplot/recordedplot) instead? Might help
  # with resolution and scaling.

  message("R viewer not detected. File saved in location ", png_path)
  return(png_path)
}

# displays the object in the set R "viewer".
-#if the viewer is not available then -#it saves the object as a file in the temporary folder -#and returns the file path. -view_text_object <- function(text_object){ - #if there is a viewer, like in the case of RStudio then just print the object - #this check is primarily meant to make this function work in a similar manner when run outside R-Instat - r_viewer <- base::getOption("viewer") - if (!is.null(r_viewer)) { - #TODO. When print command is called in R-Studio, a temp file is - #automatically created. Investigate how that can be done in R-Instat - #as of 07/09/2022 just return output. Important for RStudio to display the object - return(utils::capture.output(text_object)) - } - - - #get a unique temporary file name from the tempdir path - file_name <- tempfile(pattern = "viewtext", fileext = ".txt") - - #todo. should we use respective package "convenience" functions to save the objects as text files depending on the class names - #get object class names - #object_class_names <- class(text_object) - - #save the object as a text file - utils::capture.output(text_object, file = file_name) - - message("R viewer not detected. File saved in location ", file_name) - return(file_name) - -} - -view_html_object <- function(html_object) { - # Check if html_object is a list and has more than one element - if (is.list(html_object) && all(sapply(html_object, class) == class(html_object[[1]]))) { - file_names <- vector("list", length(html_object)) - for (i in seq_along(html_object)) { - # If html_object is a list with multiple elements of the same class, - # use a for loop to process each element - file_names[[i]] <- process_html_object(html_object[[i]]) - } - return(file_names) - } - - # Process the html_object - return(process_html_object(html_object)) -} - -#Function to process individual HTML object -#displays the html object in the set R "viewer". -#if the viewer is not available then -#it saves the object as a file in the temporary folder -#and returns the file path. 
-process_html_object <- function(html_object) { - #if there is a viewer, like in the case of RStudio then just print the object - #this check is primarily meant to make this function work in a similar manner when run outside R-Instat - r_viewer <- base::getOption("viewer") - if (!is.null(r_viewer)) { - #When print command is called in R-Studio, a temp file is - #automatically created. TODO. Investigate how that can be done in R-Instat. - #as of 07/09/2022 just return the object. Important for RStudio to display the object - return(html_object) - } - - # Get a unique temporary file name from the tempdir path - file_name <- tempfile(pattern = "viewhtml", fileext = ".html") - - # Get a vector of available class names - object_class_names <- class(html_object) - - # Save the object as an HTML file depending on the object type - if ("htmlwidget" %in% object_class_names) { - #Note. When selfcontained is set to True - #a "Saving a widget with selfcontained = TRUE requires pandoc" error is thrown in R-Instat - #when saving an rpivotTable - #TODO. Investigate how to solve it then. - htmlwidgets::saveWidget(html_object, file = file_name, selfcontained = FALSE) - } else if ("sjTable" %in% object_class_names) { - #"sjTable" objects are not compatible with "htmlwidgets" package. So they have to be saved differently - #"sjplot" package produces "sjTable" objects - html_object$file = file_name - #TODO. Is there any other function that can save an sjTable to a html file? - print(html_object) - } else if ("gt_tbl" %in% object_class_names) { - #"gt table" objects are not compatible with "htmlwidgets" package. So they have to be saved differently. - #"mmtable2" package produces "gt_tbl" objects - gt::gtsave(html_object, filename = file_name) - } - - message("R viewer not detected. File saved in location ", file_name) - return(file_name) -} - - - -#tries to recordPlot if graph_object = NULL, then returns graph object of class "recordedplot". 
-#applicable to base graphs only -check_graph <- function(graph_object){ - - out <- graph_object - - if (is.null(out)) { - out <- tryCatch({ - message("Recording plot") - recordPlot() - }, - error = function(cond) { - message("Graph object does not exist:") - message(cond) - # Choose a return value in case of error - return(NULL) - }, - warning = function(cond) { - message("Warning message:") - message(cond) - return(NULL) - }, - finally = { - message("Plot recorded") - }) - } - - return(out) -} - - -get_data_book_output_object_names <- function(output_object_list, - object_type_label = NULL, - excluded_items = c(), - as_list = FALSE, - list_label = NULL){ - - if(is.null(object_type_label)){ - out <- names(output_object_list) - }else{ - out <- names(output_object_list)[sapply(output_object_list, function(x) any( identical(x$object_type_label, object_type_label) ))] - } - - if(length(out) == 0){ - return(out) - } - - if(length(excluded_items) > 0) { - #get indices of items to exclude - excluded_indices <- which(out %in% excluded_items) - - #remove the excluded items from the list - if(length(excluded_indices) > 0){ - out <- out[-excluded_indices] - } - - } - - if(as_list) { - #convert the character vector list - lst <- list() - if(!is.null(list_label)){ - lst[[list_label]] <- out - }else{ - lst <- as.list(out) - } - - return(lst) - }else{ - #return as a character vector - return(out) - } - -} - -get_data_book_scalar_names <- function(scalar_list, - excluded_items = c(), - as_list = FALSE, - list_label = NULL){ - out = names(scalar_list) - if(length(excluded_items) > 0) { - ex_ind = which(out %in% excluded_items) - if(length(ex_ind) != length(excluded_items)) warning("Some of the excluded_items were not found in the list of calculations") - if(length(ex_ind) > 0) out = out[-ex_ind] - } - if(!as_list) { - return(out) - } - lst = list() - lst[[list_label]] <- out - return(lst) -} - -get_vignette <- function (package = NULL, lib.loc = NULL, all = TRUE) -{ - oneLink <- 
function(s) { - if (length(s) == 0L) - return(character(0L)) - title <- s[, "Title"] - if (port > 0L) - prefix <- sprintf("/library/%s/doc", pkg) - else prefix <- sprintf("file://%s/doc", s[, "Dir"]) - src <- s[, "File"] - pdf <- s[, "PDF"] - rcode <- s[, "R"] - pdfext <- sub("^.*\\.", "", pdf) - sprintf("
  • %s - \n %s \n %s \n %s \n
  • \n", - title, ifelse(nzchar(pdf), sprintf("%s ", - prefix, pdf, toupper(pdfext)), ""), sprintf("source ", - prefix, src), ifelse(nzchar(rcode), sprintf("R code ", - prefix, rcode), "")) - } - - port <- tools::startDynamicHelp(NA) - file <- tempfile("Rvig.", fileext = ".html") - print(file) - sink(file = file, type = "output") - vinfo <- tools::getVignetteInfo(package, lib.loc, all) - pkgs <- unique(vinfo[, "Package"]) - db <- lapply(pkgs, function(p) vinfo[vinfo[, "Package"] == - p, , drop = FALSE]) - names(db) <- pkgs - attr(db, "call") <- sys.call() - attr(db, "footer") <- if (all) - "" - else sprintf(gettext("Use %s \n to list the vignettes in all available packages."), - "browseVignettes(all = TRUE)") - if (port > 0L) - css_file <- "/doc/html/R.css" - else css_file <- file.path(R.home("doc"), "html", - "R.css") - cat(sprintf("\n\n\nR Vignettes\n\n\n\n
    \n", - css_file)) - - cat(sprintf("

    Vignettes found by %s

    ", - deparse1(attr(db, "call")))) - cat("
    ") - for (pkg in names(db)) { - cat(sprintf("

    Vignettes in package %s

    \n", - pkg)) - cat("\n") - } - cat("
    ") - sink() - if (port > 0L){ - return(sprintf("http://127.0.0.1:%d/session/%s", - port, basename(file)))} - else return(sprintf("file://%s", file)) -} - -# for issue 8342 - adding in a count of the number of elements that have missing values by period (and station) -cumulative_inventory <- function(data, station = NULL, from, to){ - if (is.null(station)){ - data <- data %>% - dplyr::group_by(.data[[from]], .data[[to]]) %>% - dplyr::mutate(cum=dplyr::n()) - data <- data %>% - dplyr::group_by(.data[[from]]) %>% - dplyr::mutate(cum1 = dplyr::n()) %>% - dplyr::mutate(cum1 = ifelse(cum == cum1, # are they all in the same period? - yes = cum, - no = ifelse(cum == max(cum), - cum, - max(cum) + 0.5))) - } else { - data <- data %>% - dplyr::group_by(.data[[station]], .data[[from]], .data[[to]]) %>% - dplyr::mutate(cum=dplyr::n()) - data <- data %>% - dplyr::group_by(.data[[station]], .data[[from]]) %>% - dplyr::mutate(cum1 = dplyr::n()) %>% - dplyr::mutate(cum1 = ifelse(cum == cum1, # are they all in the same period? 
- yes = cum, - no = ifelse(cum == max(cum), - cum, - max(cum) + 0.5))) - } - return(data) -} - -getRowHeadersWithText <- function(data, column, searchText, ignore_case, use_regex, match_entire_cell) { - if (use_regex) { - # Adjust the search text to match the entire cell if required - if (match_entire_cell) { - searchText <- paste0("^", searchText, "$") - } - # Find the rows that match the search text using regex - matchingRows <- stringr::str_detect(data[[column]], stringr::regex(searchText, ignore_case = ignore_case)) - } else if (is.na(searchText)) { - matchingRows <- apply(data[, column, drop = FALSE], 1, function(row) any(is.na(row))) - } else { - # Adjust the search text to match the entire cell if required - if (match_entire_cell) { - searchText <- paste0("^", searchText, "$") - } - # Find the rows that match the search text - matchingRows <- grepl(searchText, data[[column]], ignore.case = ignore_case, fixed = TRUE) - } - # Get the row headers where the search text is found - rowHeaders <- rownames(data)[matchingRows] - - # Return the row headers - return(rowHeaders) -} - -# Custom function to convert character to list of numeric vector -convert_to_list <- function(x) { - if (grepl("^c\\(", x)) { - x <- gsub("^c\\(|\\)$", "", x) # Remove 'c(' and ')' - return(as.numeric(unlist(strsplit(x, ",")))) - } else if (grepl(":", x)) { - x <- gsub(":", ",", x, fixed = TRUE) # Replace ':' with ',' - return(as.numeric(unlist(strsplit(x, ",")))) - } else { - return(as.numeric(x)) - } -} - -getExample <- function (topic, package = NULL, lib.loc = NULL, character.only = TRUE, give.lines = FALSE, local = FALSE, echo = TRUE, verbose = getOption("verbose"), setRNG = FALSE, ask = getOption("example.ask"), prompt.prefix = abbreviate(topic, 6), run.dontrun = FALSE, run.donttest = interactive()) { - if (!character.only) { - topic <- substitute(topic) - if (!is.character(topic)) - topic <- deparse(topic)[1L] - } - pkgpaths <- find.package(package, lib.loc, verbose = verbose) - 
file <- utils:::index.search(topic, pkgpaths, firstOnly = TRUE) - if (!length(file)) { - warning(gettextf("no help found for %s", sQuote(topic)), - domain = NA) - return(character()) - } - if (verbose) - cat("Found file =", sQuote(file), "\n") - packagePath <- dirname(dirname(file)) - pkgname <- basename(packagePath) - lib <- dirname(packagePath) - tf <- tempfile("Rex") - tools::Rd2ex(utils:::.getHelpFile(file), tf, commentDontrun = !run.dontrun, - commentDonttest = !run.donttest) - if (!file.exists(tf)) { - if (give.lines) - return(character()) - warning(gettextf("%s has a help file but no examples", - sQuote(topic)), domain = NA) - return(character()) - } - on.exit(unlink(tf)) - example_text <- readLines(tf) - example_text <- paste(example_text, collapse = "\n") - if (give.lines) { - return(example_text) - } - if (echo) { - cat(example_text) - } - return(example_text) -} - -WB_evaporation <- function(water_balance, frac, capacity, evaporation_value, rain){ - if (water_balance >= frac*capacity){ - evaporation <- evaporation_value - } else { - if (rain == 0){ - evaporation <- evaporation_value * ((water_balance)/(frac*capacity)) - } else { - if (water_balance < frac*capacity){ - if (rain > evaporation_value){ - evaporation <- evaporation_value - } else { - evaporation <- evaporation_value * ((water_balance)/(frac*capacity)) - evaporation <- evaporation + ((evaporation_value - evaporation)*(rain/evaporation_value)) - } - } else { - evaporation <- evaporation_value - } - } - } - return(evaporation) -} - -write_weather_data <- function(year, month, day, rain, mn_tmp, mx_tmp, missing_code, output_file) { - # Create a data frame with the provided inputs - weather_data <- data.frame(year = year, - month = month, - day = day, - rain = rain, - mn_tmp = mn_tmp, - mx_tmp = mx_tmp) - - # Replace missing values with the specified code - weather_data[is.na(weather_data)] <- missing_code - - # Write the data frame to a text file - write.table(weather_data, file = output_file, 
sep = "\t", row.names = FALSE, col.names = TRUE, quote = FALSE) - - cat("Weather data has been written to", output_file, "\n") -} - -prepare_walter_lieth <- function(data, month, tm_min, ta_min){ - dat_long_int <- NULL - for (j in seq(nrow(data) - 1)) { - intres <- NULL - for (i in seq_len(ncol(data))) { - if (is.character(data[j, i]) | is.factor(data[j, i])) { - val <- as.data.frame(data[j, i]) - } - else { - interpol <- approx(x = data[c(j, j + 1), "indrow"], - y = data[c(j, j + 1), i], - n = 50) - val <- as.data.frame(interpol$y) - } - names(val) <- names(data)[i] - intres <- dplyr::bind_cols(intres, val) - } - dat_long_int <- dplyr::bind_rows(dat_long_int, intres) - } - dat_long_int$interpolate <- TRUE - dat_long_int[[month]] <- "" - data$interpolate <- FALSE - dat_long_int <- dat_long_int[!dat_long_int$indrow %in% data$indrow, ] - dat_long_end <- dplyr::bind_rows(data, dat_long_int) - dat_long_end <- dat_long_end[order(dat_long_end$indrow), ] - dat_long_end <- dat_long_end[dat_long_end$indrow >= 0 & dat_long_end$indrow <= 12, ] - dat_long_end <- tibble::as_tibble(dat_long_end) - - getpolymax <- function(x, y, y_lim) { - initpoly <- FALSE - yres <- NULL - xres <- NULL - for (i in seq_len(length(y))) { - lastobs <- i == length(x) - if (y[i] > y_lim[i]) { - if (isFALSE(initpoly)) { - xres <- c(xres, x[i]) - yres <- c(yres, y_lim[i]) - initpoly <- TRUE - } - xres <- c(xres, x[i]) - yres <- c(yres, y[i]) - if (lastobs) { - xres <- c(xres, x[i], NA) - yres <- c(yres, y_lim[i], NA) - } - } - else { - if (initpoly) { - xres <- c(xres, x[i - 1], NA) - yres <- c(yres, y_lim[i - 1], NA) - initpoly <- FALSE - } - } - } - poly <- tibble::tibble(x = xres, y = yres) - return(poly) - } - getlines <- function(x, y, y_lim) { - yres <- NULL - xres <- NULL - ylim_res <- NULL - for (i in seq_len(length(y))) { - if (y[i] > y_lim[i]) { - xres <- c(xres, x[i]) - yres <- c(yres, y[i]) - ylim_res <- c(ylim_res, y_lim[i]) - } - } - line <- tibble::tibble(x = xres, y = yres, ylim_res = 
ylim_res) - return(line) - } - prep_max_poly <- getpolymax(x = dat_long_end$indrow, y = pmax(dat_long_end$pm_reesc, - 50), y_lim = rep(50, length(dat_long_end$indrow))) - tm_max_line <- getlines(x = dat_long_end$indrow, y = dat_long_end$tm, - y_lim = dat_long_end$pm_reesc) - pm_max_line <- getlines(x = dat_long_end$indrow, y = pmin(dat_long_end$pm_reesc, - 50), y_lim = dat_long_end$tm) - dat_real <- dat_long_end[dat_long_end$interpolate == FALSE, - c("indrow", ta_min)] - x <- NULL - y <- NULL - for (i in seq_len(nrow(dat_real))) { - if (dat_real[i, ][[ta_min]] < 0) { - x <- c(x, NA, rep(dat_real[i, ]$indrow - 0.5, 2), - rep(dat_real[i, ]$indrow + 0.5, 2), NA) - y <- c(y, NA, -3, 0, 0, -3, NA) - } - else { - x <- c(x, NA) - y <- c(y, NA) - } - } - probfreeze <- tibble::tibble(x = x, y = y) - rm(dat_real) - dat_real <- dat_long_end[dat_long_end$interpolate == FALSE, - c("indrow", tm_min)] - x <- NULL - y <- NULL - for (i in seq_len(nrow(dat_real))) { - if (dat_real[i, ][[tm_min]] < 0) { - x <- c(x, NA, rep(dat_real[i, ]$indrow - 0.5, 2), - rep(dat_real[i, ]$indrow + 0.5, 2), NA) - y <- c(y, NA, -3, 0, 0, -3, NA) - } - else { - x <- c(x, NA) - y <- c(y, NA) - } - } - surefreeze <- tibble::tibble(x = x, y = y) - return_list <- list(dat_long_end, - tm_max_line, - pm_max_line, - prep_max_poly, - probfreeze, - surefreeze) - names(return_list) <- c("dat_long_end", "tm_max_line", "pm_max_line", - "prep_max_poly", "prob_freeze", "surefreeze") - return(return_list) -} -ggwalter_lieth <- function (data, month, station = NULL, p_mes, tm_max, tm_min, ta_min, station_name = "", - alt = NA, per = NA, pcol = "#002F70", - tcol = "#ff0000", pfcol = "#9BAEE2", sfcol = "#3C6FC4", - shem = FALSE, p3line = FALSE, ...) 
- { - - # Preprocess data with vectorised operations - data <- data %>% - dplyr::mutate(tm = (.data[[tm_max]] + .data[[tm_min]]) / 2, - pm_reesc = dplyr::if_else(.data[[p_mes]] < 100, .data[[p_mes]] * 0.5, .data[[p_mes]] * 0.05 + 45), - p3line = .data[[p_mes]] / 3) %>% - dplyr::mutate(across(.data[[month]], ~ forcats::fct_expand(.data[[month]], ""))) %>% - dplyr::arrange(.data[[month]]) - # do this for each station, if we have a station - if (!is.null(station)){ - data <- data %>% group_by(!!sym(station)) - } - data <- data %>% - group_modify(~{ - # Add dummy rows at the beginning and end for each group - .x <- bind_rows(.x[nrow(.x), , drop = FALSE], .x, .x[1, , drop = FALSE]) - # Clear month value for the dummy rows - .x[c(1, nrow(.x)), which(names(.x) == data[[month]])] <- "" - # Add an index column for plotting or further transformations - .x <- cbind(indrow = seq(-0.5, 12.5, 1), .x) - .x - }) - - if (!is.null(station)){ - data <- data %>% ungroup() - } - data <- data.frame(data) - - # split by station - if (is.null(station)){ - data_list <- prepare_walter_lieth(data, month, tm_min, ta_min) - # data things - dat_long_end <- data_list$dat_long_end - tm_max_line <- data_list$tm_max_line - pm_max_line <- data_list$pm_max_line - prep_max_poly <- data_list$prep_max_poly - probfreeze <- data_list$prob_freeze - surefreeze <- data_list$surefreeze - } else { - results <- - map(.x = unique(data[[station]]), - .f = ~{filtered_data <- data %>% filter(!!sym(station) == .x) - prepare_walter_lieth(filtered_data, month, tm_min, ta_min)}) - # Function to bind rows for a specific sub-element across all main elements - n <- length(results) - m <- length(results[[1]]) - station_name <- unique(data[[station]]) - binds <- NULL - combined <- NULL - for (j in 1:m){ - for (i in 1:n) { # for each station data set - binds[[i]] <- results[[i]][[j]] %>% mutate(!!sym(station) := station_name[i]) - } - combined[[j]] <- do.call(rbind, binds) # Combine all the sub-elements row-wise - } - # data 
things - dat_long_end <- combined[[1]] - tm_max_line <- combined[[2]] - pm_max_line <- combined[[3]] - prep_max_poly <- combined[[4]] - probfreeze <- combined[[5]] - surefreeze <- combined[[6]] - } - - # data frame pretty things ------------------------------------------------------ - ticks <- data.frame(x = seq(0, 12), ymin = -3, ymax = 0) - month_breaks <- dat_long_end[dat_long_end[[month]] != "", ]$indrow - month_labs <- dat_long_end[dat_long_end[[month]] != "", ][[month]] - - ymax <- max(60, 10 * floor(max(dat_long_end$pm_reesc)/10) + 10) - ymin <- min(-3, min(dat_long_end$tm)) - range_tm <- seq(0, ymax, 10) - if (ymin < -3) { - ymin <- floor(ymin/10) * 10 - range_tm <- seq(ymin, ymax, 10) - } - templabs <- paste0(range_tm) - templabs[range_tm > 50] <- "" - range_prec <- range_tm * 2 - range_prec[range_tm > 50] <- range_tm[range_tm > 50] * 20 - 900 - preclabs <- paste0(range_prec) - preclabs[range_tm < 0] <- "" - - wandlplot <- ggplot2::ggplot() + ggplot2::geom_line(data = dat_long_end, - aes(x = .data$indrow, y = .data$pm_reesc), color = pcol) + - ggplot2::geom_line(data = dat_long_end, aes(x = .data$indrow, - y = .data$tm), color = tcol) - if (nrow(tm_max_line > 0)) { - wandlplot <- wandlplot + ggplot2::geom_segment(aes(x = .data$x, - y = .data$ylim_res, xend = .data$x, yend = .data$y), - data = tm_max_line, color = tcol, alpha = 0.2) - } - if (nrow(pm_max_line > 0)) { - wandlplot <- wandlplot + ggplot2::geom_segment(aes(x = .data$x, - y = .data$ylim_res, xend = .data$x, yend = .data$y), - data = pm_max_line, color = pcol, alpha = 0.2) - } - if (p3line) { - wandlplot <- wandlplot + ggplot2::geom_line(data = dat_long_end, - aes(x = .data$indrow, y = .data$p3line), color = pcol) - } - if (max(dat_long_end$pm_reesc) > 50) { - wandlplot <- wandlplot + ggplot2::geom_polygon(data = prep_max_poly, aes(x, y), - fill = pcol) - } - if (min(dat_long_end[[ta_min]]) < 0) { - wandlplot <- wandlplot + ggplot2::geom_polygon(data = probfreeze, aes(x = x, y = y), - fill = 
pfcol, colour = "black") - } - if (min(dat_long_end[[tm_min]]) < 0) { - wandlplot <- wandlplot + geom_polygon(data = surefreeze, aes(x = x, y = y), - fill = sfcol, colour = "black") - } - wandlplot <- wandlplot + geom_hline(yintercept = c(0, 50), - linewidth = 0.5) + - geom_segment(data = ticks, aes(x = x, xend = x, y = ymin, yend = ymax)) + - scale_x_continuous(breaks = month_breaks, name = "", labels = month_labs, expand = c(0, 0)) + - scale_y_continuous("C", limits = c(ymin, ymax), labels = templabs, - breaks = range_tm, sec.axis = dup_axis(name = "mm", labels = preclabs)) - wandlplot <- wandlplot + - ggplot2::theme_classic() + - ggplot2::theme(axis.line.x.bottom = element_blank(), - axis.title.y.left = element_text(angle = 0, - vjust = 0.9, size = 10, colour = tcol, - margin = unit(rep(10, 4), "pt")), - axis.text.x.bottom = element_text(size = 10), - axis.text.y.left = element_text(colour = tcol, size = 10), - axis.title.y.right = element_text(angle = 0, vjust = 0.9, - size = 10, colour = pcol, - margin = unit(rep(10, 4), "pt")), - axis.text.y.right = element_text(colour = pcol, size = 10)) - - if (!is.null(station)){ - wandlplot <- wandlplot + facet_wrap(station) - } - - return(wandlplot) -} - -# Function to check if a repo exists and is in R. 
Can give either owner and repo, or give url -check_github_repo <- function(owner = NULL, repo = NULL, url = NULL) { - if (!is.null(url)){ - # Extract the part after 'github.com' - url <- sub(".*github.com/", "", url) - - # Extract the correct parts - owner <- dirname(url) - repo <- basename(url) - } - # Check if the package is already installed - if (requireNamespace(repo, quietly = TRUE)) { - - # Get the installed package's remote SHA (if installed via GitHub) - local_sha <- packageDescription(repo)$GithubSHA1 - - if (!is.null(local_sha)) { - # Get the latest commit SHA from the GitHub repository - latest_commit <- tryCatch({ - response <- gh::gh("/repos/:owner/:repo/commits", owner = owner, repo = repo, .limit = 1) - response[[1]]$sha - }, error = function(e) { - # Handle error if GitHub API call fails - return(NULL) - }) - - if (!is.null(latest_commit)) { - if (local_sha == latest_commit) { - return(0) # Package is installed and up-to-date - } else { - return(1) # Package is installed but not the latest version - } - } else { - return(2) # Unable to retrieve the latest commit from GitHub - } - } else { - return(3) # Package is installed but not from GitHub - } - - # If not installed, check if the repository exists on GitHub - } else { - tryCatch({ - response <- gh::gh("/repos/:owner/:repo", owner = owner, repo = repo, verb = "GET", silent = TRUE) - if (response$language == "R") { - return(4) # Repository exists and is in the R language - } else { - return(5) # Repository exists, but isn't in the R language - } - }, error = function(e) { - return(6) # Error occurred, repository doesn't exist - }) - } -} -#Convert Decimal to Fractions -frac10 <- function(x) {paste0(round(x * 10), "/", 10)} #Give fraction our of 10 for a decimal value -frac20 <- function(x) {paste0(round(x * 20), "/", 20)} #Give fraction our of 20 for a decimal value -frac100 <- function(x) {paste0(round(x * 100), "/", 100)} # Give fraction our of 100 for a decimal value - -frac_den <- function(x, 
den) {paste0(round(x * den), "/", den)} # Give fraction for a given denominator - -# Monitor memory usage function -monitor_memory <- function() { - if (.Platform$OS.type == "windows") { - mem_used <- memory.size() - } #else { - # mem_used <- sum(gc()[, "used"]) / 1024 # Convert KB to MB on non-Windows systems - # } - return(mem_used) -} - -time_operation <- function(expr) { - timing <- system.time(expr) - print(timing) -} - -set_library_paths <- function(library_path) { - # Update the library paths - .libPaths(c(library_path, .libPaths())) - - # Check if there are more than 2 library paths - if (length(.libPaths()) > 2) { - # Get the current library paths - current_paths <- .libPaths() - - # Select valid indices (1 and 3) only if they exist - valid_indices <- c(1, 3)[c(1, 3) <= length(current_paths)] - - # Set the library paths to the valid ones - .libPaths(current_paths[valid_indices]) - } -} + "August", "September", "October", "November", "December") \ No newline at end of file diff --git a/instat/ucrCalculator.vb b/instat/ucrCalculator.vb index 9e67c8d0cf7..9006a01e089 100644 --- a/instat/ucrCalculator.vb +++ b/instat/ucrCalculator.vb @@ -6104,33 +6104,33 @@ Public Class ucrCalculator Private Sub cmdFrac10_Click(sender As Object, e As EventArgs) Handles cmdFrac10.Click If chkShowParameters.Checked Then - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac10(x= )", 2) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac10(x= )", 2) Else - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac10( )", 2) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac10( )", 2) End If End Sub Private Sub cmdFrac20_Click(sender As Object, e As EventArgs) Handles cmdFrac20.Click If chkShowParameters.Checked Then - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac20(x= )", 2) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac20(x= )", 2) Else - 
ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac20( )", 2) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac20( )", 2) End If End Sub Private Sub cmdFrac100_Click(sender As Object, e As EventArgs) Handles cmdFrac100.Click If chkShowParameters.Checked Then - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac100(x= )", 2) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac100(x= )", 2) Else - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac100( )", 2) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac100( )", 2) End If End Sub Private Sub cmdFracDen_Click(sender As Object, e As EventArgs) Handles cmdFracDen.Click If chkShowParameters.Checked Then - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac_den(x= ,den= )", 8) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac_den(x= ,den= )", 8) Else - ucrReceiverForCalculation.AddToReceiverAtCursorPosition("frac_den(, )", 3) + ucrReceiverForCalculation.AddToReceiverAtCursorPosition("instatExtras::frac_den(, )", 3) End If End Sub End Class \ No newline at end of file diff --git a/instat/ucrFactor.vb b/instat/ucrFactor.vb index 715ce4f6710..de8d2424aa1 100644 --- a/instat/ucrFactor.vb +++ b/instat/ucrFactor.vb @@ -414,6 +414,7 @@ Public Class ucrFactor clsGetFactorDataFunction.AddParameter("data_name", strDataFrameName) clsGetFactorDataFunction.AddParameter("col_name", strFactorVariableName) clsGetFactorDataFunction.AddParameter("include_NA_level", If(bIncludeNALevel, "TRUE", "FALSE")) + clsConvertToCharFunction.SetPackageName("instatExtras") clsConvertToCharFunction.SetRCommand("convert_to_character_matrix") clsConvertToCharFunction.AddParameter("data", clsRFunctionParameter:=clsGetFactorDataFunction) expDataFrame = frmMain.clsRLink.RunInternalScriptGetValue(clsConvertToCharFunction.ToScript(), bSilent:=True)