-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #55 from firms-gta/53/hotfix/rendering-report-for-…
…efforts 53/hotfix/rendering report for efforts
- Loading branch information
Showing
6 changed files
with
284 additions
and
26 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#' Calculate Raising Factors for Conversion to Nominal Data | ||
#' | ||
#' This function calculates the average conversion factors needed to match georeferenced data in numbers with nominal data in tons. | ||
#' It only considers strata where both measurement units (numbers and tons) are available and evaluates the coherence of conversion factors | ||
#' to achieve 100% of the nominal data for the same strata. If conversion factors are excessively high, the results are inconclusive, | ||
#' as nominal data should generally exceed georeferenced data; here, both conversion and raising occur. However, if the conversion factors | ||
#' are very low, an inconsistency between the datasets is likely. | ||
#' | ||
#' @param nominal_df A dataframe containing nominal data with columns for measurement units and measurement values. | ||
#' @param lvl0_conv_df A dataframe containing georeferenced data with both "number" and "tons" units, from which conversion factors will be calculated. | ||
#' @param strata_cols A character vector of column names used for stratifying the data (e.g., `species`, `gear_type`, `fishing_fleet`, `year`). | ||
#' | ||
#' @return A dataframe with calculated raising factors for each stratum where conversion is applicable. If conversion factors are too high, | ||
#' they are marked as inconclusive. For very low conversion factors, potential inconsistencies are indicated. | ||
#' | ||
#' @details The function groups and aggregates data within each specified stratum. It calculates the conversion factors for each stratum | ||
#' by determining the difference between nominal data values and converted georeferenced data values in tons. Strata with both measurement | ||
#' units are selected for comparison. | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' # Example usage: | ||
#' nominal_data <- data.frame(time_start = as.Date("2020-01-01"), measurement_value = 100, ...) | ||
#' georef_data <- data.frame(time_start = as.Date("2020-01-01"), measurement_unit = "no", measurement_value = 200, ...) | ||
#' strata_cols <- c("species", "gear_type", "fishing_fleet", "year", "geographic_identifier_nom) | ||
#' calculate_rf_to_reach_final.R(nominal_df = nominal_data, lvl0_conv_df = georef_data, strata_cols = strata_cols) | ||
#' } | ||
#' | ||
#' @import dplyr | ||
#' @import lubridate | ||
#' @import tidyr | ||
#' @export | ||
calculate_rf_to_reach_final <- function(df_to_reach, df_to_convert, strata_cols) { | ||
|
||
lvl0_conv_only_double_unit <-df_to_convert %>% | ||
dplyr::mutate(year = lubridate::year(time_start)) %>% | ||
dplyr::group_by(across(all_of(strata_cols))) %>% | ||
dplyr::mutate(numberunit = n_distinct(measurement_unit)) %>% dplyr::rowwise()%>% dplyr::filter(numberunit != 1 | (numberunit == 2 & measurement_unit == "no")) | ||
|
||
# Regrouper les données du dataframe nominal selon les strates et calculer la somme des valeurs | ||
nominal_groupped <- df_to_reach %>% | ||
dplyr::mutate(year = lubridate::year(time_start)) %>% | ||
dplyr::group_by(across(all_of(strata_cols))) %>% | ||
dplyr::summarise(measurement_value = sum(measurement_value), .groups = 'drop') | ||
|
||
# Regrouper les données du dataframe lvl0_conv selon les strates et les unités de mesure, puis pivoter les données | ||
lvl0_conv_groupped <- lvl0_conv_only_double_unit %>% | ||
dplyr::mutate(year = lubridate::year(time_start)) %>% | ||
dplyr::group_by(across(all_of(strata_cols)), measurement_unit) %>% | ||
dplyr::summarise(measurement_value = sum(measurement_value), .groups = 'drop') %>% | ||
tidyr::pivot_wider(names_from = measurement_unit, values_from = measurement_value) | ||
|
||
# Effectuer la jointure entre les deux jeux de données | ||
join_result <- dplyr::full_join(lvl0_conv_groupped, nominal_groupped, by = strata_cols) %>% | ||
dplyr::mutate( t = dplyr::coalesce(t, 0), | ||
raising_factors = (measurement_value - t) / no) | ||
|
||
return(join_result) | ||
} |
100 changes: 100 additions & 0 deletions
100
Analysis_markdown/functions/compare_nominal_georef_corrected.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
compare_nominal_georef_corrected <- function(nominal, georef_mapped, list_strata = list(c("species", "year", "source_authority", "gear_type", "fishing_fleet", "geographic_identifier_nom"))) { | ||
# Convertir les data.frames en data.tables | ||
setDT(nominal) | ||
setDT(georef_mapped) | ||
|
||
# Créer la colonne "year" à partir de time_start | ||
georef_mapped[, year := as.character(year(ymd(time_start)))] | ||
nominal[, year := as.character(year(ymd(time_start)))] | ||
|
||
# Conserver uniquement les données en tonnes | ||
georef_mapped_tons <- georef_mapped[measurement_unit == "t"] | ||
|
||
# Initialise une liste pour stocker les résultats (un résultat pour chaque liste de dimensions à conserver pour faire la comparaison) | ||
results <- list() | ||
|
||
for (strata in list_strata) { | ||
# Nom pour la catégorie actuelle de strata | ||
name <- paste0(toString(strata)) | ||
|
||
# Agréger les données pour le nominal et georef sur les colonnes spécifiées dans 'strata' (ex groupper les données par années, espèces, engins, pavillon) | ||
nominal_grouped <- nominal[, .(measurement_value_nominal = sum(measurement_value, na.rm = TRUE)), by = strata] | ||
georef_mapped_grouped <- georef_mapped[, .(measurement_value_georef = sum(measurement_value, na.rm = TRUE)), by = strata] | ||
georef_mapped_tons_grouped <- georef_mapped_tons[, .(measurement_value_georef_tons = sum(measurement_value, na.rm = TRUE)), by = strata] | ||
|
||
# # Retirer les valeurs des colonnes pour comparer uniquement les strates (si on veut garder que elles) | ||
nominal_grouped_without_value <- nominal_grouped[, .SD, .SDcols = strata] | ||
georef_grouped_without_value <- georef_mapped_grouped[, .SD, .SDcols = strata] | ||
georef_tons_grouped_without_value <- georef_mapped_tons_grouped[, .SD, .SDcols = strata] | ||
|
||
|
||
# # Assurer que les colonnes sont dans le même ordre pour la comparaison | ||
setcolorder(georef_grouped_without_value, names(nominal_grouped_without_value)) | ||
setcolorder(georef_tons_grouped_without_value, names(nominal_grouped_without_value)) | ||
|
||
# Trouver les strates présentes dans georef_mapped mais absentes de nominal | ||
georef_no_nominal <- fsetdiff(georef_grouped_without_value, nominal_grouped_without_value, all = FALSE) | ||
georef_no_nominal_with_value <- merge(georef_mapped_tons_grouped, georef_no_nominal, by = strata, all = FALSE) | ||
sum_georef_no_nominal_tons <- sum(georef_no_nominal_with_value$measurement_value_georef_tons ,na.rm = TRUE) | ||
|
||
|
||
# Comparer uniquement les données en tonnes | ||
georef_tons_no_nominal <- fsetdiff(georef_tons_grouped_without_value, nominal_grouped_without_value, all = FALSE) | ||
|
||
# Comparer les valeurs des strates communes entre nominal et georef_mapped pour les données en tonnes | ||
georef_sup_nominal <- merge(nominal_grouped, georef_mapped_tons_grouped, by = strata, all = FALSE) | ||
|
||
# Vérifier si les colonnes existent après le merge | ||
if ("measurement_value_georef_tons" %in% names(georef_sup_nominal) && | ||
"measurement_value_nominal" %in% names(georef_sup_nominal)) { | ||
georef_sup_nominal[, Difference := measurement_value_georef_tons - measurement_value_nominal] | ||
georef_sup_nominal <- georef_sup_nominal[round(Difference, 3) > 1] # Supérieur strictement à 1, on s'affranchit des petits kouaks | ||
} else { | ||
georef_sup_nominal <- data.table() # Retourne une table vide s'il n'y a pas de données | ||
} | ||
|
||
if ("fishing_fleet" %in% colnames(georef_sup_nominal)){ | ||
tons_nei_georef <- georef_no_nominal_with_value[ | ||
fishing_fleet == "NEI" , | ||
sum(measurement_value_georef_tons)] + georef_sup_nominal[ | ||
fishing_fleet == "NEI" , | ||
sum(measurement_value_georef_tons) | ||
]} else { | ||
tons_nei_georef <- 0 | ||
} | ||
|
||
tons_aggregated_georef <- georef_no_nominal_with_value[ | ||
species %in% c("TUN", "TUS" ,"BIL"), | ||
sum(measurement_value_georef_tons) | ||
] + georef_sup_nominal[ | ||
species %in% c("TUN", "TUS" ,"BIL"), | ||
sum(measurement_value_georef_tons) | ||
] | ||
|
||
if ("fishing_fleet" %in% colnames(nominal_grouped)){ | ||
tons_nei_nominal <- nominal_grouped[ | ||
fishing_fleet == "NEI", | ||
sum(measurement_value_nominal) | ||
]} else {tons_nei_nominal <- 0} | ||
|
||
|
||
sum_georef_sup_nom <- sum(georef_sup_nominal$Difference, na.rm = TRUE) | ||
|
||
suffisant <- ifelse(sum_georef_no_nominal_tons + sum_georef_sup_nom -(tons_aggregated_georef + tons_nei_georef) > 0, FALSE, TRUE) | ||
# Stocker les résultats | ||
results[[name]] <- list( | ||
georef_no_nominal = georef_no_nominal, # Strates dans georef mais absentes dans nominal | ||
georef_no_nominal_with_value = georef_no_nominal_with_value %>% dplyr::rename(measurement_value = measurement_value_georef_tons), # Strates dans georef mais absentes dans nominal avec la valeur totale | ||
georef_tons_no_nominal = georef_tons_no_nominal, # Strates en tonnes absentes dans nominal | ||
georef_sup_nominal = georef_sup_nominal, # Strates où georef est supérieur à nominal | ||
tons_nei_nominal = tons_nei_nominal, # Strates nei qui pourraient expliquer les différences | ||
tons_nei_georef = tons_nei_georef, # Strates nei qui pourraient expliquer les différences | ||
sum_georef_no_nominal = sum_georef_no_nominal_tons, | ||
suffisant = suffisant, | ||
tons_aggregated_georef = tons_aggregated_georef, | ||
sum_georef_sup_nom = sum_georef_sup_nom | ||
) | ||
} | ||
|
||
return(results) | ||
} |
Oops, something went wrong.