Skip to content

Commit

Permalink
Merge pull request #55 from firms-gta/53/hotfix/rendering-report-for-…
Browse files Browse the repository at this point in the history
…efforts

53/hotfix/rendering report for efforts
  • Loading branch information
bastienird authored Feb 5, 2025
2 parents c22d745 + 130fac9 commit e4177ab
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 26 deletions.
52 changes: 32 additions & 20 deletions Analysis_markdown/functions/Summarising_step.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis
flog.info("Initialized child environment")

i <- 1
# file_path_url <- "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions"
file_path_url <- "~/firms-gta/geoflow-tunaatlas/Analysis_markdown/functions"
file_path_url <- "https://raw.githubusercontent.com/firms-gta/geoflow-tunaatlas/master/Analysis_markdown/functions"
source(file.path(file_path_url,"copy_project_files.R"), local = TRUE)
source(file.path(file_path_url,"tidying_GTA_data_for_comparison.R"))
source(file.path(file_path_url,"Functions_markdown.R"), local = TRUE)
Expand All @@ -112,6 +111,7 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis
source(file.path(file_path_url,"other_dimension_analysis.R"), local = TRUE)
source(file.path(file_path_url,"comprehensive_cwp_dataframe_analysis.R"), local = TRUE)
source(file.path(file_path_url,"process_fisheries_data.R"), local = TRUE)
source(file.path(file_path_url,"process_fisheries_effort_data.R"), local = TRUE)
flog.info("Sourced all required functions")

for (entity_dir in entity_dirs) {
Expand All @@ -125,7 +125,17 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis

if (opts$fact == "effort") {
flog.warn("Effort dataset not displayed for now")
parameter_colnames_to_keep_fact = c("source_authority", "fishing_mode", "geographic_identifier","fishing_fleet", "gear_type",
"measurement_unit", "measurement_value", "GRIDTYPE","species_group")
topnumberfact = 3
} else {
parameter_colnames_to_keep_fact = c("source_authority", "species", "gear_type", "fishing_fleet",
"fishing_mode", "geographic_identifier",
"measurement_unit", "measurement_value", "GRIDTYPE",
"species_group", "Gear")
topnumberfact = 6

}
entity_name <- basename(entity_dir)
setwd(here::here(entity_dir))
copy_project_files(original_repo_path = here::here("Analysis_markdown"), new_repo_path = getwd())
Expand Down Expand Up @@ -168,14 +178,22 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis


data <- data %>% dplyr::left_join(fishing_fleet_label %>% dplyr::select(code,fishing_fleet_label = label), by = c("fishing_fleet" = "code"))
if("species" %in% colnames(data)){
data <- data %>% dplyr::left_join(species_label %>% dplyr::select(code,species_label = label, species_definition = definition), by = c("species" = "code"))
}

if(opts$fact == "effort"){
data <- data %>% dplyr::filter(measurement_unit%in%c("HOOKS", "FDAYS", "SETS", "NO.HOOKS", "NO.LINES", "NETS")) #only two first measuremnt_unit for each dataset
}
qs::qsave(data, file = file)
flog.info("Processed and saved data for file: %s", file)
}
}
} else {
flog.info("Retrieving processed data: %s", file)

data <- qs::qread(gsub(pattern = basename(file), replacement = "ancient.qs", file)) #hotifx
data <- data %>% dplyr::filter(measurement_unit%in%c("HOOKS", "FDAYS", "SETS", "NO.HOOKS", "NO.LINES", "NETS"))
qs::qsave(data, file = file)
}
}
parameter_resolution_filter <- opts$resolution_filter
Expand Down Expand Up @@ -213,17 +231,14 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis
parameter_final = NULL,
fig.path = parameters_child_global$fig.path,
parameter_fact = "catch",
parameter_colnames_to_keep = c("source_authority", "species", "gear_type", "fishing_fleet",
"fishing_mode", "geographic_identifier",
"measurement_unit", "measurement_value", "GRIDTYPE",
"species_group", "Gear"),
parameter_colnames_to_keep = parameter_colnames_to_keep_fact,
coverage = TRUE,
shapefile_fix = shapefile.fix,
continent = continent,
parameter_resolution_filter = parameters_child_global$parameter_resolution_filter,
parameter_filtering = parameters_child_global$parameter_filtering,
parameter_titre_dataset_1 = entity$identifiers[["id"]],
unique_analyse = TRUE
unique_analyse = TRUE, topnumber = topnumberfact
)

filename <- paste0("Report_on_", entity$identifiers[["id"]])
Expand All @@ -242,18 +257,15 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis
parameter_final = sub_list_dir_2[length(sub_list_dir_2)],
fig.path = parameters_child_global$fig.path,
parameter_fact = "catch",
parameter_colnames_to_keep = c("source_authority", "species", "gear_type", "fishing_fleet",
"fishing_mode", "geographic_identifier",
"measurement_unit", "measurement_value", "GRIDTYPE",
"species_group", "Gear"),
parameter_colnames_to_keep = parameter_colnames_to_keep_fact,
shapefile_fix = shapefile.fix,
continent = continent,
coverage = TRUE,
parameter_resolution_filter = parameters_child_global$parameter_resolution_filter,
parameter_filtering = parameters_child_global$parameter_filtering,
parameter_titre_dataset_1 = "FirmsLevel0",
parameter_titre_dataset_2 = entity$identifiers[["id"]],
unique_analyse = FALSE
unique_analyse = FALSE, topnumber = topnumberfact
)

new_path <- file.path(parameters_child_global$fig.path, paste0("/Comparison/initfinal_", basename(sub_list_dir_2[1]), "_", basename(sub_list_dir_2[length(sub_list_dir_2)])))
Expand All @@ -268,7 +280,11 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis

sub_list_dir_3 <- gsub("/data.qs", "", sub_list_dir_2)
render_env$sub_list_dir_3 <- sub_list_dir_3
process_fisheries_data_list <- process_fisheries_data(sub_list_dir_3, parameter_fact = "catch", parameter_filtering)
if(opts$fact == "effort"){
process_fisheries_data_list <- process_fisheries_effort_data(sub_list_dir_3, parameter_filtering)
} else {
process_fisheries_data_list <- process_fisheries_data(sub_list_dir_3, parameter_fact = "catch", parameter_filtering)
}
flog.info("Processed process_fisheries_data_list")

render_env$process_fisheries_data_list <- process_fisheries_data_list
Expand Down Expand Up @@ -308,18 +324,15 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis
fig.path = new_path,
parameter_fact = "catch",
plotting_type = "view",
parameter_colnames_to_keep = c("source_authority", "species", "gear_type", "fishing_fleet",
"fishing_mode", "geographic_identifier",
"measurement_unit", "measurement_value", "GRIDTYPE",
"species_group", "Gear"),
parameter_colnames_to_keep = parameter_colnames_to_keep_fact,
shapefile_fix = shapefile.fix,
continent = continent,
coverage = coverage,
parameter_resolution_filter = parameters_child_global$parameter_resolution_filter,
parameter_filtering = parameters_child_global$parameter_filtering,
parameter_titre_dataset_1 = parameter_titre_dataset_1,
parameter_titre_dataset_2 = parameter_titre_dataset_2,
unique_analyse = FALSE
unique_analyse = FALSE, topnumber = topnumberfact
)

# Log successful analysis
Expand Down Expand Up @@ -411,7 +424,6 @@ Summarising_step <- function(main_dir, connectionDB, config, source_authoritylis
sprintf("entity: %s is done", entity_dir)

}
}
try(setwd(ancient_wd))
flog.info("Finished Summarising_step function")
# return(render_env)
Expand Down
59 changes: 59 additions & 0 deletions Analysis_markdown/functions/calculate_rf_to_reach_final.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#' Calculate Raising Factors for Conversion to Nominal Data
#'
#' This function calculates the average conversion factors needed to match georeferenced data in numbers with nominal data in tons.
#' It only considers strata where both measurement units (numbers and tons) are available and evaluates the coherence of conversion factors
#' to achieve 100% of the nominal data for the same strata. If conversion factors are excessively high, the results are inconclusive,
#' as nominal data should generally exceed georeferenced data; here, both conversion and raising occur. However, if the conversion factors
#' are very low, an inconsistency between the datasets is likely.
#'
#' @param nominal_df A dataframe containing nominal data with columns for measurement units and measurement values.
#' @param lvl0_conv_df A dataframe containing georeferenced data with both "number" and "tons" units, from which conversion factors will be calculated.
#' @param strata_cols A character vector of column names used for stratifying the data (e.g., `species`, `gear_type`, `fishing_fleet`, `year`).
#'
#' @return A dataframe with calculated raising factors for each stratum where conversion is applicable. If conversion factors are too high,
#' they are marked as inconclusive. For very low conversion factors, potential inconsistencies are indicated.
#'
#' @details The function groups and aggregates data within each specified stratum. It calculates the conversion factors for each stratum
#' by determining the difference between nominal data values and converted georeferenced data values in tons. Strata with both measurement
#' units are selected for comparison.
#'
#' @examples
#' \dontrun{
#' # Example usage:
#' nominal_data <- data.frame(time_start = as.Date("2020-01-01"), measurement_value = 100, ...)
#' georef_data <- data.frame(time_start = as.Date("2020-01-01"), measurement_unit = "no", measurement_value = 200, ...)
#' strata_cols <- c("species", "gear_type", "fishing_fleet", "year", "geographic_identifier_nom)
#' calculate_rf_to_reach_final.R(nominal_df = nominal_data, lvl0_conv_df = georef_data, strata_cols = strata_cols)
#' }
#'
#' @import dplyr
#' @import lubridate
#' @import tidyr
#' @export
calculate_rf_to_reach_final <- function(df_to_reach, df_to_convert, strata_cols) {

lvl0_conv_only_double_unit <-df_to_convert %>%
dplyr::mutate(year = lubridate::year(time_start)) %>%
dplyr::group_by(across(all_of(strata_cols))) %>%
dplyr::mutate(numberunit = n_distinct(measurement_unit)) %>% dplyr::rowwise()%>% dplyr::filter(numberunit != 1 | (numberunit == 2 & measurement_unit == "no"))

# Regrouper les données du dataframe nominal selon les strates et calculer la somme des valeurs
nominal_groupped <- df_to_reach %>%
dplyr::mutate(year = lubridate::year(time_start)) %>%
dplyr::group_by(across(all_of(strata_cols))) %>%
dplyr::summarise(measurement_value = sum(measurement_value), .groups = 'drop')

# Regrouper les données du dataframe lvl0_conv selon les strates et les unités de mesure, puis pivoter les données
lvl0_conv_groupped <- lvl0_conv_only_double_unit %>%
dplyr::mutate(year = lubridate::year(time_start)) %>%
dplyr::group_by(across(all_of(strata_cols)), measurement_unit) %>%
dplyr::summarise(measurement_value = sum(measurement_value), .groups = 'drop') %>%
tidyr::pivot_wider(names_from = measurement_unit, values_from = measurement_value)

# Effectuer la jointure entre les deux jeux de données
join_result <- dplyr::full_join(lvl0_conv_groupped, nominal_groupped, by = strata_cols) %>%
dplyr::mutate( t = dplyr::coalesce(t, 0),
raising_factors = (measurement_value - t) / no)

return(join_result)
}
100 changes: 100 additions & 0 deletions Analysis_markdown/functions/compare_nominal_georef_corrected.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
compare_nominal_georef_corrected <- function(nominal, georef_mapped, list_strata = list(c("species", "year", "source_authority", "gear_type", "fishing_fleet", "geographic_identifier_nom"))) {
# Convertir les data.frames en data.tables
setDT(nominal)
setDT(georef_mapped)

# Créer la colonne "year" à partir de time_start
georef_mapped[, year := as.character(year(ymd(time_start)))]
nominal[, year := as.character(year(ymd(time_start)))]

# Conserver uniquement les données en tonnes
georef_mapped_tons <- georef_mapped[measurement_unit == "t"]

# Initialise une liste pour stocker les résultats (un résultat pour chaque liste de dimensions à conserver pour faire la comparaison)
results <- list()

for (strata in list_strata) {
# Nom pour la catégorie actuelle de strata
name <- paste0(toString(strata))

# Agréger les données pour le nominal et georef sur les colonnes spécifiées dans 'strata' (ex groupper les données par années, espèces, engins, pavillon)
nominal_grouped <- nominal[, .(measurement_value_nominal = sum(measurement_value, na.rm = TRUE)), by = strata]
georef_mapped_grouped <- georef_mapped[, .(measurement_value_georef = sum(measurement_value, na.rm = TRUE)), by = strata]
georef_mapped_tons_grouped <- georef_mapped_tons[, .(measurement_value_georef_tons = sum(measurement_value, na.rm = TRUE)), by = strata]

# # Retirer les valeurs des colonnes pour comparer uniquement les strates (si on veut garder que elles)
nominal_grouped_without_value <- nominal_grouped[, .SD, .SDcols = strata]
georef_grouped_without_value <- georef_mapped_grouped[, .SD, .SDcols = strata]
georef_tons_grouped_without_value <- georef_mapped_tons_grouped[, .SD, .SDcols = strata]


# # Assurer que les colonnes sont dans le même ordre pour la comparaison
setcolorder(georef_grouped_without_value, names(nominal_grouped_without_value))
setcolorder(georef_tons_grouped_without_value, names(nominal_grouped_without_value))

# Trouver les strates présentes dans georef_mapped mais absentes de nominal
georef_no_nominal <- fsetdiff(georef_grouped_without_value, nominal_grouped_without_value, all = FALSE)
georef_no_nominal_with_value <- merge(georef_mapped_tons_grouped, georef_no_nominal, by = strata, all = FALSE)
sum_georef_no_nominal_tons <- sum(georef_no_nominal_with_value$measurement_value_georef_tons ,na.rm = TRUE)


# Comparer uniquement les données en tonnes
georef_tons_no_nominal <- fsetdiff(georef_tons_grouped_without_value, nominal_grouped_without_value, all = FALSE)

# Comparer les valeurs des strates communes entre nominal et georef_mapped pour les données en tonnes
georef_sup_nominal <- merge(nominal_grouped, georef_mapped_tons_grouped, by = strata, all = FALSE)

# Vérifier si les colonnes existent après le merge
if ("measurement_value_georef_tons" %in% names(georef_sup_nominal) &&
"measurement_value_nominal" %in% names(georef_sup_nominal)) {
georef_sup_nominal[, Difference := measurement_value_georef_tons - measurement_value_nominal]
georef_sup_nominal <- georef_sup_nominal[round(Difference, 3) > 1] # Supérieur strictement à 1, on s'affranchit des petits kouaks
} else {
georef_sup_nominal <- data.table() # Retourne une table vide s'il n'y a pas de données
}

if ("fishing_fleet" %in% colnames(georef_sup_nominal)){
tons_nei_georef <- georef_no_nominal_with_value[
fishing_fleet == "NEI" ,
sum(measurement_value_georef_tons)] + georef_sup_nominal[
fishing_fleet == "NEI" ,
sum(measurement_value_georef_tons)
]} else {
tons_nei_georef <- 0
}

tons_aggregated_georef <- georef_no_nominal_with_value[
species %in% c("TUN", "TUS" ,"BIL"),
sum(measurement_value_georef_tons)
] + georef_sup_nominal[
species %in% c("TUN", "TUS" ,"BIL"),
sum(measurement_value_georef_tons)
]

if ("fishing_fleet" %in% colnames(nominal_grouped)){
tons_nei_nominal <- nominal_grouped[
fishing_fleet == "NEI",
sum(measurement_value_nominal)
]} else {tons_nei_nominal <- 0}


sum_georef_sup_nom <- sum(georef_sup_nominal$Difference, na.rm = TRUE)

suffisant <- ifelse(sum_georef_no_nominal_tons + sum_georef_sup_nom -(tons_aggregated_georef + tons_nei_georef) > 0, FALSE, TRUE)
# Stocker les résultats
results[[name]] <- list(
georef_no_nominal = georef_no_nominal, # Strates dans georef mais absentes dans nominal
georef_no_nominal_with_value = georef_no_nominal_with_value %>% dplyr::rename(measurement_value = measurement_value_georef_tons), # Strates dans georef mais absentes dans nominal avec la valeur totale
georef_tons_no_nominal = georef_tons_no_nominal, # Strates en tonnes absentes dans nominal
georef_sup_nominal = georef_sup_nominal, # Strates où georef est supérieur à nominal
tons_nei_nominal = tons_nei_nominal, # Strates nei qui pourraient expliquer les différences
tons_nei_georef = tons_nei_georef, # Strates nei qui pourraient expliquer les différences
sum_georef_no_nominal = sum_georef_no_nominal_tons,
suffisant = suffisant,
tons_aggregated_georef = tons_aggregated_georef,
sum_georef_sup_nom = sum_georef_sup_nom
)
}

return(results)
}
Loading

0 comments on commit e4177ab

Please sign in to comment.