Skip to content

Commit

Permalink
Merge branch 'main' of github.com:MTES-MCT/zero-logement-vacant
Browse files Browse the repository at this point in the history
  • Loading branch information
rcourivaud committed Feb 21, 2025
2 parents 9113b21 + f7e0409 commit 4961d4c
Show file tree
Hide file tree
Showing 18 changed files with 138 additions and 34 deletions.
17 changes: 15 additions & 2 deletions analytics/dagster/src/assets/dwh/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from .checks import check_ff_lovac_on_duckdb
from .copy import copy_dagster_duckdb_to_metabase_duckdb, export_mother_duck_local_duckdb, copy_dagster_duckdb_to_metabase_duckdb_through_s3
from .ingest import import_postgres_data_from_replica_to_duckdb, import_cerema_ff_lovac_data_from_s3_to_duckdb, setup_replica_db, setup_s3_connection
from .ingest import (
import_postgres_data_from_replica_to_duckdb,
import_cerema_ff_lovac_data_from_s3_to_duckdb,
setup_replica_db,
setup_s3_connection,
raw_communes,
raw_epci,
raw_departements,
raw_regions
)
from .upload import upload_duckdb_to_s3, upload_ff_to_s3, download_ff_from_s3
from .setup_duckdb import setup_duckdb

Expand All @@ -16,5 +25,9 @@
"download_ff_from_s3",
"setup_duckdb",
"export_mother_duck_local_duckdb",
"copy_dagster_duckdb_to_metabase_duckdb_through_s3"
"copy_dagster_duckdb_to_metabase_duckdb_through_s3",
"raw_communes",
"raw_epci",
"raw_departements",
"raw_regions",
]
6 changes: 5 additions & 1 deletion analytics/dagster/src/assets/dwh/ingest/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from .ingest_lovac_ff_s3_asset import import_cerema_ff_lovac_data_from_s3_to_duckdb, setup_s3_connection
from .ingest_postgres_asset import import_postgres_data_from_replica_to_duckdb, setup_replica_db

from .administrative_cuts import raw_communes, raw_epci, raw_departements, raw_regions
__all__ = [
"import_postgres_data_from_replica_to_duckdb",
"import_cerema_ff_lovac_data_from_s3_to_duckdb",
"setup_replica_db",
"setup_s3_connection",
"raw_communes",
"raw_epci",
"raw_departements",
"raw_regions",
]
40 changes: 40 additions & 0 deletions analytics/dagster/src/assets/dwh/ingest/administrative_cuts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
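# administrative_cuts.py - Dagster assets that load French administrative divisions (communes, EPCI, departements, regions) from geo.api.gouv.fr into the DuckDB `external` schema.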
import pandas as pd
from dagster_duckdb import DuckDBResource
from dagster import AssetKey, asset, AssetExecutionContext



@asset(
    deps=[AssetKey("setup_duckdb")],
    group_name="external_seeds",
)
def raw_communes(context: AssetExecutionContext, duckdb: DuckDBResource):
    """Load the commune list from geo.api.gouv.fr into ``external.communes``.

    The JSON payload is read with pandas, then the DuckDB table is created
    (or replaced) from it. The ``external`` schema is created first when it
    does not exist yet. Nothing is returned.

    NOTE(review): ``pd.read_json`` infers column dtypes by default, so
    all-numeric code columns may not stay strings -- confirm this is intended.
    """
    source_url = "https://geo.api.gouv.fr/communes"
    # Keep this local named ``df``: the CREATE TABLE statement below
    # references it by name (``FROM df``).
    df = pd.read_json(source_url)
    statements = (
        "CREATE SCHEMA IF NOT EXISTS external;",
        "CREATE OR REPLACE TABLE external.communes AS SELECT * FROM df",
    )
    with duckdb.get_connection() as connection:
        for statement in statements:
            connection.execute(statement)
@asset(
    deps=[AssetKey("setup_duckdb")],
    group_name="external_seeds",
)
def raw_epci(context: AssetExecutionContext, duckdb: DuckDBResource):
    """Load the EPCI list from geo.api.gouv.fr into ``external.epci``.

    The JSON payload is read with pandas, then the DuckDB table is created
    (or replaced) from it. The ``external`` schema is created first when it
    does not exist yet. Nothing is returned.

    NOTE(review): ``pd.read_json`` infers column dtypes by default, so
    all-numeric code columns may not stay strings -- confirm this is intended.
    """
    source_url = "https://geo.api.gouv.fr/epcis"
    # Keep this local named ``df``: the CREATE TABLE statement below
    # references it by name (``FROM df``).
    df = pd.read_json(source_url)
    statements = (
        "CREATE SCHEMA IF NOT EXISTS external;",
        "CREATE OR REPLACE TABLE external.epci AS SELECT * FROM df",
    )
    with duckdb.get_connection() as connection:
        for statement in statements:
            connection.execute(statement)


@asset(
    deps=[AssetKey("setup_duckdb")],
    group_name="external_seeds",
)
def raw_departements(context: AssetExecutionContext, duckdb: DuckDBResource):
    """Load the departement list from geo.api.gouv.fr into ``external.departements``.

    The JSON payload is read with pandas, then the DuckDB table is created
    (or replaced) from it. The ``external`` schema is created first when it
    does not exist yet. Nothing is returned.

    NOTE(review): ``pd.read_json`` infers column dtypes by default, so
    all-numeric code columns may not stay strings -- confirm this is intended.
    """
    source_url = "https://geo.api.gouv.fr/departements"
    # Keep this local named ``df``: the CREATE TABLE statement below
    # references it by name (``FROM df``).
    df = pd.read_json(source_url)
    statements = (
        "CREATE SCHEMA IF NOT EXISTS external;",
        "CREATE OR REPLACE TABLE external.departements AS SELECT * FROM df",
    )
    with duckdb.get_connection() as connection:
        for statement in statements:
            connection.execute(statement)



@asset(
    deps=[AssetKey("setup_duckdb")],
    group_name="external_seeds",
)
def raw_regions(context: AssetExecutionContext, duckdb: DuckDBResource):
    """Load the region list from geo.api.gouv.fr into ``external.regions``.

    The JSON payload is read with pandas, then the DuckDB table is created
    (or replaced) from it. The ``external`` schema is created first when it
    does not exist yet. Nothing is returned.

    NOTE(review): ``pd.read_json`` infers column dtypes by default, so
    all-numeric code columns may not stay strings (leading zeros could be
    dropped) -- confirm this is intended, or pass ``dtype=False``.
    """
    source_url = "https://geo.api.gouv.fr/regions"
    # Keep this local named ``df``: the CREATE TABLE statement below
    # references it by name (``FROM df``).
    df = pd.read_json(source_url)
    statements = (
        "CREATE SCHEMA IF NOT EXISTS external;",
        "CREATE OR REPLACE TABLE external.regions AS SELECT * FROM df",
    )
    with duckdb.get_connection() as connection:
        for statement in statements:
            connection.execute(statement)
13 changes: 12 additions & 1 deletion analytics/dagster/src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@ class Config:
"marts_public_establishments_hierarchy",
]

admin_tables = [
"marts_admin_epci",
"marts_admin_communes",
"marts_admin_departements",
"marts_admin_regions",
]

analysis_tables = ["marts_analysis_exit_flow_ff23_lovac"]

common_tables = ["marts_common_cities", "marts_common_morphology"]
Expand Down Expand Up @@ -84,6 +91,10 @@ class Config:
"marts_production_join_owner_housing": "join_owner_housing",
"marts_production_join_establishment_housing": "join_establishment_housing",
"marts_common_cities": "cities_zonage_2024",
"marts_admin_epci": "admin_epci",
"marts_admin_communes": "admin_communes",
"marts_admin_departements": "admin_departements",
"marts_admin_regions": "admin_regions",
# "marts_common_morphology": "infra_municipalities_morphology",
# "marts_production_campaigns": "prod_campaigns",
# "marts_production_establishments": "prod_establishments",
Expand All @@ -96,7 +107,7 @@ class Config:
# "marts_stats_monthly_global": "stats_activity_monthly",
}

RESULT_TABLES = production_tables + join_tables + common_tables + public_tables
RESULT_TABLES = production_tables + join_tables + common_tables + public_tables + admin_tables


def translate_table_name(table_name):
Expand Down
6 changes: 3 additions & 3 deletions analytics/dbt/macros/geo/get_hierarchy.sql
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% macro generate_hierarchy_relations(source_type, target_type, depth) %}
{% macro generate_hierarchy_relations(source_types, target_types, depth) %}
(
SELECT DISTINCT
source.id as ancestor_id,
Expand All @@ -11,6 +11,6 @@
JOIN {{ ref('int_production_establishments') }} target
CROSS JOIN UNNEST(target.localities_geo_code) as t_geo_code
ON CAST(s_geo_code AS VARCHAR) = CAST(t_geo_code AS VARCHAR)
WHERE source.kind = '{{ source_type }}'
AND target.kind = '{{ target_type }}')
WHERE source.kind IN ({{ source_types }})
AND target.kind IN ({{ target_types }}))
{% endmacro %}
3 changes: 3 additions & 0 deletions analytics/dbt/models/marts/admin/marts_admin_communes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Admin mart for communes: exposes every column of the stg_admin_communes
-- staging model unchanged (no filtering, renaming or aggregation).
SELECT
*
FROM {{ ref('stg_admin_communes') }}
3 changes: 3 additions & 0 deletions analytics/dbt/models/marts/admin/marts_admin_departements.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Admin mart for departements: exposes every column of the
-- stg_admin_departements staging model unchanged (no filtering, renaming or
-- aggregation).
SELECT
*
FROM {{ ref('stg_admin_departements') }}
3 changes: 3 additions & 0 deletions analytics/dbt/models/marts/admin/marts_admin_epci.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Admin mart for EPCI: exposes every column of the stg_admin_epci staging
-- model unchanged (no filtering, renaming or aggregation).
SELECT
*
FROM {{ ref('stg_admin_epci') }}
3 changes: 3 additions & 0 deletions analytics/dbt/models/marts/admin/marts_admin_regions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Admin mart for regions: exposes every column of the stg_admin_regions
-- staging model unchanged (no filtering, renaming or aggregation).
SELECT
*
FROM {{ ref('stg_admin_regions') }}
11 changes: 3 additions & 8 deletions analytics/dbt/models/marts/common/marts_common_cities.sql
Original file line number Diff line number Diff line change
@@ -1,13 +1,8 @@
SELECT
ccm.city_code,
MIN(label) AS label, -- Si le label est le même pour tous les arrondissements, sinon utiliser GROUP_CONCAT
MIN(zip_code) AS zip_code, -- Pour prendre un code postal représentatif, sinon utiliser GROUP_CONCAT
AVG(latitude) AS avg_latitude,
AVG(longitude) AS avg_longitude,
MIN(department_name) AS department_name, -- Même remarque que pour le label
MIN(department_number) AS department_number,
MIN(region_name) AS region_name,
MIN(region_geojson_name) AS region_geojson_name,
MIN(cc.libelle) AS label,
MIN(cc.department_code) AS department_code,
MIN(cc.region_code) AS region_code,
MAX(ca1.is_in) AS tlv1, -- Prend 1 s'il y a au moins un arrondissement où la valeur est 1
MAX(ca2.is_in) AS tlv2, -- Idem
MAX(action_coeur_de_ville) AS action_coeur_de_ville,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
WITH
-- Définir les groupes de types pour chaque niveau
{% set niveau_1 = "'Commune'" %}
{% set niveau_2 = "'CA', 'CC', 'CU', 'EPCI', 'ME'" %}
{% set niveau_3 = "'SDED', 'DEP'" %}
{% set niveau_4 = "'REG', 'SDER'" %}

-- Toutes les relations possibles
all_relations AS (
-- Commune -> EPCI (profondeur 1)
{{ generate_hierarchy_relations('Commune', 'CA', 1) }}
-- Niveau 1 -> Niveau 2 (profondeur 1)
{{ generate_hierarchy_relations(niveau_1, niveau_2, 1) }}
UNION ALL
{{ generate_hierarchy_relations('Commune', 'CC', 1) }}
-- Niveau 1 -> Niveau 3 (profondeur 2)
{{ generate_hierarchy_relations(niveau_1, niveau_3, 2) }}
UNION ALL
-- Commune -> Département (profondeur 2)
{{ generate_hierarchy_relations('Commune', 'DEP', 2) }}
-- Niveau 1 -> Niveau 4 (profondeur 3)
{{ generate_hierarchy_relations(niveau_1, niveau_4, 3) }}
UNION ALL
-- Commune -> Région (profondeur 3)
{{ generate_hierarchy_relations('Commune', 'REG', 3) }}
-- Niveau 2 -> Niveau 3 (profondeur 1)
{{ generate_hierarchy_relations(niveau_2, niveau_3, 1) }}
UNION ALL
-- EPCI -> Département (profondeur 1)
{{ generate_hierarchy_relations('CA', 'DEP', 1) }}
-- Niveau 2 -> Niveau 4 (profondeur 2)
{{ generate_hierarchy_relations(niveau_2, niveau_4, 2) }}
UNION ALL
{{ generate_hierarchy_relations('CC', 'DEP', 1) }}
UNION ALL
-- EPCI -> Région (profondeur 2)
{{ generate_hierarchy_relations('CA', 'REG', 2) }}
UNION ALL
{{ generate_hierarchy_relations('CC', 'REG', 2) }}
UNION ALL
-- Département -> Région (profondeur 1)
{{ generate_hierarchy_relations('DEP', 'REG', 1) }}
-- Niveau 3 -> Niveau 4 (profondeur 1)
{{ generate_hierarchy_relations(niveau_3, niveau_4, 1) }}
)

-- Table finale de hiérarchie
SELECT DISTINCT
ancestor_id,
descendant_id,
Expand Down
7 changes: 7 additions & 0 deletions analytics/dbt/models/staging/admin/schema.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version: 2

models:
- name: stg_admin_communes
- name: stg_admin_epci
- name: stg_admin_departements
- name: stg_admin_regions
10 changes: 10 additions & 0 deletions analytics/dbt/models/staging/admin/sources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
version: 2

sources:
- name: duckdb_raw
schema: external
tables:
- name: communes
- name: epci
- name: departements
- name: regions
3 changes: 3 additions & 0 deletions analytics/dbt/models/staging/admin/stg_admin_communes.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Staging model for communes: selects every column from the `communes` table
-- of the `duckdb_raw` source, with no renaming or filtering.
SELECT
*
FROM {{ source ('duckdb_raw', 'communes') }}
3 changes: 3 additions & 0 deletions analytics/dbt/models/staging/admin/stg_admin_departements.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Staging model for departements: selects every column from the
-- `departements` table of the `duckdb_raw` source, with no renaming or
-- filtering.
SELECT
*
FROM {{ source ('duckdb_raw', 'departements') }}
3 changes: 3 additions & 0 deletions analytics/dbt/models/staging/admin/stg_admin_epci.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Staging model for EPCI: selects every column from the `epci` table of the
-- `duckdb_raw` source, with no renaming or filtering.
SELECT
*
FROM {{ source ('duckdb_raw', 'epci') }}
3 changes: 3 additions & 0 deletions analytics/dbt/models/staging/admin/stg_admin_regions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Staging model for regions: selects every column from the `regions` table
-- of the `duckdb_raw` source, with no renaming or filtering.
SELECT
*
FROM {{ source ('duckdb_raw', 'regions') }}
2 changes: 1 addition & 1 deletion analytics/dbt/models/staging/common/stg_common_cities.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
SELECT
TYPECOM as city_kind,
COM as geo_code,
REG as region,
REG as region_code,
DEP as department_code,
CTCD as ctcd,
ARR as arr,
Expand Down

0 comments on commit 4961d4c

Please sign in to comment.