diff --git a/CHANGELOG.md b/CHANGELOG.md
index ec2b7d53..788d21aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+- Converter for Spain (whole), based on the FEGA 2025+ data
 - Update fr-converter to support 2021/2022 files
 
 ## [v0.21.0] - 2026-02-16
diff --git a/fiboa_cli/datasets/es.py b/fiboa_cli/datasets/es.py
index 4d2fd9d4..2352d444 100644
--- a/fiboa_cli/datasets/es.py
+++ b/fiboa_cli/datasets/es.py
@@ -1,50 +1,145 @@
+import re
+
+import requests
 from vecorel_cli.vecorel.extensions import ADMIN_DIVISION
 
-from fiboa_cli.conversion.fiboa_converter import FiboaBaseConverter
-from fiboa_cli.datasets.commons.data import read_data_csv
+from ..conversion.fiboa_converter import FiboaBaseConverter
+
 
+class Converter(FiboaBaseConverter):
+    """Converter for the national FEGA declared-crop dataset of Spain.
+
+    The data ships as one zipped GeoPackage per province. The archives of the selected
+    campaign year (the variant) are discovered from the portal's directory listing.
+    """
+
+    id = "es"
+    short_name = "Spain"
+    title = "Spain Declared Crops (Cultivos Declarados SIGPAC)"
+    description = """
+National declared-crop dataset (Cultivos Declarados SIGPAC) published by the Spanish Agricultural Guarantee Fund
+(FEGA) via the unified SIGPAC Hub Cloud portal (sigpac-hubcloud.es). Each record is a declaration line within a
+farmer's Single Application (Solicitud Única) for Common Agricultural Policy (CAP) direct payments, mapped onto
+SIGPAC cadastral divisions. Data is distributed as one GeoPackage per Spanish province, harmonised across the
+country since the 2025 campaign year.
 
-class ESBaseConverter(FiboaBaseConverter):
+This is a high-value dataset (HVD) under EU Implementing Regulation 2023/138.
     """
-    Base Converter for Spain
-    Asssumes a source column with the SIGPAC-Land Use code
-    The Land Use code is filtered for agricultural use and transformed into a high-level crop type
+    provider = "Fondo Español de Garantía Agraria (FEGA)"
+    attribution = "©FEGA / Ministerio de Agricultura, Pesca y Alimentación"
+    license = "CC-BY-4.0"
 
-    "Cultivo Declarado" is what we would prefer, but the "Recinto" is the best to be found so far
+    variants = {"2025": "2025"}
 
-    For Spanish Sources, see https://www.cartodruid.es/en/-/descargar-sigpac-comunidad-autonoma
-    There seems to be a National Layer; https://inspire-geoportal.ec.europa.eu/srv/api/records/87ce5171-d713-4eec-a1f3-2b9dd94cad91
-    """
+    columns = {
+        "geometry": "geometry",
+        "id": "id",
+        "provincia": "admin_province_code",
+        "municipio": "admin_municipality_code",
+        "dn_surface": "metrics:area",
+        "parc_producto": "crop:code",
+        "parc_sistexp": "irrigation_system",
+        "parc_supcult": "cultivation_surface",
+    }
 
-    use_code_attribute = "uso_sigpac"
+    area_is_in_ha = False
 
     extensions = {
         "https://fiboa.org/crop-extension/v0.2.0/schema.yaml",
         ADMIN_DIVISION,
     }
+
     column_additions = {
-        # https://www.euskadi.eus/contenidos/informacion/pac2015_pagosdirectos/es_def/adjuntos/Anexos_PAC_marzo2015.pdf
-        # https://www.fega.gob.es/sites/default/files/files/document/AD-CIRCULAR_2-2021_EE98293_SIGC2021.PDF
-        # Very generic list
         "admin:country_code": "ES",
-        "crop:code_list": "https://fiboa.org/code/es/sigpac/land_use.csv",
+        # FEGA declared-crop codelist (PARC_PRODUCTO) — separate from the SIGPAC land-use list.
+        # Reference list shipped inside each provincial GPKG as the `cod_producto` layer.
+        "crop:code_list": "https://fiboa.org/code/es/cultivos_declarados/parc_producto.csv",
+    }
+
+    column_migrations = {
+        # crop:code must be a string per the crop extension; parc_producto is an integer.
+        "parc_producto": lambda col: col.astype("Int64").astype(str),
+        # admin_*_code are strings; zero-pad province to 2 digits (INE convention).
+        "provincia": lambda col: col.astype("Int64").astype(str).str.zfill(2),
+        "municipio": lambda col: col.astype("Int64").astype(str),
+    }
+
+    missing_schemas = {
+        "properties": {
+            "admin_province_code": {"type": "string"},
+            "admin_municipality_code": {"type": "string"},
+            "irrigation_system": {"type": "string"},
+            "cultivation_surface": {"type": "int32"},
+        }
     }
 
     def __init__(self, *args, **kwargs):
+        """Default to the first variant and add a `determination:datetime` for that year."""
         super().__init__(*args, **kwargs)
-        assert self.id.startswith("es_"), "Assuming Spanish subclass"
-
-        def code_filter(col):
-            return ~col.isin("AG/CA/ED/FO/IM/IS/IV/TH/ZC/ZU/ZV/MT".split("/") + [None])
+        if not self.variant:
+            self.variant = next(iter(self.variants))
+        # Rebind to a fresh dict so the class-level `column_additions` is never mutated.
+        self.column_additions = {
+            **self.column_additions,
+            "determination:datetime": f"{self.variant}-01-01T00:00:00Z",
+        }
 
-        self.column_filters = {self.use_code_attribute: code_filter}
-        self.column_additions["admin:subdivision_code"] = self.id[len("es_") :].upper()
+    def layer_filter(self, layer: str, uri: str) -> bool:
+        """Return True only for the `cultivo_declarado` layer of a GeoPackage."""
+        # GPKG contains the data layer plus several codelist tables (cod_*) — only read the data.
+        return layer == "cultivo_declarado"
 
     def migrate(self, gdf):
-        # This actually is a land use code. Not sure if we should put this in crop:code
-        rows = read_data_csv("es_coda_uso.csv")
-        mapping = {row["original_code"]: row["original_name"] for row in rows}
-        mapping_en = {row["original_code"]: row["name_en"] for row in rows}
-        gdf["crop:name"] = gdf[self.use_code_attribute].map(mapping)
-        gdf["crop:name_en"] = gdf[self.use_code_attribute].map(mapping_en)
+        """Add a synthetic `id` column, then delegate to the base-class migration."""
+        # The source has no globally unique row identifier. Build one from the SIGPAC cadastral key
+        # plus the declaration-line index, which is unique per record.
+        # NOTE(review): a null key part would be rendered as the text "<NA>"; presumably the
+        # cadastral key columns are always populated -- confirm against the source data.
+        def part(col):
+            return gdf[col].astype("Int64").astype(str)
+
+        gdf["id"] = (
+            part("provincia").str.zfill(2)
+            + "-"
+            + part("municipio")
+            + "-"
+            + part("agregado")
+            + "-"
+            + part("zona")
+            + "-"
+            + part("poligono")
+            + "-"
+            + part("parcela")
+            + "-"
+            + part("recinto")
+            + "-"
+            + part("ld_recinto")
+        )
         return super().migrate(gdf)
+
+    def get_urls(self):
+        """Discover the provincial GeoPackage archives for the selected campaign year.
+
+        Returns a dict mapping each archive URL to the glob of files to read from it.
+
+        Raises:
+            ValueError: if the selected variant is not one of `variants`.
+            requests.RequestException: if the listing cannot be fetched (incl. HTTP errors).
+            RuntimeError: if the listing contains no matching `.zip` links.
+        """
+        if self.variant not in self.variants:
+            opts = ", ".join(self.variants)
+            raise ValueError(f"Unknown variant '{self.variant}', choose from {opts}")
+
+        year = self.variant
+        base = f"https://sigpac-hubcloud.es/geopackages/{year}/cultivo_declarado/"
+        response = requests.get(base, timeout=60)
+        response.raise_for_status()
+        # The directory listing is a plain HTML index; parse out the .zip hrefs. Match the
+        # attribute case-insensitively so both `HREF="..."` and `href="..."` are accepted.
+        zip_paths = re.findall(
+            r'href="(/geopackages/[^"]+\.zip)"', response.text, flags=re.IGNORECASE
+        )
+        if not zip_paths:
+            raise RuntimeError(f"No GeoPackage archives found at {base}")
+        return {f"https://sigpac-hubcloud.es{p}": ["*.gpkg"] for p in zip_paths}
diff --git a/fiboa_cli/datasets/es_an.py b/fiboa_cli/datasets/es_an.py
index 44abd2fe..b484f3a0 100644
--- a/fiboa_cli/datasets/es_an.py
+++ b/fiboa_cli/datasets/es_an.py
@@ -1,7 +1,7 @@
 from loguru import logger
 
 from .commons.data import read_data_csv
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 
 class ANConverter(ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_ar.py b/fiboa_cli/datasets/es_ar.py
index ac8d1f60..9e5b059b 100644
--- a/fiboa_cli/datasets/es_ar.py
+++ b/fiboa_cli/datasets/es_ar.py
@@ -1,6 +1,6 @@
 import pandas as pd
 
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 
 class ARConverter(ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_base.py b/fiboa_cli/datasets/es_base.py
new file mode 100644
index 00000000..4d2fd9d4
--- /dev/null
+++ b/fiboa_cli/datasets/es_base.py
@@ -0,0 +1,58 @@
+from vecorel_cli.vecorel.extensions import ADMIN_DIVISION
+
+from fiboa_cli.conversion.fiboa_converter import FiboaBaseConverter
+from fiboa_cli.datasets.commons.data import read_data_csv
+
+
+class ESBaseConverter(FiboaBaseConverter):
+    """
+    Base Converter for Spain
+    Assumes a source column with the SIGPAC-Land Use code
+    The Land Use code is filtered for agricultural use and transformed into a high-level crop type
+
+    "Cultivo Declarado" is what we would prefer, but the "Recinto" is the best to be found so far
+
+    For Spanish Sources, see https://www.cartodruid.es/en/-/descargar-sigpac-comunidad-autonoma
+    There seems to be a National Layer; https://inspire-geoportal.ec.europa.eu/srv/api/records/87ce5171-d713-4eec-a1f3-2b9dd94cad91
+    """
+
+    use_code_attribute = "uso_sigpac"
+
+    extensions = {
+        "https://fiboa.org/crop-extension/v0.2.0/schema.yaml",
+        ADMIN_DIVISION,
+    }
+    column_additions = {
+        # https://www.euskadi.eus/contenidos/informacion/pac2015_pagosdirectos/es_def/adjuntos/Anexos_PAC_marzo2015.pdf
+        # https://www.fega.gob.es/sites/default/files/files/document/AD-CIRCULAR_2-2021_EE98293_SIGC2021.PDF
+        # Very generic list
+        "admin:country_code": "ES",
+        "crop:code_list": "https://fiboa.org/code/es/sigpac/land_use.csv",
+    }
+
+    def __init__(self, *args, **kwargs):
+        """Install the land-use filter and derive the subdivision code from the id."""
+        super().__init__(*args, **kwargs)
+        assert self.id.startswith("es_"), "Assuming Spanish subclass"
+
+        def code_filter(col):
+            # True where the land-use code is neither in the excluded list nor None.
+            return ~col.isin("AG/CA/ED/FO/IM/IS/IV/TH/ZC/ZU/ZV/MT".split("/") + [None])
+
+        self.column_filters = {self.use_code_attribute: code_filter}
+        # Rebind to a fresh dict instead of assigning into the existing one, so a
+        # `column_additions` dict defined at class level is never mutated by an instance.
+        self.column_additions = {
+            **self.column_additions,
+            "admin:subdivision_code": self.id[len("es_") :].upper(),
+        }
+
+    def migrate(self, gdf):
+        """Add `crop:name` and `crop:name_en`, mapped from the land-use code column."""
+        # This actually is a land use code. Not sure if we should put this in crop:code
+        rows = read_data_csv("es_coda_uso.csv")
+        mapping = {row["original_code"]: row["original_name"] for row in rows}
+        mapping_en = {row["original_code"]: row["name_en"] for row in rows}
+        gdf["crop:name"] = gdf[self.use_code_attribute].map(mapping)
+        gdf["crop:name_en"] = gdf[self.use_code_attribute].map(mapping_en)
+        return super().migrate(gdf)
diff --git a/fiboa_cli/datasets/es_cb.py b/fiboa_cli/datasets/es_cb.py
index 9d63ee5e..a2ca7096 100644
--- a/fiboa_cli/datasets/es_cb.py
+++ b/fiboa_cli/datasets/es_cb.py
@@ -1,7 +1,7 @@
 import re
 
 from fiboa_cli.conversion.converter_rest import EsriRESTConverterMixin
-from fiboa_cli.datasets.es import ESBaseConverter
+from fiboa_cli.datasets.es_base import ESBaseConverter
 
 
 class ESCBConverter(EsriRESTConverterMixin, ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_cl.py b/fiboa_cli/datasets/es_cl.py
index 573f0ceb..1d2cf4d9 100644
--- a/fiboa_cli/datasets/es_cl.py
+++ b/fiboa_cli/datasets/es_cl.py
@@ -4,7 +4,7 @@
 import requests
 from loguru import logger
 
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 regex = re.compile(r"\d+_(RECFE|BURGOS).*\.shp$")
 
diff --git a/fiboa_cli/datasets/es_cm.py b/fiboa_cli/datasets/es_cm.py
index c6e6f6ce..789ce436 100644
--- a/fiboa_cli/datasets/es_cm.py
+++ b/fiboa_cli/datasets/es_cm.py
@@ -3,7 +3,7 @@
 import requests
 
 from fiboa_cli.conversion.converter_rest import EsriRESTConverterMixin
-from fiboa_cli.datasets.es import ESBaseConverter
+from fiboa_cli.datasets.es_base import ESBaseConverter
 
 
 class ESCMConverter(EsriRESTConverterMixin, ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_ex.py b/fiboa_cli/datasets/es_ex.py
index 4ca7fe1f..867c08e3 100644
--- a/fiboa_cli/datasets/es_ex.py
+++ b/fiboa_cli/datasets/es_ex.py
@@ -3,7 +3,7 @@
 
 import requests
 
-from fiboa_cli.datasets.es import ESBaseConverter
+from fiboa_cli.datasets.es_base import ESBaseConverter
 
 
 class EXConverter(ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_ga.py b/fiboa_cli/datasets/es_ga.py
index 0e94d161..0e2889a6 100644
--- a/fiboa_cli/datasets/es_ga.py
+++ b/fiboa_cli/datasets/es_ga.py
@@ -1,5 +1,5 @@
 from fiboa_cli.conversion.converter_rest import EsriRESTConverterMixin
-from fiboa_cli.datasets.es import ESBaseConverter
+from fiboa_cli.datasets.es_base import ESBaseConverter
 
 
 class ESGAConverter(EsriRESTConverterMixin, ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_ib.py b/fiboa_cli/datasets/es_ib.py
index 2c2f9d7e..a7ff02cf 100644
--- a/fiboa_cli/datasets/es_ib.py
+++ b/fiboa_cli/datasets/es_ib.py
@@ -3,7 +3,7 @@
 import pandas as pd
 
 from fiboa_cli.conversion.converter_rest import EsriRESTConverterMixin
-from fiboa_cli.datasets.es import ESBaseConverter
+from fiboa_cli.datasets.es_base import ESBaseConverter
 
 
 class ESIBConverter(EsriRESTConverterMixin, ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_md.py b/fiboa_cli/datasets/es_md.py
index a69fd6f2..9f4f748d 100644
--- a/fiboa_cli/datasets/es_md.py
+++ b/fiboa_cli/datasets/es_md.py
@@ -1,4 +1,4 @@
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 
 class ESCLConverter(ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_nc.py b/fiboa_cli/datasets/es_nc.py
index a06d3dd0..b84cf58f 100644
--- a/fiboa_cli/datasets/es_nc.py
+++ b/fiboa_cli/datasets/es_nc.py
@@ -6,7 +6,7 @@
 from loguru import logger
 from vecorel_cli.vecorel.util import name_from_uri
 
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 
 class NCConverter(ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_pv.py b/fiboa_cli/datasets/es_pv.py
index e4fbe3b5..1ff293bd 100644
--- a/fiboa_cli/datasets/es_pv.py
+++ b/fiboa_cli/datasets/es_pv.py
@@ -2,7 +2,7 @@
 import requests
 from loguru import logger
 
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 
 class ESPVConverter(ESBaseConverter):
diff --git a/fiboa_cli/datasets/es_vc.py b/fiboa_cli/datasets/es_vc.py
index a03b9dae..9289babd 100644
--- a/fiboa_cli/datasets/es_vc.py
+++ b/fiboa_cli/datasets/es_vc.py
@@ -3,7 +3,7 @@
 
 import requests
 
-from .es import ESBaseConverter
+from .es_base import ESBaseConverter
 
 
 class ESVCConverter(ESBaseConverter):
diff --git a/fiboa_cli/registry.py b/fiboa_cli/registry.py
index 56e0f04d..76562bc8 100644
--- a/fiboa_cli/registry.py
+++ b/fiboa_cli/registry.py
@@ -22,7 +22,7 @@ class FiboaRegistry(VecorelRegistry):
         "determination:details",
     ]
     required_extensions = [re.compile(spec_pattern)]
-    ignored_datasets = VecorelRegistry.ignored_datasets + ["es.py"]
+    ignored_datasets = VecorelRegistry.ignored_datasets + ["es_base.py"]
 
     def register_commands(self):
         from .convert import ConvertData
diff --git a/tests/data-files/convert/es/1501_ALAVA_cd_2025_20250105.gpkg.zip b/tests/data-files/convert/es/1501_ALAVA_cd_2025_20250105.gpkg.zip
new file mode 100644
index 00000000..a90e4560
Binary files /dev/null and b/tests/data-files/convert/es/1501_ALAVA_cd_2025_20250105.gpkg.zip differ
diff --git a/tests/test_convert.py b/tests/test_convert.py
index 2766e5b5..c614158a 100644
--- a/tests/test_convert.py
+++ b/tests/test_convert.py
@@ -43,6 +43,7 @@
     "lv",
     "ie",
     "es_cat",
+    "es",
     "nz",
     "lt",
     "si",
@@ -67,6 +68,7 @@ def _input_files(converter, *names):
     "br_ba_lem": _input_files("br_ba_lem", "LEM_dataset.zip"),
     "ch": _input_files("ch", "lwb_nutzungsflaechen_v2_0_lv95.gpkg"),
     "es_cat": _input_files("es_cat", "Cultius_DUN2023_GPKG.zip"),
+    "es": {"input_files": {f"{test_path}/es/1501_ALAVA_cd_2025_20250105.gpkg.zip": ["*.gpkg"]}},
     "lv": _input_files("lv", "1_100.xml"),
     "nz": _input_files("nz", "irrigated-land-area-raw-2020-update.zip"),
     "jecam": _input_files("jecam", "BD_JECAM_CIRAD_2023_feb.shp"),