From 79d878dea30286f7c481fa4305bccc23953b52ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Thu, 26 Feb 2026 20:02:55 -0300 Subject: [PATCH 1/6] feat: include a python API to dados.gov.br API --- condarecipe/pysus/meta.yaml | 1 - poetry.lock | 408 ++++++++++++++++++++++++++++++---- pyproject.toml | 5 +- pysus/api/dadosgov/client.py | 53 +++++ pysus/api/dadosgov/models.py | 83 +++++++ pysus/api/dadosgov/schemas.py | 0 pysus/ftp/README.md | 0 pysus/online_data/ESUS.py | 98 -------- pysus/preprocessing/ESUS.py | 69 ------ pysus/tests/test_esus.py | 16 -- setup.cfg | 2 +- 11 files changed, 503 insertions(+), 232 deletions(-) create mode 100644 pysus/api/dadosgov/client.py create mode 100644 pysus/api/dadosgov/models.py create mode 100644 pysus/api/dadosgov/schemas.py create mode 100644 pysus/ftp/README.md delete mode 100644 pysus/online_data/ESUS.py delete mode 100644 pysus/preprocessing/ESUS.py delete mode 100644 pysus/tests/test_esus.py diff --git a/condarecipe/pysus/meta.yaml b/condarecipe/pysus/meta.yaml index 1eeaaef0..41f45164 100644 --- a/condarecipe/pysus/meta.yaml +++ b/condarecipe/pysus/meta.yaml @@ -32,7 +32,6 @@ requirements: - pyarrow - python - requests - - elasticsearch test: imports: diff --git a/poetry.lock b/poetry.lock index 331da2c8..daacd197 100644 --- a/poetry.lock +++ b/poetry.lock @@ -28,13 +28,25 @@ files = [ {file = "alabaster-0.7.16.tar.gz", hash = "sha256:75a8b99c28a5dad50dd7f8ccdd447a121ddb3892da9e53d1ca5cca3106d58d65"}, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +groups = ["main", "dev"] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + [[package]] name = 
"anyio" version = "4.6.2.post1" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, @@ -64,6 +76,21 @@ files = [ {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, ] +[[package]] +name = "argcomplete" +version = "3.6.3" +description = "Bash tab completion for argparse" +optional = false +python-versions = ">=3.8" +groups = ["dev"] +files = [ + {file = "argcomplete-3.6.3-py3-none-any.whl", hash = "sha256:f5007b3a600ccac5d25bbce33089211dfd49eab4a7718da3f10e3082525a92ce"}, + {file = "argcomplete-3.6.3.tar.gz", hash = "sha256:62e8ed4fd6a45864acc8235409461b72c9a28ee785a2011cc5eb78318786c89c"}, +] + +[package.extras] +test = ["coverage", "mypy", "pexpect", "ruff", "wheel"] + [[package]] name = "argon2-cffi" version = "23.1.0" @@ -325,7 +352,7 @@ version = "2024.8.30" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" -groups = ["main", "dev", "docs", "geo"] +groups = ["dev", "docs", "geo"] files = [ {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, @@ -778,6 +805,40 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "datamodel-code-generator" +version = "0.54.0" +description = "Datamodel Code Generator" +optional = false +python-versions = ">=3.10" +groups = ["dev"] +files = [ + {file = "datamodel_code_generator-0.54.0-py3-none-any.whl", hash = "sha256:3156df7a7e8fa5a7c9a6d50836e5ba5abe0532f6b71eee6d73a0c8e1fb5b7e47"}, + {file = "datamodel_code_generator-0.54.0.tar.gz", hash = "sha256:2b183598d049e265146a8224c35d1bb96a80a641ea8ecd2a82e6a0e97b56da6b"}, +] + +[package.dependencies] +argcomplete = ">=2.10.1,<4" +black = ">=19.10b0" +genson = ">=1.2.1,<2" +httpx = {version = ">=0.24.1", optional = true, markers = "extra == \"http\""} +inflect = ">=4.1,<8" +isort = ">=4.3.21,<8" +jinja2 = ">=2.10.1,<4" +packaging = "*" +pydantic = ">=1.5" +pyyaml = ">=6.0.1" +tomli = {version = ">=2.2.1,<3", markers = "python_version <= \"3.11\""} + +[package.extras] +all = ["graphql-core (>=3.2.3)", "httpx (>=0.24.1)", "openapi-spec-validator (>=0.2.8,<0.8)", "prance (>=0.18.2)", "pysnooper (>=0.4.1,<2)", "ruff (>=0.9.10)", "watchfiles (>=1.1)"] +debug = ["pysnooper (>=0.4.1,<2)"] +graphql = ["graphql-core (>=3.2.3)"] +http = ["httpx (>=0.24.1)"] +ruff = ["ruff (>=0.9.10)"] +validation = ["openapi-spec-validator (>=0.2.8,<0.8)", "prance (>=0.18.2)"] +watch = ["watchfiles (>=1.1)"] + [[package]] name = "dateparser" version = "1.2.0" @@ -913,28 +974,6 @@ files = [ {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"}, 
] -[[package]] -name = "elasticsearch" -version = "7.16.2" -description = "Python client for Elasticsearch" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" -groups = ["main"] -files = [ - {file = "elasticsearch-7.16.2-py2.py3-none-any.whl", hash = "sha256:c05aa792a52b1e6ad9d226340dc19165c4a491ac48fbd91af51ec839bf953210"}, - {file = "elasticsearch-7.16.2.tar.gz", hash = "sha256:23ac0afb4398c48990e359ac73ab6963741bd05321345299c62d9d23e209eee2"}, -] - -[package.dependencies] -certifi = "*" -urllib3 = ">=1.21.1,<2" - -[package.extras] -async = ["aiohttp (>=3,<4)"] -develop = ["black", "coverage", "jinja2", "mock", "pytest", "pytest-cov", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx (<1.7)", "sphinx-rtd-theme"] -docs = ["sphinx (<1.7)", "sphinx-rtd-theme"] -requests = ["requests (>=2.4.0,<3.0.0)"] - [[package]] name = "exceptiongroup" version = "1.2.2" @@ -1235,6 +1274,18 @@ files = [ {file = "future-1.0.0.tar.gz", hash = "sha256:bd2968309307861edae1458a4f8a4f3598c03be43b97521076aebf5d94c07b05"}, ] +[[package]] +name = "genson" +version = "1.3.0" +description = "GenSON is a powerful, user-friendly JSON Schema generator." 
+optional = false +python-versions = "*" +groups = ["dev"] +files = [ + {file = "genson-1.3.0-py3-none-any.whl", hash = "sha256:468feccd00274cc7e4c09e84b08704270ba8d95232aa280f65b986139cec67f7"}, + {file = "genson-1.3.0.tar.gz", hash = "sha256:e02db9ac2e3fd29e65b5286f7135762e2cd8a986537c075b06fc5f1517308e37"}, +] + [[package]] name = "geocoder" version = "1.38.1" @@ -1260,7 +1311,7 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -1272,7 +1323,7 @@ version = "1.0.7" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, @@ -1294,7 +1345,7 @@ version = "0.27.2" description = "The next generation HTTP client." 
optional = false python-versions = ">=3.8" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, @@ -1371,6 +1422,30 @@ files = [ {file = "imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a"}, ] +[[package]] +name = "inflect" +version = "7.5.0" +description = "Correctly generate plurals, singular nouns, ordinals, indefinite articles" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "inflect-7.5.0-py3-none-any.whl", hash = "sha256:2aea70e5e70c35d8350b8097396ec155ffd68def678c7ff97f51aa69c1d92344"}, + {file = "inflect-7.5.0.tar.gz", hash = "sha256:faf19801c3742ed5a05a8ce388e0d8fe1a07f8d095c82201eb904f5d27ad571f"}, +] + +[package.dependencies] +more_itertools = ">=8.5.0" +typeguard = ">=4.0.1" + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["pygments", "pytest (>=6,!=8.1.*)"] +type = ["pytest-mypy"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -2128,6 +2203,18 @@ files = [ {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, ] +[[package]] +name = "more-itertools" +version = "10.8.0" +description = "More routines for operating on iterables, beyond itertools" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "more_itertools-10.8.0-py3-none-any.whl", hash = "sha256:52d4362373dcf7c52546bc4af9a86ee7c4579df9a8dc268be0a2f949d376cc9b"}, + {file = "more_itertools-10.8.0.tar.gz", hash = 
"sha256:f638ddf8a1a0d134181275fb5d58b086ead7c6a72429ad725c67503f13ba30bd"}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -2911,6 +2998,162 @@ files = [ {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, ] +[[package]] +name = "pydantic" +version = "2.12.5" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.9" +groups = ["main", "dev"] +files = [ + {file = "pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d"}, + {file = "pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49"}, +] + +[package.dependencies] +annotated-types = ">=0.6.0" +pydantic-core = "2.41.5" +typing-extensions = ">=4.14.1" +typing-inspection = ">=0.4.2" + +[package.extras] +email = ["email-validator (>=2.0.0)"] +timezone = ["tzdata ; python_version >= \"3.9\" and platform_system == \"Windows\""] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.9" +groups = ["main", "dev"] +files = [ + {file = "pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146"}, + {file = "pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a"}, + {file = "pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556"}, + {file = "pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49"}, + {file = "pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba"}, + {file = "pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9"}, + {file = "pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6"}, + {file = "pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8"}, + 
{file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594"}, + {file = "pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe"}, + {file = "pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f"}, + {file = "pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7"}, + {file = "pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c"}, + {file = "pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294"}, + {file = "pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815"}, + {file = "pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3"}, + {file = "pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9"}, + {file = 
"pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586"}, + {file = "pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e"}, + {file = "pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11"}, + {file = "pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", 
hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd"}, + {file = "pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a"}, + {file = "pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375"}, + {file = "pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07"}, + {file = "pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = 
"sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf"}, + {file = "pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = 
"sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c"}, + {file = "pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008"}, + {file = "pydantic_core-2.41.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8bfeaf8735be79f225f3fefab7f941c712aaca36f1128c9d7e2352ee1aa87bdf"}, + {file = "pydantic_core-2.41.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:346285d28e4c8017da95144c7f3acd42740d637ff41946af5ce6e5e420502dd5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75dafbf87d6276ddc5b2bf6fae5254e3d0876b626eb24969a574fff9149ee5d"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7b93a4d08587e2b7e7882de461e82b6ed76d9026ce91ca7915e740ecc7855f60"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8465ab91a4bd96d36dde3263f06caa6a8a6019e4113f24dc753d79a8b3a3f82"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:299e0a22e7ae2b85c1a57f104538b2656e8ab1873511fd718a1c1c6f149b77b5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:707625ef0983fcfb461acfaf14de2067c5942c6bb0f3b4c99158bed6fedd3cf3"}, + {file = "pydantic_core-2.41.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f41eb9797986d6ebac5e8edff36d5cef9de40def462311b3eb3eeded1431e425"}, + 
{file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0384e2e1021894b1ff5a786dbf94771e2986ebe2869533874d7e43bc79c6f504"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:f0cd744688278965817fd0839c4a4116add48d23890d468bc436f78beb28abf5"}, + {file = "pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:753e230374206729bf0a807954bcc6c150d3743928a73faffee51ac6557a03c3"}, + {file = "pydantic_core-2.41.5-cp39-cp39-win32.whl", hash = "sha256:873e0d5b4fb9b89ef7c2d2a963ea7d02879d9da0da8d9d4933dee8ee86a8b460"}, + {file = "pydantic_core-2.41.5-cp39-cp39-win_amd64.whl", hash = "sha256:e4f4a984405e91527a0d62649ee21138f8e3d0ef103be488c1dc11a80d7f184b"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2"}, + {file = "pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56"}, + {file = "pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963"}, + {file = "pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808"}, + {file = 
"pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f"}, + {file = "pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51"}, + {file = "pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e"}, +] + +[package.dependencies] +typing-extensions = ">=4.14.1" + [[package]] name = "pyflakes" version = "2.5.0" @@ -3659,7 +3902,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" -groups = ["docs"] +groups = ["dev", "docs"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -3922,16 +4165,61 @@ test = ["pytest", "ruff"] [[package]] name = "tomli" -version = "2.1.0" +version = "2.4.0" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" groups = ["dev", "docs"] files = [ - {file = "tomli-2.1.0-py3-none-any.whl", hash = 
"sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"}, - {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"}, -] -markers = {dev = "python_version == \"3.10\""} + {file = "tomli-2.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b5ef256a3fd497d4973c11bf142e9ed78b150d36f5773f1ca6088c230ffc5867"}, + {file = "tomli-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5572e41282d5268eb09a697c89a7bee84fae66511f87533a6f88bd2f7b652da9"}, + {file = "tomli-2.4.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:551e321c6ba03b55676970b47cb1b73f14a0a4dce6a3e1a9458fd6d921d72e95"}, + {file = "tomli-2.4.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5e3f639a7a8f10069d0e15408c0b96a2a828cfdec6fca05296ebcdcc28ca7c76"}, + {file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:1b168f2731796b045128c45982d3a4874057626da0e2ef1fdd722848b741361d"}, + {file = "tomli-2.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:133e93646ec4300d651839d382d63edff11d8978be23da4cc106f5a18b7d0576"}, + {file = "tomli-2.4.0-cp311-cp311-win32.whl", hash = "sha256:b6c78bdf37764092d369722d9946cb65b8767bfa4110f902a1b2542d8d173c8a"}, + {file = "tomli-2.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3d1654e11d724760cdb37a3d7691f0be9db5fbdaef59c9f532aabf87006dbaa"}, + {file = "tomli-2.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:cae9c19ed12d4e8f3ebf46d1a75090e4c0dc16271c5bce1c833ac168f08fb614"}, + {file = "tomli-2.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:920b1de295e72887bafa3ad9f7a792f811847d57ea6b1215154030cf131f16b1"}, + {file = "tomli-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7d6d9a4aee98fac3eab4952ad1d73aee87359452d1c086b5ceb43ed02ddb16b8"}, + {file = "tomli-2.4.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:36b9d05b51e65b254ea6c2585b59d2c4cb91c8a3d91d0ed0f17591a29aaea54a"}, + {file = "tomli-2.4.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1c8a885b370751837c029ef9bc014f27d80840e48bac415f3412e6593bbc18c1"}, + {file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8768715ffc41f0008abe25d808c20c3d990f42b6e2e58305d5da280ae7d1fa3b"}, + {file = "tomli-2.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7b438885858efd5be02a9a133caf5812b8776ee0c969fea02c45e8e3f296ba51"}, + {file = "tomli-2.4.0-cp312-cp312-win32.whl", hash = "sha256:0408e3de5ec77cc7f81960c362543cbbd91ef883e3138e81b729fc3eea5b9729"}, + {file = "tomli-2.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:685306e2cc7da35be4ee914fd34ab801a6acacb061b6a7abca922aaf9ad368da"}, + {file = "tomli-2.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:5aa48d7c2356055feef06a43611fc401a07337d5b006be13a30f6c58f869e3c3"}, + {file = "tomli-2.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:84d081fbc252d1b6a982e1870660e7330fb8f90f676f6e78b052ad4e64714bf0"}, + {file = "tomli-2.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9a08144fa4cba33db5255f9b74f0b89888622109bd2776148f2597447f92a94e"}, + {file = "tomli-2.4.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c73add4bb52a206fd0c0723432db123c0c75c280cbd67174dd9d2db228ebb1b4"}, + {file = "tomli-2.4.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1fb2945cbe303b1419e2706e711b7113da57b7db31ee378d08712d678a34e51e"}, + {file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bbb1b10aa643d973366dc2cb1ad94f99c1726a02343d43cbc011edbfac579e7c"}, + {file = "tomli-2.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4cbcb367d44a1f0c2be408758b43e1ffb5308abe0ea222897d6bfc8e8281ef2f"}, + {file = "tomli-2.4.0-cp313-cp313-win32.whl", hash = 
"sha256:7d49c66a7d5e56ac959cb6fc583aff0651094ec071ba9ad43df785abc2320d86"}, + {file = "tomli-2.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:3cf226acb51d8f1c394c1b310e0e0e61fecdd7adcb78d01e294ac297dd2e7f87"}, + {file = "tomli-2.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:d20b797a5c1ad80c516e41bc1fb0443ddb5006e9aaa7bda2d71978346aeb9132"}, + {file = "tomli-2.4.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:26ab906a1eb794cd4e103691daa23d95c6919cc2fa9160000ac02370cc9dd3f6"}, + {file = "tomli-2.4.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:20cedb4ee43278bc4f2fee6cb50daec836959aadaf948db5172e776dd3d993fc"}, + {file = "tomli-2.4.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:39b0b5d1b6dd03684b3fb276407ebed7090bbec989fa55838c98560c01113b66"}, + {file = "tomli-2.4.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a26d7ff68dfdb9f87a016ecfd1e1c2bacbe3108f4e0f8bcd2228ef9a766c787d"}, + {file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:20ffd184fb1df76a66e34bd1b36b4a4641bd2b82954befa32fe8163e79f1a702"}, + {file = "tomli-2.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:75c2f8bbddf170e8effc98f5e9084a8751f8174ea6ccf4fca5398436e0320bc8"}, + {file = "tomli-2.4.0-cp314-cp314-win32.whl", hash = "sha256:31d556d079d72db7c584c0627ff3a24c5d3fb4f730221d3444f3efb1b2514776"}, + {file = "tomli-2.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:43e685b9b2341681907759cf3a04e14d7104b3580f808cfde1dfdb60ada85475"}, + {file = "tomli-2.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:3d895d56bd3f82ddd6faaff993c275efc2ff38e52322ea264122d72729dca2b2"}, + {file = "tomli-2.4.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:5b5807f3999fb66776dbce568cc9a828544244a8eb84b84b9bafc080c99597b9"}, + {file = "tomli-2.4.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c084ad935abe686bd9c898e62a02a19abfc9760b5a79bc29644463eaf2840cb0"}, + {file = 
"tomli-2.4.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0f2e3955efea4d1cfbcb87bc321e00dc08d2bcb737fd1d5e398af111d86db5df"}, + {file = "tomli-2.4.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0e0fe8a0b8312acf3a88077a0802565cb09ee34107813bba1c7cd591fa6cfc8d"}, + {file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:413540dce94673591859c4c6f794dfeaa845e98bf35d72ed59636f869ef9f86f"}, + {file = "tomli-2.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0dc56fef0e2c1c470aeac5b6ca8cc7b640bb93e92d9803ddaf9ea03e198f5b0b"}, + {file = "tomli-2.4.0-cp314-cp314t-win32.whl", hash = "sha256:d878f2a6707cc9d53a1be1414bbb419e629c3d6e67f69230217bb663e76b5087"}, + {file = "tomli-2.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:2add28aacc7425117ff6364fe9e06a183bb0251b03f986df0e78e974047571fd"}, + {file = "tomli-2.4.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2b1e3b80e1d5e52e40e9b924ec43d81570f0e7d09d11081b797bc4692765a3d4"}, + {file = "tomli-2.4.0-py3-none-any.whl", hash = "sha256:1f776e7d669ebceb01dee46484485f43a4048746235e683bcdffacdf1fb4785a"}, + {file = "tomli-2.4.0.tar.gz", hash = "sha256:aa89c3f6c277dd275d8e243ad24f3b5e701491a860d5121f2cdd399fbb31fc9c"}, +] +markers = {dev = "python_version < \"3.12\""} [[package]] name = "tornado" @@ -3991,6 +4279,21 @@ files = [ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"] +[[package]] +name = "typeguard" +version = "4.5.1" +description = "Run-time type checker for Python" +optional = false +python-versions = ">=3.9" +groups = ["dev"] +files = [ + {file = "typeguard-4.5.1-py3-none-any.whl", hash = "sha256:44d2bf329d49a244110a090b55f5f91aa82d9a9834ebfd30bcc73651e4a8cc40"}, + {file = "typeguard-4.5.1.tar.gz", hash = 
"sha256:f6f8ecbbc819c9bc749983cc67c02391e16a9b43b8b27f15dc70ed7c4a007274"}, +] + +[package.dependencies] +typing_extensions = ">=4.14.0" + [[package]] name = "types-python-dateutil" version = "2.9.0.20241003" @@ -4005,17 +4308,32 @@ files = [ [[package]] name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" optional = false -python-versions = ">=3.8" -groups = ["main", "docs"] +python-versions = ">=3.9" +groups = ["main", "dev", "docs"] files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, ] markers = {docs = "python_version < \"3.12\""} +[[package]] +name = "typing-inspection" +version = "0.4.2" +description = "Runtime typing introspection tools" +optional = false +python-versions = ">=3.9" +groups = ["main", "dev"] +files = [ + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, +] + +[package.dependencies] +typing-extensions = ">=4.12.0" + [[package]] name = "tzdata" version = "2024.2" @@ -4048,14 +4366,14 @@ devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3) [[package]] name = "unidecode" -version = "1.3.8" +version = "1.4.0" description = "ASCII transliterations of Unicode text" optional = 
false -python-versions = ">=3.5" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "Unidecode-1.3.8-py3-none-any.whl", hash = "sha256:d130a61ce6696f8148a3bd8fe779c99adeb4b870584eeb9526584e9aa091fd39"}, - {file = "Unidecode-1.3.8.tar.gz", hash = "sha256:cfdb349d46ed3873ece4586b96aa75258726e2fa8ec21d6f00a591d98806c2f4"}, + {file = "Unidecode-1.4.0-py3-none-any.whl", hash = "sha256:c3c7606c27503ad8d501270406e345ddb480a7b5f38827eafe4fa82a137f0021"}, + {file = "Unidecode-1.4.0.tar.gz", hash = "sha256:ce35985008338b676573023acc382d62c264f307c8f7963733405add37ea2b23"}, ] [[package]] @@ -4079,7 +4397,7 @@ version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" -groups = ["main", "dev", "docs", "geo"] +groups = ["dev", "docs", "geo"] files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, @@ -4223,4 +4541,4 @@ preprocessing = [] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "366b0eac64aa0a754cb64d4b487807570e290a57ca811bad2295b3efa5e593a0" +content-hash = "5a9f9bf4dbb0bcce1c501595176b4f03faed1ee0a5c7c9581d366606e7cddb1c" diff --git a/pyproject.toml b/pyproject.toml index cc22b17b..28f231d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,12 +23,12 @@ Unidecode = "^1.3.6" dateparser = "^1.1.8" pandas = "^2.2.2" urwid = "^2.1.2" -elasticsearch = { version = "7.16.2", extras=["preprocessing"] } # FTP bigtree = "^0.12.2" aioftp = "^0.21.4" humanize = "^4.8.0" -typing-extensions = "^4.9.0" +typing-extensions = ">=4.10.0" +pydantic = "^2.12.5" [tool.poetry.group.dev.dependencies] pytest = ">=6.1.0" @@ -39,6 +39,7 @@ pre-commit = "^2.20.0" pytest-timeout = "^2.1.0" nbsphinx = "^0.9.3" 
pytest-retry = "1.7.0" +datamodel-code-generator = {extras = ["http"], version = "^0.54.0"} [tool.poetry.group.docs.dependencies] sphinx = "^5.1.1" diff --git a/pysus/api/dadosgov/client.py b/pysus/api/dadosgov/client.py new file mode 100644 index 00000000..54b45691 --- /dev/null +++ b/pysus/api/dadosgov/client.py @@ -0,0 +1,53 @@ +import requests +from typing import List, Optional +from pydantic import TypeAdapter +from pysus.api.dadosgov.models import ( + DatasetDetail, + DatasetSummary, +) +from pysus import __version__ + + +class DadosGov: + def __init__(self, token: str): + self.base_url = "https://dados.gov.br/dados/api" + self.session = requests.Session() + self.session.headers.update( + { + "Accept": "application/json", + "User-Agent": f"PySUS/{__version__}", + "chave-api-dados-abertos": token, + } + ) + + def _get(self, endpoint: str, params: Optional[dict] = None): + url = f"{self.base_url}/{endpoint.lstrip('/')}" + response = self.session.get(url, params=params) + response.raise_for_status() + return response.json() + + def list_datasets( + self, + pagina: int = 1, + nome_conjunto: Optional[str] = None, + dados_abertos: Optional[bool] = None, + is_privado: bool = False, + id_organizacao: Optional[str] = None, + ) -> List[DatasetSummary]: + params = { + "pagina": pagina, + "nomeConjuntoDados": nome_conjunto, + "dadosAbertos": dados_abertos, + "isPrivado": is_privado, + "idOrganizacao": id_organizacao, + } + + params = {k: v for k, v in params.items() if v is not None} + + data = self._get("/publico/conjuntos-dados", params=params) + adapter = TypeAdapter(List[DatasetSummary]) + return adapter.validate_python(data) + + def get_dataset(self, id: str) -> DatasetDetail: + data = self._get(f"/publico/conjuntos-dados/{id}") + return DatasetDetail.model_validate(data) diff --git a/pysus/api/dadosgov/models.py b/pysus/api/dadosgov/models.py new file mode 100644 index 00000000..149cb0fb --- /dev/null +++ b/pysus/api/dadosgov/models.py @@ -0,0 +1,83 @@ +from 
pydantic import BaseModel, Field, BeforeValidator +from datetime import datetime as dt +from typing import Optional, List, Any, Annotated + + +def to_datetime(value: Any) -> Optional[dt]: + if not value or not isinstance(value, str) or "Indisponível" in value: + return None + try: + return dt.strptime(value, "%d/%m/%Y %H:%M:%S") + except ValueError: + try: + return dt.strptime(value, "%d/%m/%Y") + except ValueError: + return None + + +def to_bool(value: Any) -> bool: + if isinstance(value, bool): + return value + return str(value).lower() in ("sim", "true", "1") + + +DateTime = Annotated[Optional[dt], BeforeValidator(to_datetime)] +Bool = Annotated[bool, BeforeValidator(to_bool)] + + +class Tag(BaseModel): + id: str + name: str + display_name: Optional[str] = None + + +class Resource(BaseModel): + id: str + title: str = Field(alias="titulo") + description: str = Field(alias="descricao") + url: str = Field(alias="link") + format: str = Field(alias="formato") + size: int = Field(alias="tamanho") + cataloging_date: DateTime = Field(None, alias="dataCatalogacao") + last_modified: DateTime = Field(None, alias="dataUltimaAtualizacaoArquivo") + download_count: Optional[int] = Field(None, alias="quantidadeDownloads") + file_name: Optional[str] = Field(None, alias="nomeArquivo") + resource_type: Optional[str] = Field(None, alias="tipo") + order_number: Optional[int] = Field(None, alias="numOrdem") + dataset_id: Optional[str] = Field(None, alias="idConjuntoDados") + + +class DatasetDetail(BaseModel): + id: str + title: str = Field(alias="titulo") + slug: str = Field(alias="nome") + organization: str = Field(alias="organizacao") + description: str = Field(alias="descricao") + license: Optional[str] = Field(None, alias="licenca") + maintainer: Optional[str] = Field(None, alias="responsavel") + maintainer_email: Optional[str] = Field(None, alias="emailResponsavel") + frequency: Optional[str] = Field(None, alias="periodicidade") + themes: List[Any] = Field(default_factory=list, 
alias="temas") + tags: List[Tag] = Field(default_factory=list) + resources: List[Resource] = Field(default_factory=list, alias="recursos") + is_open_data: Bool = Field(alias="dadosAbertos") + is_discontinued: Bool = Field(alias="descontinuado") + is_private: Bool = Field(False, alias="privado") + metadata_updated: DateTime = Field(None, alias="dataUltimaAtualizacaoMetadados") + file_updated: DateTime = Field(None, alias="dataUltimaAtualizacaoArquivo") + cataloging_date: DateTime = Field(None, alias="dataCatalogacao") + visibility: str = Field(alias="visibilidade") + status: Optional[str] = Field(None, alias="atualizado") + seal: Optional[str] = Field(None, alias="selo") + source: Optional[str] = Field(None, alias="origemCadastro") + + +class DatasetSummary(BaseModel): + id: str + title: str + name: str = Field(alias="nome") + organization_name: str = Field(alias="nomeOrganizacao") + is_updated: Bool = Field(alias="isAtualizado") + cataloging_date: DateTime = Field(None, alias="catalogacao") + metadata_modified: DateTime = Field(None, alias="ultimaAlteracaoMetadados") + last_update: DateTime = Field(None, alias="ultimaAtualizacaoDados") diff --git a/pysus/api/dadosgov/schemas.py b/pysus/api/dadosgov/schemas.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/ftp/README.md b/pysus/ftp/README.md new file mode 100644 index 00000000..e69de29b diff --git a/pysus/online_data/ESUS.py b/pysus/online_data/ESUS.py deleted file mode 100644 index ef2b990e..00000000 --- a/pysus/online_data/ESUS.py +++ /dev/null @@ -1,98 +0,0 @@ -import os -from datetime import date - -import pandas as pd -from elasticsearch import Elasticsearch, helpers -from loguru import logger -from pysus.ftp import CACHEPATH - - -def download(uf, cache=True, checkmemory=True): - """ - Download ESUS data by UF - :param uf: rj, mg, etc - :param cache: if results should be cached on disk - :return: DataFrame if data fits in memory, - other an iterator of chunks of size 1000. 
- """ - uf = uf.lower() - user = "user-public-notificacoes" - pwd = "Za4qNXdyQNSa9YaA" - today = date.today() - dt = today.strftime("_%d_%m_%Y") - base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve- - url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" # noqa: E231 - out = f"ESUS_{uf}_{dt}.parquet" - - cachefile = os.path.join(CACHEPATH, out) - tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{uf.upper()}.csv.gz") - if os.path.exists(cachefile): - logger.info(f"Local parquet file found at {cachefile}") - df = pd.read_parquet(cachefile) - elif os.path.exists(tempfile): - logger.info(f"Local csv file found at {tempfile}") - df = pd.read_csv(tempfile, chunksize=1000) - else: - fname = fetch(base, uf, url) - size = os.stat(fname).st_size - if size > 50e6 and checkmemory: - print(f"Downloaded data is to large: {size / 1e6} MB compressed.") - print( - "Only loading the first 1000 rows. If your computer has enough" - + " memory, set 'checkmemory' to False" - ) - print(f"The full data is in {fname}") - df = pd.read_csv(fname, chunksize=1000) - else: - df = pd.read_csv(fname, low_memory=False) - print(f"{df.shape[0]} records downloaded.") - os.unlink(fname) - if cache: - df.to_parquet(cachefile) - logger.info(f"Data stored as parquet at {cachefile}") - - return df - - -def fetch(base, uf, url): - UF = uf.upper() - print(f"Reading ESUS data for {UF}") - es = Elasticsearch([url], send_get_body_as="POST") - body = {"query": {"match_all": {}}} - results = helpers.scan(es, query=body, index=base) - # df = pd.DataFrame.from_dict( - # [document['_source'] for document in results] - # ) - - chunker = chunky_fetch(results, 3000) - h = 1 - tempfile = os.path.join(CACHEPATH, f"ESUS_temp_{UF}.csv.gz") - for ch in chunker: - df = pd.DataFrame.from_dict(ch) - df.sintomas = df["sintomas"].str.replace( - ";", - "", - ) # remove os ; - if h: - df.to_csv(tempfile) - h = 0 - else: - df.to_csv(tempfile, mode="a", header=False) - # df = pd.read_csv('temp.csv.gz') - - 
return tempfile - - -def chunky_fetch(results, chunk_size=3000): - """Fetches data in chunks to preserve memory""" - data = [] - i = 0 - for d in results: - data.append(d["_source"]) - i += 1 - if i == chunk_size: - yield data - data = [] - i = 0 - else: - yield data diff --git a/pysus/preprocessing/ESUS.py b/pysus/preprocessing/ESUS.py deleted file mode 100644 index 110215c6..00000000 --- a/pysus/preprocessing/ESUS.py +++ /dev/null @@ -1,69 +0,0 @@ -import numpy as np -import pandas as pd -from pysus.online_data.ESUS import download - - -def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"): - """ - Fetches ESUS covid line list and aggregates by age and sex returning these - counts between start and end dates. - :param UF: State code - :param start: Start date - :param end: end date - :return: dataframe - """ - df = download(uf=UF) - - # Transformando as colunas em datetime type - for cname in df: - if cname.startswith("data"): - df[cname] = pd.to_datetime(df[cname], errors="coerce") - - # Eliminando os valores nulos nas colunas com datas importantes - old_size = len(df) - df.dropna( - subset=["dataNotificacao", "dataInicioSintomas", "dataTeste"], - inplace=True, - ) - print( - f"Removed {old_size - len(df)} rows with missing dates of symptoms, " - "notification or testing" - ) - - # Desconsiderando os resultados negativos ou inconclusivos - df = df.loc[ - ~df.resultadoTeste.isin(["Negativo", "Inconclusivo ou Indeterminado"]) - ] - - # Removendo sexo indeterminado - df = df.loc[df.sexo.isin(["Masculino", "Feminino"])] - - # determinando a data dos primeiros sintomas como a data do index - - df["datesint"] = df["dataInicioSintomas"] - df.set_index("datesint", inplace=True) - df.sort_index(inplace=True, ascending=True) - - # vamos limitar a data inicial e a data final considerando apenas a - # primeira onda - - df = df.loc[start:end] - - ini = np.arange(0, 81, 5) - fin = np.arange(5, 86, 5) - fin[-1] = 120 - faixa_etaria = { - f"[{i},{f})": (i, f) for 
i, f in zip(ini, fin) # noqa: E231 - } - - labels = list(faixa_etaria.keys()) - df["faixa_etaria"] = [ - labels[i - 1] for i in np.digitize(df.idade, bins=ini) - ] - - agreg = ( - df[["sexo", "faixa_etaria"]].groupby(["faixa_etaria", "sexo"]).size() - ) - agreg = agreg.reset_index() - agreg.columns = ["faixa_etaria", "sexo", "n"] - return agreg diff --git a/pysus/tests/test_esus.py b/pysus/tests/test_esus.py deleted file mode 100644 index 68f159bc..00000000 --- a/pysus/tests/test_esus.py +++ /dev/null @@ -1,16 +0,0 @@ -import unittest - -import pytest -from pysus.online_data.ESUS import download - - -class MyTestCase(unittest.TestCase): - @pytest.mark.skip(reason="This test takes too long") - @pytest.mark.timeout(5) - def test_download(self): - df = download(uf="se") - self.assertGreater(len(df), 0) - - -if __name__ == "__main__": - unittest.main() diff --git a/setup.cfg b/setup.cfg index 157d9dfb..f4ccdd89 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ max-line-length = 79 ignore = D202,D203,W503,E203 [isort] -known_third_party = dbfread,elasticsearch,geobr,geocoder,numpy,pandas,pyarrow,pyreaddbc,requests,tqdm,urllib3 +known_third_party = dbfread,geobr,geocoder,numpy,pandas,pyarrow,pyreaddbc,requests,tqdm,urllib3 ensure_newline_before_comments=true line_length = 79 multi_line_output = 3 From a20f08003473b33db5ee51d940395e1ab6859424 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Tue, 3 Mar 2026 12:23:22 -0300 Subject: [PATCH 2/6] refactor: refactor pysus structure BREAKING CHANGE: version 2.0.0 --- poetry.lock | 56 +++++++- pyproject.toml | 1 + pysus/__init__.py | 3 +- pysus/{ftp => api}/README.md | 0 pysus/api/dadosgov/{schemas.py => README.md} | 0 pysus/{utilities => api/dadosgov}/__init__.py | 0 pysus/api/ducklake/README.md | 0 pysus/api/ducklake/__init__.py | 0 pysus/api/ducklake/catalog/models.py | 0 pysus/api/ducklake/client.py | 0 pysus/api/ftp/README.md | 0 pysus/api/ftp/__init__.py | 3 + pysus/{ftp/__init__.py => 
api/ftp/client.py} | 33 ++--- pysus/{ => api}/ftp/databases/__init__.py | 0 pysus/{ => api}/ftp/databases/ciha.py | 13 +- pysus/{ => api}/ftp/databases/cnes.py | 12 +- pysus/{ => api}/ftp/databases/ibge_datasus.py | 8 +- pysus/{ => api}/ftp/databases/pni.py | 18 +-- pysus/{ => api}/ftp/databases/sia.py | 13 +- pysus/{ => api}/ftp/databases/sih.py | 13 +- pysus/{ => api}/ftp/databases/sim.py | 4 +- pysus/{ => api}/ftp/databases/sinan.py | 13 +- pysus/{ => api}/ftp/databases/sinasc.py | 4 +- pysus/data/local.py | 12 +- pysus/{ => data}/metadata/SINAN/ANIM.tar.gz | Bin pysus/{ => data}/metadata/SINAN/BOTU.tar.gz | Bin pysus/{ => data}/metadata/SINAN/CHAG.tar.gz | Bin pysus/{ => data}/metadata/SINAN/CHIK.tar.gz | Bin pysus/{ => data}/metadata/SINAN/COLE.tar.gz | Bin pysus/{ => data}/metadata/SINAN/COQU.tar.gz | Bin pysus/{ => data}/metadata/SINAN/DENG.tar.gz | Bin pysus/{ => data}/metadata/SINAN/DIFT.tar.gz | Bin pysus/{ => data}/metadata/SINAN/ESQU.tar.gz | Bin pysus/{ => data}/metadata/SINAN/FAMA.tar.gz | Bin pysus/{ => data}/metadata/SINAN/FMAC.tar.gz | Bin pysus/{ => data}/metadata/SINAN/FTIF.tar.gz | Bin pysus/{ => data}/metadata/SINAN/HANS.tar.gz | Bin pysus/{ => data}/metadata/SINAN/HANT.tar.gz | Bin pysus/{ => data}/metadata/SINAN/HEPA.tar.gz | Bin pysus/{ => data}/metadata/SINAN/IEXO.tar.gz | Bin pysus/{ => data}/metadata/SINAN/LEIV.tar.gz | Bin pysus/{ => data}/metadata/SINAN/LEPT.tar.gz | Bin pysus/{ => data}/metadata/SINAN/LTAN.tar.gz | Bin pysus/{ => data}/metadata/SINAN/MALA.tar.gz | Bin pysus/{ => data}/metadata/SINAN/MENI.tar.gz | Bin pysus/{ => data}/metadata/SINAN/PEST.tar.gz | Bin pysus/{ => data}/metadata/SINAN/RAIV.tar.gz | Bin pysus/{ => data}/metadata/SINAN/SIFC.tar.gz | Bin pysus/{ => data}/metadata/SINAN/SIFG.tar.gz | Bin pysus/{ => data}/metadata/SINAN/TETA.tar.gz | Bin pysus/{ => data}/metadata/SINAN/TETN.tar.gz | Bin pysus/{ => data}/metadata/SINAN/TUBE.tar.gz | Bin pysus/{ => data}/metadata/SINAN/typecast.py | 0 
pysus/data/metadata/__init__.py | 0 pysus/{ => data}/preprocessing/SIM.py | 0 pysus/{ => data}/preprocessing/__init__.py | 0 pysus/{online_data => data/remote}/CIHA.py | 8 +- pysus/{online_data => data/remote}/CNES.py | 5 +- pysus/{online_data => data/remote}/IBGE.py | 17 +-- .../remote}/Infodengue.py | 0 pysus/{online_data => data/remote}/PNI.py | 6 +- pysus/{online_data => data/remote}/SIA.py | 6 +- pysus/{online_data => data/remote}/SIH.py | 6 +- pysus/{online_data => data/remote}/SIM.py | 51 +++---- pysus/{online_data => data/remote}/SINAN.py | 9 +- pysus/{online_data => data/remote}/SINASC.py | 6 +- .../{online_data => data/remote}/__init__.py | 0 .../{online_data => data/remote}/territory.py | 2 +- pysus/{online_data => data/remote}/vaccine.py | 6 +- pysus/ftp/utils.py | 28 ---- pysus/online_data/Infogripe.py | 23 ---- pysus/preprocessing/sinan.py | 127 ------------------ pysus/utils/__init__.py | 25 ++++ pysus/{utilities => utils}/brasil.py | 23 ++++ pysus/{preprocessing => utils}/decoders.py | 27 ++-- .../{dataset => utils}/geocode_by_cities.json | 0 pysus/{utilities => utils}/municipios.json | 0 77 files changed, 233 insertions(+), 348 deletions(-) rename pysus/{ftp => api}/README.md (100%) rename pysus/api/dadosgov/{schemas.py => README.md} (100%) rename pysus/{utilities => api/dadosgov}/__init__.py (100%) create mode 100644 pysus/api/ducklake/README.md create mode 100644 pysus/api/ducklake/__init__.py create mode 100644 pysus/api/ducklake/catalog/models.py create mode 100644 pysus/api/ducklake/client.py create mode 100644 pysus/api/ftp/README.md create mode 100644 pysus/api/ftp/__init__.py rename pysus/{ftp/__init__.py => api/ftp/client.py} (95%) rename pysus/{ => api}/ftp/databases/__init__.py (100%) rename pysus/{ => api}/ftp/databases/ciha.py (91%) rename pysus/{ => api}/ftp/databases/cnes.py (92%) rename pysus/{ => api}/ftp/databases/ibge_datasus.py (92%) rename pysus/{ => api}/ftp/databases/pni.py (87%) rename pysus/{ => api}/ftp/databases/sia.py 
(92%) rename pysus/{ => api}/ftp/databases/sih.py (91%) rename pysus/{ => api}/ftp/databases/sim.py (94%) rename pysus/{ => api}/ftp/databases/sinan.py (94%) rename pysus/{ => api}/ftp/databases/sinasc.py (95%) rename pysus/{ => data}/metadata/SINAN/ANIM.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/BOTU.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/CHAG.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/CHIK.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/COLE.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/COQU.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/DENG.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/DIFT.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/ESQU.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/FAMA.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/FMAC.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/FTIF.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/HANS.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/HANT.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/HEPA.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/IEXO.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/LEIV.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/LEPT.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/LTAN.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/MALA.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/MENI.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/PEST.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/RAIV.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/SIFC.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/SIFG.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/TETA.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/TETN.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/TUBE.tar.gz (100%) rename pysus/{ => data}/metadata/SINAN/typecast.py (100%) create mode 100644 pysus/data/metadata/__init__.py rename pysus/{ => data}/preprocessing/SIM.py (100%) 
rename pysus/{ => data}/preprocessing/__init__.py (100%) rename pysus/{online_data => data/remote}/CIHA.py (90%) rename pysus/{online_data => data/remote}/CNES.py (96%) rename pysus/{online_data => data/remote}/IBGE.py (97%) rename pysus/{online_data => data/remote}/Infodengue.py (100%) rename pysus/{online_data => data/remote}/PNI.py (93%) rename pysus/{online_data => data/remote}/SIA.py (96%) rename pysus/{online_data => data/remote}/SIH.py (94%) rename pysus/{online_data => data/remote}/SIM.py (91%) rename pysus/{online_data => data/remote}/SINAN.py (89%) rename pysus/{online_data => data/remote}/SINASC.py (92%) rename pysus/{online_data => data/remote}/__init__.py (100%) rename pysus/{online_data => data/remote}/territory.py (92%) rename pysus/{online_data => data/remote}/vaccine.py (98%) delete mode 100644 pysus/ftp/utils.py delete mode 100644 pysus/online_data/Infogripe.py delete mode 100644 pysus/preprocessing/sinan.py create mode 100644 pysus/utils/__init__.py rename pysus/{utilities => utils}/brasil.py (70%) rename pysus/{preprocessing => utils}/decoders.py (94%) rename pysus/{dataset => utils}/geocode_by_cities.json (100%) rename pysus/{utilities => utils}/municipios.json (100%) diff --git a/poetry.lock b/poetry.lock index daacd197..3454fabd 100644 --- a/poetry.lock +++ b/poetry.lock @@ -974,6 +974,60 @@ files = [ {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"}, ] +[[package]] +name = "duckdb" +version = "1.4.4" +description = "DuckDB in-process database" +optional = false +python-versions = ">=3.9.0" +groups = ["main"] +files = [ + {file = "duckdb-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e870a441cb1c41d556205deb665749f26347ed13b3a247b53714f5d589596977"}, + {file = "duckdb-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:49123b579e4a6323e65139210cd72dddc593a72d840211556b60f9703bda8526"}, + {file = "duckdb-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:5e1933fac5293fea5926b0ee75a55b8cfe7f516d867310a5b251831ab61fe62b"}, + {file = "duckdb-1.4.4-cp310-cp310-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:707530f6637e91dc4b8125260595299ec9dd157c09f5d16c4186c5988bfbd09a"}, + {file = "duckdb-1.4.4-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:453b115f4777467f35103d8081770ac2f223fb5799178db5b06186e3ab51d1f2"}, + {file = "duckdb-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:a3c8542db7ffb128aceb7f3b35502ebaddcd4f73f1227569306cc34bad06680c"}, + {file = "duckdb-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5ba684f498d4e924c7e8f30dd157da8da34c8479746c5011b6c0e037e9c60ad2"}, + {file = "duckdb-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5536eb952a8aa6ae56469362e344d4e6403cc945a80bc8c5c2ebdd85d85eb64b"}, + {file = "duckdb-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:47dd4162da6a2be59a0aef640eb08d6360df1cf83c317dcc127836daaf3b7f7c"}, + {file = "duckdb-1.4.4-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6cb357cfa3403910e79e2eb46c8e445bb1ee2fd62e9e9588c6b999df4256abc1"}, + {file = "duckdb-1.4.4-cp311-cp311-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4c25d5b0febda02b7944e94fdae95aecf952797afc8cb920f677b46a7c251955"}, + {file = "duckdb-1.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:6703dd1bb650025b3771552333d305d62ddd7ff182de121483d4e042ea6e2e00"}, + {file = "duckdb-1.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:bf138201f56e5d6fc276a25138341b3523e2f84733613fc43f02c54465619a95"}, + {file = "duckdb-1.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ddcfd9c6ff234da603a1edd5fd8ae6107f4d042f74951b65f91bc5e2643856b3"}, + {file = "duckdb-1.4.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6792ca647216bd5c4ff16396e4591cfa9b4a72e5ad7cdd312cec6d67e8431a7c"}, + {file = "duckdb-1.4.4-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:1f8d55843cc940e36261689054f7dfb6ce35b1f5b0953b0d355b6adb654b0d52"}, + {file = "duckdb-1.4.4-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c65d15c440c31e06baaebfd2c06d71ce877e132779d309f1edf0a85d23c07e92"}, + {file = "duckdb-1.4.4-cp312-cp312-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b297eff642503fd435a9de5a9cb7db4eccb6f61d61a55b30d2636023f149855f"}, + {file = "duckdb-1.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:d525de5f282b03aa8be6db86b1abffdceae5f1055113a03d5b50cd2fb8cf2ef8"}, + {file = "duckdb-1.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:50f2eb173c573811b44aba51176da7a4e5c487113982be6a6a1c37337ec5fa57"}, + {file = "duckdb-1.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:337f8b24e89bc2e12dadcfe87b4eb1c00fd920f68ab07bc9b70960d6523b8bc3"}, + {file = "duckdb-1.4.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0509b39ea7af8cff0198a99d206dca753c62844adab54e545984c2e2c1381616"}, + {file = "duckdb-1.4.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:fb94de6d023de9d79b7edc1ae07ee1d0b4f5fa8a9dcec799650b5befdf7aafec"}, + {file = "duckdb-1.4.4-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0d636ceda422e7babd5e2f7275f6a0d1a3405e6a01873f00d38b72118d30c10b"}, + {file = "duckdb-1.4.4-cp313-cp313-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7df7351328ffb812a4a289732f500d621e7de9942a3a2c9b6d4afcf4c0e72526"}, + {file = "duckdb-1.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:6fb1225a9ea5877421481d59a6c556a9532c32c16c7ae6ca8d127e2b878c9389"}, + {file = "duckdb-1.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:f28a18cc790217e5b347bb91b2cab27aafc557c58d3d8382e04b4fe55d0c3f66"}, + {file = "duckdb-1.4.4-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25874f8b1355e96178079e37312c3ba6d61a2354f51319dae860cf21335c3a20"}, + {file = "duckdb-1.4.4-cp314-cp314-macosx_10_15_x86_64.whl", hash = 
"sha256:452c5b5d6c349dc5d1154eb2062ee547296fcbd0c20e9df1ed00b5e1809089da"}, + {file = "duckdb-1.4.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8e5c2d8a0452df55e092959c0bfc8ab8897ac3ea0f754cb3b0ab3e165cd79aff"}, + {file = "duckdb-1.4.4-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1af6e76fe8bd24875dc56dd8e38300d64dc708cd2e772f67b9fbc635cc3066a3"}, + {file = "duckdb-1.4.4-cp314-cp314-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d0440f59e0cd9936a9ebfcf7a13312eda480c79214ffed3878d75947fc3b7d6d"}, + {file = "duckdb-1.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:59c8d76016dde854beab844935b1ec31de358d4053e792988108e995b18c08e7"}, + {file = "duckdb-1.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:53cd6423136ab44383ec9955aefe7599b3fb3dd1fe006161e6396d8167e0e0d4"}, + {file = "duckdb-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8097201bc5fd0779d7fcc2f3f4736c349197235f4cb7171622936343a1aa8dbf"}, + {file = "duckdb-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cd1be3d48577f5b40eb9706c6b2ae10edfe18e78eb28e31a3b922dcff1183597"}, + {file = "duckdb-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e041f2fbd6888da090eca96ac167a7eb62d02f778385dd9155ed859f1c6b6dc8"}, + {file = "duckdb-1.4.4-cp39-cp39-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7eec0bf271ac622e57b7f6554a27a6e7d1dd2f43d1871f7962c74bcbbede15ba"}, + {file = "duckdb-1.4.4-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5cdc4126ec925edf3112bc656ac9ed23745294b854935fa7a643a216e4455af6"}, + {file = "duckdb-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:c9566a4ed834ec7999db5849f53da0a7ee83d86830c33f471bf0211a1148ca12"}, + {file = "duckdb-1.4.4.tar.gz", hash = "sha256:8bba52fd2acb67668a4615ee17ee51814124223de836d9e2fdcbc4c9021b3d3c"}, +] + +[package.extras] +all = ["adbc-driver-manager", "fsspec", "ipython", "numpy", "pandas", "pyarrow"] + [[package]] name = "exceptiongroup" version = 
"1.2.2" @@ -4541,4 +4595,4 @@ preprocessing = [] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "5a9f9bf4dbb0bcce1c501595176b4f03faed1ee0a5c7c9581d366606e7cddb1c" +content-hash = "4b551ecb1dddda94c2ea6579463188e6aa0ab5da486b63dd8bed941ce9a4d7db" diff --git a/pyproject.toml b/pyproject.toml index 28f231d9..781738c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ aioftp = "^0.21.4" humanize = "^4.8.0" typing-extensions = ">=4.10.0" pydantic = "^2.12.5" +duckdb = "^1.4.4" [tool.poetry.group.dev.dependencies] pytest = ">=6.1.0" diff --git a/pysus/__init__.py b/pysus/__init__.py index 19a54a36..1cfef31f 100644 --- a/pysus/__init__.py +++ b/pysus/__init__.py @@ -3,8 +3,7 @@ from importlib import metadata as importlib_metadata -from pysus.ftp.databases import * # noqa -from pysus.ftp.databases import AVAILABLE_DATABASES +from pysus.api.ftp.databases import * # noqa def get_version() -> str: diff --git a/pysus/ftp/README.md b/pysus/api/README.md similarity index 100% rename from pysus/ftp/README.md rename to pysus/api/README.md diff --git a/pysus/api/dadosgov/schemas.py b/pysus/api/dadosgov/README.md similarity index 100% rename from pysus/api/dadosgov/schemas.py rename to pysus/api/dadosgov/README.md diff --git a/pysus/utilities/__init__.py b/pysus/api/dadosgov/__init__.py similarity index 100% rename from pysus/utilities/__init__.py rename to pysus/api/dadosgov/__init__.py diff --git a/pysus/api/ducklake/README.md b/pysus/api/ducklake/README.md new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ducklake/__init__.py b/pysus/api/ducklake/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ducklake/catalog/models.py b/pysus/api/ducklake/catalog/models.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ducklake/client.py b/pysus/api/ducklake/client.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ftp/README.md 
b/pysus/api/ftp/README.md new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ftp/__init__.py b/pysus/api/ftp/__init__.py new file mode 100644 index 00000000..65944e50 --- /dev/null +++ b/pysus/api/ftp/__init__.py @@ -0,0 +1,3 @@ +from .client import * # noqa +from .databases import * # noqa + diff --git a/pysus/ftp/__init__.py b/pysus/api/ftp/client.py similarity index 95% rename from pysus/ftp/__init__.py rename to pysus/api/ftp/client.py index 93d0dd01..453ed3f4 100644 --- a/pysus/ftp/__init__.py +++ b/pysus/api/ftp/client.py @@ -1,5 +1,7 @@ from __future__ import annotations +__all__ = ["File", "Directory", "Database", "CACHEPATH"] + import asyncio import os import pathlib @@ -14,7 +16,6 @@ Protocol, Tuple, TypedDict, - TypeVar, Union, runtime_checkable, ) @@ -23,13 +24,13 @@ from aioftp import Client from loguru import logger from pysus.data.local import Data +from pysus.utils import to_list from tqdm import tqdm from typing_extensions import Self # Type aliases PathLike = Union[str, pathlib.Path] FileContent = Dict[str, Union["Directory", "File"]] -T = TypeVar("T") # Constants CACHEPATH: Final[str] = os.getenv( @@ -39,13 +40,6 @@ __cachepath__.mkdir(exist_ok=True) -def to_list(item: Union[T, List[T], Tuple[T, ...], None]) -> List[T]: - """Parse any builtin data type into a list""" - if item is None: - return [] - return [item] if not isinstance(item, (list, tuple)) else list(item) - - # Cache storage DIRECTORY_CACHE: Dict[str, "Directory"] = {} @@ -399,17 +393,13 @@ def load_directory_content(path: str) -> FileContent: def line_parser(line: str): if "" in line: date, time, _, name = line.strip().split(maxsplit=3) - modify = datetime.strptime( - f"{date} {time}", "%m-%d-%y %I:%M%p" - ) + modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") info = {"size": 0, "type": "dir", "modify": modify} xpath = f"{path}/{name}" content[name] = Directory(xpath) else: date, time, size, name = line.strip().split(maxsplit=3) - modify = 
datetime.strptime( - f"{date} {time}", "%m-%d-%y %I:%M%p" - ) + modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") info: FileInfo = { "size": size, "type": "file", @@ -471,7 +461,7 @@ def __init__(self) -> None: self.__content__ = {} def __repr__(self) -> str: - return f'{self.name} - {self.metadata["long_name"]}' + return f"{self.name} - {self.metadata['long_name']}" @property def content(self) -> List[Union[Directory, File]]: @@ -482,8 +472,7 @@ def content(self) -> List[Union[Directory, File]]: """ if not self.__content__: logger.info( - "content is not loaded, use `load()` to load default paths" - ) + "content is not loaded, use `load()` to load default paths") return [] return sorted(list(self.__content__.values()), key=str) @@ -548,9 +537,7 @@ def get_files(self, *args, **kwargs) -> list[File]: """ ... - def download( - self, files: List[File], local_dir: str = CACHEPATH - ) -> List[str]: + def download(self, files: List[File], local_dir: str = CACHEPATH) -> List[str]: """ Downloads a list of Files. 
""" @@ -565,9 +552,7 @@ def download( return dfiles[0] return dfiles - async def async_download( - self, files: List[File], local_dir: str = CACHEPATH - ): + async def async_download(self, files: List[File], local_dir: str = CACHEPATH): """ Asynchronously downloads a list of files """ diff --git a/pysus/ftp/databases/__init__.py b/pysus/api/ftp/databases/__init__.py similarity index 100% rename from pysus/ftp/databases/__init__.py rename to pysus/api/ftp/databases/__init__.py diff --git a/pysus/ftp/databases/ciha.py b/pysus/api/ftp/databases/ciha.py similarity index 91% rename from pysus/ftp/databases/ciha.py rename to pysus/api/ftp/databases/ciha.py index 5c8c43c4..b84d18ab 100644 --- a/pysus/ftp/databases/ciha.py +++ b/pysus/api/ftp/databases/ciha.py @@ -2,8 +2,8 @@ from typing import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year class CIHA(Database): @@ -74,17 +74,16 @@ def get_files( group: Union[List[str], str] = "CIHA", ) -> List[File]: files = list( - filter( - lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files - ) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - "Unknown CIHA Group(s): " - f"{set(groups).difference(list(self.groups))}" + f"Unknown CIHA Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) diff --git a/pysus/ftp/databases/cnes.py b/pysus/api/ftp/databases/cnes.py similarity index 92% rename from pysus/ftp/databases/cnes.py rename to pysus/api/ftp/databases/cnes.py index 1e070be7..61235fba 100644 --- a/pysus/ftp/databases/cnes.py +++ b/pysus/api/ftp/databases/cnes.py @@ -2,8 +2,8 @@ from typing 
import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year class CNES(Database): @@ -55,12 +55,10 @@ def load( if groups: groups = to_list(groups) - if not all( - group in self.groups for group in [gr.upper() for gr in groups] - ): + if not all(group in self.groups for group in [gr.upper() for gr in groups]): raise ValueError( - "Unknown CNES group(s): " - f"{set(groups).difference(self.groups)}" + f"Unknown CNES group(s): {set( + groups).difference(self.groups)}" ) for group in groups: diff --git a/pysus/ftp/databases/ibge_datasus.py b/pysus/api/ftp/databases/ibge_datasus.py similarity index 92% rename from pysus/ftp/databases/ibge_datasus.py rename to pysus/api/ftp/databases/ibge_datasus.py index d1547ae5..39fa6c02 100644 --- a/pysus/ftp/databases/ibge_datasus.py +++ b/pysus/api/ftp/databases/ibge_datasus.py @@ -2,8 +2,8 @@ from typing import List, Literal, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import zfill_year class IBGEDATASUS(Database): @@ -73,9 +73,7 @@ def get_files( if year: if isinstance(year, (str, int)): files = [ - f - for f in files - if self.describe(f)["year"] == zfill_year(year) + f for f in files if self.describe(f)["year"] == zfill_year(year) ] elif isinstance(year, list): files = [ diff --git a/pysus/ftp/databases/pni.py b/pysus/api/ftp/databases/pni.py similarity index 87% rename from pysus/ftp/databases/pni.py rename to pysus/api/ftp/databases/pni.py index 37cf8484..ef154287 100644 --- a/pysus/ftp/databases/pni.py +++ b/pysus/api/ftp/databases/pni.py @@ -2,17 +2,15 @@ from typing import List, Literal, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import UFs, 
parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import UFs, parse_UFs, to_list, zfill_year class PNI(Database): name = "PNI" paths = (Directory("/dissemin/publicos/PNI/DADOS"),) metadata = { - "long_name": ( - "Sistema de Informações do Programa Nacional de Imunizações" - ), + "long_name": ("Sistema de Informações do Programa Nacional de Imunizações"), "source": ( "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa @@ -58,7 +56,6 @@ def describe(self, file: File) -> dict: return {} def format(self, file: File) -> tuple: - if len(file.name) != 8: raise ValueError(f"Can't format {file.name}") @@ -73,17 +70,16 @@ def get_files( year: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter( - lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files - ) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - "Unknown PNI Group(s): " - f"{set(groups).difference(list(self.groups))}" + f"Unknown PNI Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) diff --git a/pysus/ftp/databases/sia.py b/pysus/api/ftp/databases/sia.py similarity index 92% rename from pysus/ftp/databases/sia.py rename to pysus/api/ftp/databases/sia.py index 76b5dd7b..3f28d809 100644 --- a/pysus/ftp/databases/sia.py +++ b/pysus/api/ftp/databases/sia.py @@ -2,8 +2,8 @@ from typing import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year class SIA(Database): @@ -93,17 
+93,16 @@ def get_files( month: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter( - lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files - ) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - "Unknown SIA Group(s): " - f"{set(groups).difference(list(self.groups))}" + f"Unknown SIA Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) diff --git a/pysus/ftp/databases/sih.py b/pysus/api/ftp/databases/sih.py similarity index 91% rename from pysus/ftp/databases/sih.py rename to pysus/api/ftp/databases/sih.py index 97757d8c..0c28400d 100644 --- a/pysus/ftp/databases/sih.py +++ b/pysus/api/ftp/databases/sih.py @@ -2,8 +2,8 @@ from typing import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year class SIH(Database): @@ -76,17 +76,16 @@ def get_files( month: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter( - lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files - ) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown SIH Group(s): " - f"{set(groups).difference(list(self.groups))}" + f"Unknown SIH Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) diff --git a/pysus/ftp/databases/sim.py b/pysus/api/ftp/databases/sim.py similarity index 94% rename from pysus/ftp/databases/sim.py rename to pysus/api/ftp/databases/sim.py index 83134a49..0a85aa1f 
100644 --- a/pysus/ftp/databases/sim.py +++ b/pysus/api/ftp/databases/sim.py @@ -2,8 +2,8 @@ from typing import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import UFs, parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import UFs, parse_UFs, to_list, zfill_year class SIM(Database): diff --git a/pysus/ftp/databases/sinan.py b/pysus/api/ftp/databases/sinan.py similarity index 94% rename from pysus/ftp/databases/sinan.py rename to pysus/api/ftp/databases/sinan.py index ccc3ae80..f272d016 100644 --- a/pysus/ftp/databases/sinan.py +++ b/pysus/api/ftp/databases/sinan.py @@ -2,8 +2,8 @@ from typing import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import to_list, zfill_year class SINAN(Database): @@ -122,9 +122,8 @@ def get_files( year: Optional[Union[str, int, list]] = None, ) -> List[File]: files = list( - filter( - lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files - ) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) if dis_code: @@ -132,8 +131,8 @@ def get_files( if codes and not all(code in self.diseases for code in codes): raise ValueError( - "Unknown disease(s): " - f"{set(codes).difference(set(self.diseases))}" + f"Unknown disease(s): {set( + codes).difference(set(self.diseases))}" ) files = list(filter(lambda f: self.format(f)[0] in codes, files)) diff --git a/pysus/ftp/databases/sinasc.py b/pysus/api/ftp/databases/sinasc.py similarity index 95% rename from pysus/ftp/databases/sinasc.py rename to pysus/api/ftp/databases/sinasc.py index aaac7b63..f7e73c29 100644 --- a/pysus/ftp/databases/sinasc.py +++ b/pysus/api/ftp/databases/sinasc.py @@ -2,8 +2,8 @@ from typing import List, Optional, Union -from pysus.ftp import Database, Directory, File -from pysus.ftp.utils import UFs, 
parse_UFs, to_list, zfill_year +from pysus.api.ftp import Database, Directory, File +from pysus.utils import UFs, parse_UFs, to_list, zfill_year class SINASC(Database): diff --git a/pysus/data/local.py b/pysus/data/local.py index 5ea7476e..c9346deb 100644 --- a/pysus/data/local.py +++ b/pysus/data/local.py @@ -8,10 +8,6 @@ class ParquetSet: - """ - A local parquet directory or file - """ - __path__: Union[PurePosixPath, PureWindowsPath] info: Dict @@ -41,6 +37,9 @@ def __init__(self, path: str, _pbar=None) -> None: def __str__(self): return str(self.__path__) + def __fspath__(self): + return str(self) + def __repr__(self): return str(self.__path__) @@ -57,9 +56,8 @@ def to_dataframe(self) -> pd.DataFrame: parquets into a single dataframe """ parquets = list(map(str, self.__path__.glob("*.parquet"))) - chunks_list = [ - pd.read_parquet(str(f), engine="fastparquet") for f in parquets - ] + chunks_list = [pd.read_parquet( + str(f), engine="fastparquet") for f in parquets] _df = pd.concat(chunks_list, ignore_index=True) return parse_dftypes(_df) diff --git a/pysus/metadata/SINAN/ANIM.tar.gz b/pysus/data/metadata/SINAN/ANIM.tar.gz similarity index 100% rename from pysus/metadata/SINAN/ANIM.tar.gz rename to pysus/data/metadata/SINAN/ANIM.tar.gz diff --git a/pysus/metadata/SINAN/BOTU.tar.gz b/pysus/data/metadata/SINAN/BOTU.tar.gz similarity index 100% rename from pysus/metadata/SINAN/BOTU.tar.gz rename to pysus/data/metadata/SINAN/BOTU.tar.gz diff --git a/pysus/metadata/SINAN/CHAG.tar.gz b/pysus/data/metadata/SINAN/CHAG.tar.gz similarity index 100% rename from pysus/metadata/SINAN/CHAG.tar.gz rename to pysus/data/metadata/SINAN/CHAG.tar.gz diff --git a/pysus/metadata/SINAN/CHIK.tar.gz b/pysus/data/metadata/SINAN/CHIK.tar.gz similarity index 100% rename from pysus/metadata/SINAN/CHIK.tar.gz rename to pysus/data/metadata/SINAN/CHIK.tar.gz diff --git a/pysus/metadata/SINAN/COLE.tar.gz b/pysus/data/metadata/SINAN/COLE.tar.gz similarity index 100% rename from 
pysus/metadata/SINAN/COLE.tar.gz rename to pysus/data/metadata/SINAN/COLE.tar.gz diff --git a/pysus/metadata/SINAN/COQU.tar.gz b/pysus/data/metadata/SINAN/COQU.tar.gz similarity index 100% rename from pysus/metadata/SINAN/COQU.tar.gz rename to pysus/data/metadata/SINAN/COQU.tar.gz diff --git a/pysus/metadata/SINAN/DENG.tar.gz b/pysus/data/metadata/SINAN/DENG.tar.gz similarity index 100% rename from pysus/metadata/SINAN/DENG.tar.gz rename to pysus/data/metadata/SINAN/DENG.tar.gz diff --git a/pysus/metadata/SINAN/DIFT.tar.gz b/pysus/data/metadata/SINAN/DIFT.tar.gz similarity index 100% rename from pysus/metadata/SINAN/DIFT.tar.gz rename to pysus/data/metadata/SINAN/DIFT.tar.gz diff --git a/pysus/metadata/SINAN/ESQU.tar.gz b/pysus/data/metadata/SINAN/ESQU.tar.gz similarity index 100% rename from pysus/metadata/SINAN/ESQU.tar.gz rename to pysus/data/metadata/SINAN/ESQU.tar.gz diff --git a/pysus/metadata/SINAN/FAMA.tar.gz b/pysus/data/metadata/SINAN/FAMA.tar.gz similarity index 100% rename from pysus/metadata/SINAN/FAMA.tar.gz rename to pysus/data/metadata/SINAN/FAMA.tar.gz diff --git a/pysus/metadata/SINAN/FMAC.tar.gz b/pysus/data/metadata/SINAN/FMAC.tar.gz similarity index 100% rename from pysus/metadata/SINAN/FMAC.tar.gz rename to pysus/data/metadata/SINAN/FMAC.tar.gz diff --git a/pysus/metadata/SINAN/FTIF.tar.gz b/pysus/data/metadata/SINAN/FTIF.tar.gz similarity index 100% rename from pysus/metadata/SINAN/FTIF.tar.gz rename to pysus/data/metadata/SINAN/FTIF.tar.gz diff --git a/pysus/metadata/SINAN/HANS.tar.gz b/pysus/data/metadata/SINAN/HANS.tar.gz similarity index 100% rename from pysus/metadata/SINAN/HANS.tar.gz rename to pysus/data/metadata/SINAN/HANS.tar.gz diff --git a/pysus/metadata/SINAN/HANT.tar.gz b/pysus/data/metadata/SINAN/HANT.tar.gz similarity index 100% rename from pysus/metadata/SINAN/HANT.tar.gz rename to pysus/data/metadata/SINAN/HANT.tar.gz diff --git a/pysus/metadata/SINAN/HEPA.tar.gz b/pysus/data/metadata/SINAN/HEPA.tar.gz similarity index 100% 
rename from pysus/metadata/SINAN/HEPA.tar.gz rename to pysus/data/metadata/SINAN/HEPA.tar.gz diff --git a/pysus/metadata/SINAN/IEXO.tar.gz b/pysus/data/metadata/SINAN/IEXO.tar.gz similarity index 100% rename from pysus/metadata/SINAN/IEXO.tar.gz rename to pysus/data/metadata/SINAN/IEXO.tar.gz diff --git a/pysus/metadata/SINAN/LEIV.tar.gz b/pysus/data/metadata/SINAN/LEIV.tar.gz similarity index 100% rename from pysus/metadata/SINAN/LEIV.tar.gz rename to pysus/data/metadata/SINAN/LEIV.tar.gz diff --git a/pysus/metadata/SINAN/LEPT.tar.gz b/pysus/data/metadata/SINAN/LEPT.tar.gz similarity index 100% rename from pysus/metadata/SINAN/LEPT.tar.gz rename to pysus/data/metadata/SINAN/LEPT.tar.gz diff --git a/pysus/metadata/SINAN/LTAN.tar.gz b/pysus/data/metadata/SINAN/LTAN.tar.gz similarity index 100% rename from pysus/metadata/SINAN/LTAN.tar.gz rename to pysus/data/metadata/SINAN/LTAN.tar.gz diff --git a/pysus/metadata/SINAN/MALA.tar.gz b/pysus/data/metadata/SINAN/MALA.tar.gz similarity index 100% rename from pysus/metadata/SINAN/MALA.tar.gz rename to pysus/data/metadata/SINAN/MALA.tar.gz diff --git a/pysus/metadata/SINAN/MENI.tar.gz b/pysus/data/metadata/SINAN/MENI.tar.gz similarity index 100% rename from pysus/metadata/SINAN/MENI.tar.gz rename to pysus/data/metadata/SINAN/MENI.tar.gz diff --git a/pysus/metadata/SINAN/PEST.tar.gz b/pysus/data/metadata/SINAN/PEST.tar.gz similarity index 100% rename from pysus/metadata/SINAN/PEST.tar.gz rename to pysus/data/metadata/SINAN/PEST.tar.gz diff --git a/pysus/metadata/SINAN/RAIV.tar.gz b/pysus/data/metadata/SINAN/RAIV.tar.gz similarity index 100% rename from pysus/metadata/SINAN/RAIV.tar.gz rename to pysus/data/metadata/SINAN/RAIV.tar.gz diff --git a/pysus/metadata/SINAN/SIFC.tar.gz b/pysus/data/metadata/SINAN/SIFC.tar.gz similarity index 100% rename from pysus/metadata/SINAN/SIFC.tar.gz rename to pysus/data/metadata/SINAN/SIFC.tar.gz diff --git a/pysus/metadata/SINAN/SIFG.tar.gz b/pysus/data/metadata/SINAN/SIFG.tar.gz similarity 
index 100% rename from pysus/metadata/SINAN/SIFG.tar.gz rename to pysus/data/metadata/SINAN/SIFG.tar.gz diff --git a/pysus/metadata/SINAN/TETA.tar.gz b/pysus/data/metadata/SINAN/TETA.tar.gz similarity index 100% rename from pysus/metadata/SINAN/TETA.tar.gz rename to pysus/data/metadata/SINAN/TETA.tar.gz diff --git a/pysus/metadata/SINAN/TETN.tar.gz b/pysus/data/metadata/SINAN/TETN.tar.gz similarity index 100% rename from pysus/metadata/SINAN/TETN.tar.gz rename to pysus/data/metadata/SINAN/TETN.tar.gz diff --git a/pysus/metadata/SINAN/TUBE.tar.gz b/pysus/data/metadata/SINAN/TUBE.tar.gz similarity index 100% rename from pysus/metadata/SINAN/TUBE.tar.gz rename to pysus/data/metadata/SINAN/TUBE.tar.gz diff --git a/pysus/metadata/SINAN/typecast.py b/pysus/data/metadata/SINAN/typecast.py similarity index 100% rename from pysus/metadata/SINAN/typecast.py rename to pysus/data/metadata/SINAN/typecast.py diff --git a/pysus/data/metadata/__init__.py b/pysus/data/metadata/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/preprocessing/SIM.py b/pysus/data/preprocessing/SIM.py similarity index 100% rename from pysus/preprocessing/SIM.py rename to pysus/data/preprocessing/SIM.py diff --git a/pysus/preprocessing/__init__.py b/pysus/data/preprocessing/__init__.py similarity index 100% rename from pysus/preprocessing/__init__.py rename to pysus/data/preprocessing/__init__.py diff --git a/pysus/online_data/CIHA.py b/pysus/data/remote/CIHA.py similarity index 90% rename from pysus/online_data/CIHA.py rename to pysus/data/remote/CIHA.py index 9be4ecc0..475aec7e 100644 --- a/pysus/online_data/CIHA.py +++ b/pysus/data/remote/CIHA.py @@ -6,19 +6,19 @@ by fccoelho license: GPL V3 or Later """ + from typing import Union from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.ciha import CIHA -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, CIHA +from pysus.utils.brasil import parse_UFs ciha = CIHA().load() 
def get_available_years( states: Union[list, str] = None, -) -> dict[str : set[int]]: +) -> dict[str: set[int]]: """ Fetch available years for the `states`. :param states: UF code. E.g: "SP" or ["SP", "RJ"] diff --git a/pysus/online_data/CNES.py b/pysus/data/remote/CNES.py similarity index 96% rename from pysus/online_data/CNES.py rename to pysus/data/remote/CNES.py index a3b1188e..1881b558 100644 --- a/pysus/online_data/CNES.py +++ b/pysus/data/remote/CNES.py @@ -1,9 +1,8 @@ from typing import Union from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.cnes import CNES -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, CNES +from pysus.utils.brasil import parse_UFs cnes = CNES().load() diff --git a/pysus/online_data/IBGE.py b/pysus/data/remote/IBGE.py similarity index 97% rename from pysus/online_data/IBGE.py rename to pysus/data/remote/IBGE.py index 33fba909..5646a3a9 100644 --- a/pysus/online_data/IBGE.py +++ b/pysus/data/remote/IBGE.py @@ -13,7 +13,7 @@ import requests import urllib3 from pysus.data.local import ParquetSet -from pysus.ftp.databases.ibge_datasus import IBGEDATASUS +from pysus.api.ftp import IBGEDATASUS # requests.packages.urllib3.util.ssl_.DEFAULT_CIPHERS = 'ALL:@SECLEVEL=1' @@ -296,12 +296,10 @@ class FetchData: resultados vêm a partir do segundo elemento. """ - def __init__( - self, agregado: int, periodos: str, variavel: str = "allxp", **kwargs - ): + def __init__(self, agregado: int, periodos: str, variavel: str = "allxp", **kwargs): self.url = ( - APIBASE - + f"agregados/{agregado}/periodos/{periodos}/variaveis/{variavel}?" + APIBASE + + f"agregados/{agregado}/periodos/{periodos}/variaveis/{variavel}?" ) self.url += "&".join([f"{k}={v}" for k, v in kwargs.items()]) self.JSON = None @@ -390,8 +388,7 @@ def get_population( opts = ["ALF", "ESCA", "ESCB", "IDOSO", "RENDA"] if not censo_data or censo_data not in opts: raise ValueError( - f"Incorrect 'censo_data' parameter. 
Options: {opts}" - ) + f"Incorrect 'censo_data' parameter. Options: {opts}") file = [f for f in files if censo_data in f.name][0].download() else: file = files[0].download() @@ -415,8 +412,6 @@ def _unzip_to_dataframe(file: str) -> pd.DataFrame: return pd.read_csv(zip_file.extract(file, tempdir)) if file.lower().endswith((".dbf", ".dbc")): - return ParquetSet( - zip_file.extract(file, tempdir) - ).to_dataframe() + return ParquetSet(zip_file.extract(file, tempdir)).to_dataframe() raise ValueError(f"No data found in {zip_file}") diff --git a/pysus/online_data/Infodengue.py b/pysus/data/remote/Infodengue.py similarity index 100% rename from pysus/online_data/Infodengue.py rename to pysus/data/remote/Infodengue.py diff --git a/pysus/online_data/PNI.py b/pysus/data/remote/PNI.py similarity index 93% rename from pysus/online_data/PNI.py rename to pysus/data/remote/PNI.py index 2df41c1c..b9f60b80 100644 --- a/pysus/online_data/PNI.py +++ b/pysus/data/remote/PNI.py @@ -1,12 +1,12 @@ """ Download data from the national immunization program """ + from typing import Literal, Union from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.pni import PNI -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, PNI +from pysus.utils.brasil import parse_UFs pni = PNI().load() diff --git a/pysus/online_data/SIA.py b/pysus/data/remote/SIA.py similarity index 96% rename from pysus/online_data/SIA.py rename to pysus/data/remote/SIA.py index 19ff22a4..6b3b8316 100644 --- a/pysus/online_data/SIA.py +++ b/pysus/data/remote/SIA.py @@ -6,13 +6,13 @@ by bcbernardo license: GPL V3 or Later """ + from pprint import pprint from typing import Dict, Tuple, Union from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.sia import SIA -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, SIA +from pysus.utils.brasil import parse_UFs sia = SIA().load() diff --git a/pysus/online_data/SIH.py 
b/pysus/data/remote/SIH.py similarity index 94% rename from pysus/online_data/SIH.py rename to pysus/data/remote/SIH.py index 67749f51..523833b9 100644 --- a/pysus/online_data/SIH.py +++ b/pysus/data/remote/SIH.py @@ -4,12 +4,12 @@ by fccoelho license: GPL V3 or Later """ + from typing import Union from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.sih import SIH -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, SIH +from pysus.utils.brasil import parse_UFs sih = SIH().load() diff --git a/pysus/online_data/SIM.py b/pysus/data/remote/SIM.py similarity index 91% rename from pysus/online_data/SIM.py rename to pysus/data/remote/SIM.py index c021111b..79908e76 100644 --- a/pysus/online_data/SIM.py +++ b/pysus/data/remote/SIM.py @@ -4,6 +4,7 @@ by fccoelho license: GPL V3 or Later """ + import os from ftplib import FTP, error_perm from typing import Union @@ -11,9 +12,8 @@ import pandas as pd from dbfread import DBF from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.sim import SIM -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, SIM +from pysus.utils.brasil import parse_UFs sim = SIM().load() @@ -68,17 +68,14 @@ def get_CID10_chapters_table(cache=True): ftp = FTP("ftp.datasus.gov.br") ftp.login() logger.debug( - f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}" - ) + f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}") ftp.cwd("/dissemin/publicos/SIM/CID10/TABELAS") logger.debug( - "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS" - ) + "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS") fname = "CIDCAP10.DBF" cachefile = os.path.join( - CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet" - ) + CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet") if os.path.exists(cachefile): logger.info(f"Local parquet file found at {cachefile}") @@ -114,17 +111,14 @@ def get_CID10_table(cache=True): ftp 
= FTP("ftp.datasus.gov.br") ftp.login() logger.debug( - f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}" - ) + f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}") ftp.cwd("/dissemin/publicos/SIM/CID10/TABELAS") logger.debug( - "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS" - ) + "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS") fname = "CID10.DBF" cachefile = os.path.join( - CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet" - ) + CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet") if os.path.exists(cachefile): logger.info(f"Local parquet file found at {cachefile}") @@ -160,17 +154,14 @@ def get_CID9_table(cache=True): ftp = FTP("ftp.datasus.gov.br") ftp.login() logger.debug( - f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}" - ) + f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}") ftp.cwd("/dissemin/publicos/SIM/CID9/TABELAS") logger.debug( - "Changing FTP work dir to: /dissemin/publicos/SIM/CID9/TABELAS" - ) + "Changing FTP work dir to: /dissemin/publicos/SIM/CID9/TABELAS") fname = "CID9.DBF" cachefile = os.path.join( - CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet" - ) + CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet") if os.path.exists(cachefile): logger.info(f"Local parquet file found at {cachefile}") @@ -206,17 +197,14 @@ def get_municipios(cache=True): ftp = FTP("ftp.datasus.gov.br") ftp.login() logger.debug( - f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}" - ) + f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}") ftp.cwd("/dissemin/publicos/SIM/CID10/TABELAS") logger.debug( - "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS" - ) + "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS") fname = "CADMUN.DBF" cachefile = os.path.join( - CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet" - ) + CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet") if os.path.exists(cachefile): 
logger.info(f"Local parquet file found at {cachefile}") @@ -252,16 +240,13 @@ def get_ocupations(cache=True): ftp = FTP("ftp.datasus.gov.br") ftp.login() logger.debug( - f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}" - ) + f"Stablishing connection with ftp.datasus.gov.br.\n{ftp.welcome}") ftp.cwd("/dissemin/publicos/SIM/CID10/TABELAS") logger.debug( - "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS" - ) + "Changing FTP work dir to: /dissemin/publicos/SIM/CID10/TABELAS") fname = "TABOCUP.DBF" cachefile = os.path.join( - CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet" - ) + CACHEPATH, "SIM_" + fname.split(".")[0] + "_.parquet") if os.path.exists(cachefile): logger.info(f"Local parquet file found at {cachefile}") diff --git a/pysus/online_data/SINAN.py b/pysus/data/remote/SINAN.py similarity index 89% rename from pysus/online_data/SINAN.py rename to pysus/data/remote/SINAN.py index fe5692db..abef5277 100644 --- a/pysus/online_data/SINAN.py +++ b/pysus/data/remote/SINAN.py @@ -2,8 +2,7 @@ from typing import Union import pandas as pd -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.sinan import SINAN +from pysus.api.ftp import CACHEPATH, SINAN sinan = SINAN().load() @@ -43,10 +42,8 @@ def download( def metadata_df(disease_code: str) -> pd.DataFrame: metadata_file = ( - Path(__file__).parent.parent - / "metadata" - / "SINAN" - / f"{disease_code}.tar.gz" + Path(__file__).parent.parent / "metadata" / + "SINAN" / f"{disease_code}.tar.gz" ) if metadata_file.exists(): df = pd.read_csv( diff --git a/pysus/online_data/SINASC.py b/pysus/data/remote/SINASC.py similarity index 92% rename from pysus/online_data/SINASC.py rename to pysus/data/remote/SINASC.py index 2469d88a..5307475a 100644 --- a/pysus/online_data/SINASC.py +++ b/pysus/data/remote/SINASC.py @@ -4,12 +4,12 @@ by fccoelho license: GPL V3 or Later """ + from typing import Union from loguru import logger -from pysus.ftp import CACHEPATH -from pysus.ftp.databases.sinasc 
import SINASC -from pysus.ftp.utils import parse_UFs +from pysus.api.ftp import CACHEPATH, SINASC +from pysus.utils.brasil import parse_UFs sinasc = SINASC().load() diff --git a/pysus/online_data/__init__.py b/pysus/data/remote/__init__.py similarity index 100% rename from pysus/online_data/__init__.py rename to pysus/data/remote/__init__.py diff --git a/pysus/online_data/territory.py b/pysus/data/remote/territory.py similarity index 92% rename from pysus/online_data/territory.py rename to pysus/data/remote/territory.py index 404a5ad2..7ee6306d 100644 --- a/pysus/online_data/territory.py +++ b/pysus/data/remote/territory.py @@ -1,6 +1,6 @@ from typing import List, Union -from pysus.ftp import CACHEPATH, Directory, File +from pysus.api.ftp import CACHEPATH, Directory, File def list_tables() -> List[File]: diff --git a/pysus/online_data/vaccine.py b/pysus/data/remote/vaccine.py similarity index 98% rename from pysus/online_data/vaccine.py rename to pysus/data/remote/vaccine.py index 77399f7c..4f874334 100644 --- a/pysus/online_data/vaccine.py +++ b/pysus/data/remote/vaccine.py @@ -5,6 +5,7 @@ - COVID-19 in 2020-2021 Downloaded as described [here](http://opendatasus.saude.gov.br/dataset/b772ee55-07cd-44d8-958f-b12edd004e0b/resource/5916b3a4-81e7-4ad5-adb6-b884ff198dc1/download/manual_api_vacina_covid-19.pdf) # noqa """ + import json import os from json import JSONDecodeError @@ -12,7 +13,7 @@ import pandas as pd import requests from loguru import logger -from pysus.ftp import CACHEPATH +from pysus.api.ftp import CACHEPATH from requests.auth import HTTPBasicAuth @@ -41,8 +42,7 @@ def download_covid(uf=None, only_header=False): tempfile = os.path.join(CACHEPATH, f"Vaccine_temp_{UF}.csv.gz") if os.path.exists(tempfile): print( - "loading from cache. Returning an iterator of Dataframes in chunks" - " of 5000." + "loading from cache. Returning an iterator of Dataframes in chunks of 5000." 
) return pd.read_csv(tempfile, chunksize=5000) diff --git a/pysus/ftp/utils.py b/pysus/ftp/utils.py deleted file mode 100644 index b700474d..00000000 --- a/pysus/ftp/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -import datetime -from typing import Union - -from pysus.ftp import to_list -from pysus.utilities.brasil import MONTHS, UFs # noqa - - -def zfill_year(year: Union[str, int]) -> int: - """ - Formats a len(2) year into len(4) with the correct year preffix - E.g: 20 -> 2020; 99 -> 1999 - """ - year = str(year)[-2:].zfill(2) - current_year = str(datetime.datetime.now().year)[-2:] - suffix = "19" if str(year) > current_year else "20" - return int(suffix + str(year)) - - -def parse_UFs(UF: Union[list[str], str]) -> list: - """ - Formats states abbreviations into correct format and retuns a list. - Also checks if there is an incorrect UF in the list. - E.g: ['SC', 'mt', 'ba'] -> ['SC', 'MT', 'BA'] - """ - ufs = [uf.upper() for uf in to_list(UF)] - if not all(uf in list(UFs) for uf in ufs): - raise ValueError(f"Unknown UF(s): {set(ufs).difference(list(UFs))}") - return ufs diff --git a/pysus/online_data/Infogripe.py b/pysus/online_data/Infogripe.py deleted file mode 100644 index bd496c79..00000000 --- a/pysus/online_data/Infogripe.py +++ /dev/null @@ -1,23 +0,0 @@ -""" -Downloads data made available by the Infogripe service -""" - -import pandas as pd - -BASEURL = r"https://gitlab.fiocruz.br/marcelo.gomes/infogripe/-/raw/master/Dados/InfoGripe/" # noqa -DATASETS = { - "Alerta de situação": r"tabela_de_alerta.csv", - "Casos por idade, sexo e virus": r"dados_semanais_faixa_etaria_sexo_virus.csv.gz", # noqa - "Casos Totais e estimativas": r"serie_temporal_com_estimativas_recentes.csv.gz", # noqa - "Valores esperados por localidades": "valores_esperados_por_localidade.csv", # noqa -} - - -def list_datasets(): - return list(DATASETS.keys()) - - -def download(dataset_name): - url = BASEURL + DATASETS[dataset_name] + "?inline=false" - df = pd.read_csv(url, delimiter=";", 
decimal=",") - return df diff --git a/pysus/preprocessing/sinan.py b/pysus/preprocessing/sinan.py deleted file mode 100644 index cb6945ed..00000000 --- a/pysus/preprocessing/sinan.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -from functools import lru_cache - -import geocoder -import numpy as np -import pandas as pd -import requests -from dbfread import DBF - - -def read_sinan_dbf(fname, encoding) -> pd.DataFrame: - """ - Read SINAN dbf file returning a Pandas Dataframe with - :param fname: dbf file name - :param encoding: Encoding of the dbf - :return: pandas dataframe - """ - db = DBF(fname, encoding=encoding) - df = pd.DataFrame(list(db)) - - def convert_week(x): - try: - w = int(x) % 100 - except ValueError: - w = np.nan - return w - - for cname in df.columns: - df[cname].replace("", np.nan, inplace=True) - if cname.startswith(("NU", "ID")): - try: - df[cname] = pd.to_numeric(df[cname]) - except ValueError as e: - print(f"Column {cname} could not be converted to numeric: {e}") - # certain IDs can be alphanumerical - pass - elif cname.startswith("SEM"): - df[cname] = df[cname].map(convert_week) - - return df - - -@lru_cache(maxsize=None) -def get_geocodes(geoc): - """ - Return city name and state two letter code from geocode - :param geoc: - :return: - """ - url = ( - "http://cidades.ibge.gov.br/services/jSonpMuns.php?" 
- "busca=330&featureClass=P&style=full&maxRows=5&name_startsWith={}" - ).format(geoc) - resp = requests.get(url) - for d in resp.json()["municipios"]: - if int(geoc) == int(d["c"]): - return [d["n"].encode("latin-1").decode("utf-8"), d["s"]] - - else: - raise KeyError("could not find geocode {} in ".format(geoc)) - - -def _address_generator(df, default=""): - for row in df.iterrows(): - line = dict(row[1]) - try: - line["cidade"] = ",".join(get_geocodes(line["ID_MN_RESI"])) - except KeyError: - print("Could not find geocode {} using default") - line["cidade"] = default - yield line[ - "NU_NOTIFIC" - ], "{NM_LOGRADO}, {NU_NUMERO}, {NM_BAIRRO}, {cidade}, Brasil".format( - **line - ) - - -def geocode(sinan_df, outfile, default_city): - """ - Geocode cases based on addresses included. - :param default_city: default city to use in case of bad Geocode found in - file. It can be "city, state" - :param sinan_df: Dataframe generated from sinan DBF - :param outfile: File on Which - """ - addrs = _address_generator(sinan_df, default_city) - if os.path.exists(outfile): - mode = "a" - coords = pd.read_csv(outfile) - geocoded = coords.NU_NOTIFIC.tolist() - else: - mode = "w" - geocoded = [] - with open(outfile, mode) as of: - if mode == "w": - of.write("NU_NOTIFIC,latitude,longitude\n") - for nu, ad in addrs: - # ad = ad.encode('latin-1').decode('utf-8') - if nu in geocoded: - continue - location = geocoder.google(ad) - if location is None: - raise NameError("Google could not find {}".format(ad)) - if location.latlng == []: - print( - ( - "Search for {} returned {} as coordinates, trying " - "reduced address:" - ).format(ad, location.latlng) - ) - ad = ",".join(ad.split(",")[2:]) - print(ad) - location = geocoder.google(ad) - try: - of.write( - "{},{},{}\n".format( - nu, location.latlng[0], location.latlng[1] - ) - ) - print("Successfully geolocated {}".format(ad)) - except IndexError: - print( - ( - "Search for {} returned {} as coordinates, " "skipping" - ).format(ad, 
location.latlng) - ) - of.write("{},nan,nan\n".format(nu)) diff --git a/pysus/utils/__init__.py b/pysus/utils/__init__.py new file mode 100644 index 00000000..7414d65c --- /dev/null +++ b/pysus/utils/__init__.py @@ -0,0 +1,25 @@ +import datetime +from typing import Union, TypeVar, List, Tuple + +from .brasil import * # noqa + + +T = TypeVar("T") + + +def to_list(item: Union[T, List[T], Tuple[T, ...], None]) -> List[T]: + """Parse any builtin data type into a list""" + if item is None: + return [] + return [item] if not isinstance(item, (list, tuple)) else list(item) + + +def zfill_year(year: Union[str, int]) -> int: + """ + Formats a len(2) year into len(4) with the correct year preffix + E.g: 20 -> 2020; 99 -> 1999 + """ + year = str(year)[-2:].zfill(2) + current_year = str(datetime.datetime.now().year)[-2:] + suffix = "19" if str(year) > current_year else "20" + return int(suffix + str(year)) diff --git a/pysus/utilities/brasil.py b/pysus/utils/brasil.py similarity index 70% rename from pysus/utilities/brasil.py rename to pysus/utils/brasil.py index 0024a7e4..ade8b406 100644 --- a/pysus/utilities/brasil.py +++ b/pysus/utils/brasil.py @@ -1,7 +1,18 @@ +__all__ = [ + "MUNICIPALITIES", + "MUN_BY_GEOCODE", + "UFs", + "MONTHS", + "get_city_name_by_geocode", + "parse_UFs", +] + import json from pathlib import Path from typing import Union +from pysus.utils import to_list + with open( f"{Path(__file__).parent}/municipios.json", "r", encoding="utf-8-sig" ) as muns: @@ -65,3 +76,15 @@ def get_city_name_by_geocode(geocode: Union[str, int]): """ return MUN_BY_GEOCODE[int(geocode)] + + +def parse_UFs(UF: Union[list[str], str]) -> list: + """ + Formats states abbreviations into correct format and retuns a list. + Also checks if there is an incorrect UF in the list. 
+ E.g: ['SC', 'mt', 'ba'] -> ['SC', 'MT', 'BA'] + """ + ufs = [uf.upper() for uf in to_list(UF)] + if not all(uf in list(UFs) for uf in ufs): + raise ValueError(f"Unknown UF(s): {set(ufs).difference(list(UFs))}") + return ufs diff --git a/pysus/preprocessing/decoders.py b/pysus/utils/decoders.py similarity index 94% rename from pysus/preprocessing/decoders.py rename to pysus/utils/decoders.py index 23215a6c..710824ca 100644 --- a/pysus/preprocessing/decoders.py +++ b/pysus/utils/decoders.py @@ -6,6 +6,21 @@ license: GPL V3 or Later """ +__all__ = [ + "decodifica_idade_SINAN", + "get_age_string", + "decodifica_idade_SIM", + "decodifica_data_SIM", + "is_valid_geocode", + "get_valid_geocodes", + "calculate_digit", + "add_dv", + "columns_as_category", + "translate_variables_SIM", + "classify_age", + "get_CID10_code_index", +] + __docformat__ = "restructuredtext en" import re from datetime import datetime, timedelta @@ -182,8 +197,7 @@ def translate_variables_SIM( # SEXO if "SEXO" in variables_names: df["SEXO"] = df.SEXO.replace( - {0: None, 9: None, 1: "Masculino", 2: "Feminino"} - ) + {0: None, 9: None, 1: "Masculino", 2: "Feminino"}) df["SEXO"] = df["SEXO"].astype("category") df["SEXO"] = df["SEXO"].cat.add_categories(["NA"]) df["SEXO"] = df["SEXO"].fillna("NA") @@ -287,9 +301,8 @@ def get_CID10_code_index(datasus_chapters): number_range_start = int(chapter_range[0][1:3]) number_range_finish = int(chapter_range[1][1:3]) for code in range(number_range_start, number_range_finish + 1): - code_index[f"{start_letter}{str(code).zfill(2)}"] = ( - ch_array_index + 1 - ) + code_index[f"{start_letter}{ + str(code).zfill(2)}"] = ch_array_index + 1 else: string_range_start = chapter_range[0][0] string_range_end = chapter_range[1][0] @@ -309,9 +322,7 @@ def get_CID10_code_index(datasus_chapters): else: # Middle letters number_range_start = 0 number_range_end = 99 - for code_number in range( - number_range_start, number_range_end + 1 - ): + for code_number in 
range(number_range_start, number_range_end + 1): code_index[f"{letter}{str(code_number).zfill(2)}"] = ( ch_array_index + 1 ) diff --git a/pysus/dataset/geocode_by_cities.json b/pysus/utils/geocode_by_cities.json similarity index 100% rename from pysus/dataset/geocode_by_cities.json rename to pysus/utils/geocode_by_cities.json diff --git a/pysus/utilities/municipios.json b/pysus/utils/municipios.json similarity index 100% rename from pysus/utilities/municipios.json rename to pysus/utils/municipios.json From cb6a314a87be261c63fcb76acc142662808c40b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Tue, 3 Mar 2026 14:05:36 -0300 Subject: [PATCH 3/6] remove circular imports --- pysus/utils/brasil.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pysus/utils/brasil.py b/pysus/utils/brasil.py index ade8b406..c1d59f3b 100644 --- a/pysus/utils/brasil.py +++ b/pysus/utils/brasil.py @@ -11,8 +11,6 @@ from pathlib import Path from typing import Union -from pysus.utils import to_list - with open( f"{Path(__file__).parent}/municipios.json", "r", encoding="utf-8-sig" ) as muns: @@ -84,7 +82,12 @@ def parse_UFs(UF: Union[list[str], str]) -> list: Also checks if there is an incorrect UF in the list. 
E.g: ['SC', 'mt', 'ba'] -> ['SC', 'MT', 'BA'] """ - ufs = [uf.upper() for uf in to_list(UF)] - if not all(uf in list(UFs) for uf in ufs): - raise ValueError(f"Unknown UF(s): {set(ufs).difference(list(UFs))}") + ufs = [uf.upper() for uf in ([UF] if isinstance(UF, str) else UF)] + + valid_ufs = set(UFs) + invalid = set(ufs).difference(valid_ufs) + + if invalid: + raise ValueError(f"Unknown UF(s): {invalid}") + return ufs From 15a90c4fbf689e92338ddb8ab423f8bbafb89b80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Sun, 15 Mar 2026 15:40:29 -0300 Subject: [PATCH 4/6] move individual database files to databases.py --- .gitignore | 1 + poetry.lock | 189 +++- pyproject.toml | 2 + pysus/__init__.py | 18 +- .../catalog/models.py => __init__.py} | 0 pysus/api/dadosgov/models.py | 47 +- pysus/api/ducklake/catalog.py | 0 pysus/api/ducklake/client.py | 56 ++ pysus/api/ducklake/models.py | 167 ++++ pysus/api/ducklake/storage.py | 0 pysus/api/ftp/__init__.py | 25 + pysus/api/ftp/client.py | 11 +- pysus/api/ftp/databases.py | 892 ++++++++++++++++++ pysus/api/ftp/databases/__init__.py | 34 - pysus/api/ftp/databases/ciha.py | 103 -- pysus/api/ftp/databases/cnes.py | 135 --- pysus/api/ftp/databases/ibge_datasus.py | 86 -- pysus/api/ftp/databases/pni.py | 95 -- pysus/api/ftp/databases/sia.py | 122 --- pysus/api/ftp/databases/sih.py | 105 --- pysus/api/ftp/databases/sim.py | 69 -- pysus/api/ftp/databases/sinan.py | 144 --- pysus/api/ftp/databases/sinasc.py | 82 -- 23 files changed, 1388 insertions(+), 995 deletions(-) rename pysus/api/{ducklake/catalog/models.py => __init__.py} (100%) create mode 100644 pysus/api/ducklake/catalog.py create mode 100644 pysus/api/ducklake/models.py create mode 100644 pysus/api/ducklake/storage.py create mode 100644 pysus/api/ftp/databases.py delete mode 100644 pysus/api/ftp/databases/__init__.py delete mode 100644 pysus/api/ftp/databases/ciha.py delete mode 100644 pysus/api/ftp/databases/cnes.py delete mode 100644 
pysus/api/ftp/databases/ibge_datasus.py delete mode 100644 pysus/api/ftp/databases/pni.py delete mode 100644 pysus/api/ftp/databases/sia.py delete mode 100644 pysus/api/ftp/databases/sih.py delete mode 100644 pysus/api/ftp/databases/sim.py delete mode 100644 pysus/api/ftp/databases/sinan.py delete mode 100644 pysus/api/ftp/databases/sinasc.py diff --git a/.gitignore b/.gitignore index ebed1ee5..7364e04d 100644 --- a/.gitignore +++ b/.gitignore @@ -179,6 +179,7 @@ dmypy.json # pytype static type analyzer .pytype/ +.pylintrc # Cython debug symbols cython_debug/ diff --git a/poetry.lock b/poetry.lock index 3454fabd..f14f4f85 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1028,6 +1028,23 @@ files = [ [package.extras] all = ["adbc-driver-manager", "fsspec", "ipython", "numpy", "pandas", "pyarrow"] +[[package]] +name = "duckdb-engine" +version = "0.17.0" +description = "SQLAlchemy driver for duckdb" +optional = false +python-versions = "<4,>=3.9" +groups = ["main"] +files = [ + {file = "duckdb_engine-0.17.0-py3-none-any.whl", hash = "sha256:3aa72085e536b43faab635f487baf77ddc5750069c16a2f8d9c6c3cb6083e979"}, + {file = "duckdb_engine-0.17.0.tar.gz", hash = "sha256:396b23869754e536aa80881a92622b8b488015cf711c5a40032d05d2cf08f3cf"}, +] + +[package.dependencies] +duckdb = ">=0.5.0" +packaging = ">=21" +sqlalchemy = ">=1.3.22" + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -1359,6 +1376,74 @@ ratelim = "*" requests = "*" six = "*" +[[package]] +name = "greenlet" +version = "3.3.2" +description = "Lightweight in-process concurrent programming" +optional = false +python-versions = ">=3.10" +groups = ["main"] +markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\"" +files = [ + {file = "greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = 
"sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d"}, + {file = "greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13"}, + {file = "greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e"}, + {file = "greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7"}, + {file = "greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f"}, + {file = "greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef"}, + {file = "greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca"}, + {file = "greenlet-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:5d0e35379f93a6d0222de929a25ab47b5eb35b5ef4721c2b9cbcc4036129ff1f"}, + {file = "greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86"}, + {file = "greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f"}, + {file = "greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55"}, + {file = "greenlet-3.3.2-cp311-cp311-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:4375a58e49522698d3e70cc0b801c19433021b5c37686f7ce9c65b0d5c8677d2"}, + {file = "greenlet-3.3.2-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:8e2cd90d413acbf5e77ae41e5d3c9b3ac1d011a756d7284d7f3f2b806bbd6358"}, + {file = "greenlet-3.3.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:442b6057453c8cb29b4fb36a2ac689382fc71112273726e2423f7f17dc73bf99"}, + {file = "greenlet-3.3.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:45abe8eb6339518180d5a7fa47fa01945414d7cca5ecb745346fc6a87d2750be"}, + {file = "greenlet-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e692b2dae4cc7077cbb11b47d258533b48c8fde69a33d0d8a82e2fe8d8531d5"}, + {file = "greenlet-3.3.2-cp311-cp311-win_arm64.whl", hash = "sha256:02b0a8682aecd4d3c6c18edf52bc8e51eacdd75c8eac52a790a210b06aa295fd"}, + {file = "greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd"}, + {file = "greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd"}, + {file = "greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac"}, + {file = "greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb"}, + {file = "greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070"}, + {file = "greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79"}, + {file = "greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395"}, + {file = "greenlet-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:34308836d8370bddadb41f5a7ce96879b72e2fdfb4e87729330c6ab52376409f"}, + {file = "greenlet-3.3.2-cp312-cp312-win_arm64.whl", hash = 
"sha256:d3a62fa76a32b462a97198e4c9e99afb9ab375115e74e9a83ce180e7a496f643"}, + {file = "greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4"}, + {file = "greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986"}, + {file = "greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92"}, + {file = "greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd"}, + {file = "greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab"}, + {file = "greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a"}, + {file = "greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b"}, + {file = "greenlet-3.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:a7945dd0eab63ded0a48e4dcade82939783c172290a7903ebde9e184333ca124"}, + {file = "greenlet-3.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:394ead29063ee3515b4e775216cb756b2e3b4a7e55ae8fd884f17fa579e6b327"}, + {file = "greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab"}, + {file = "greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082"}, + {file = "greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9"}, + {file = 
"greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9"}, + {file = "greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506"}, + {file = "greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce"}, + {file = "greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5"}, + {file = "greenlet-3.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:8c4dd0f3997cf2512f7601563cc90dfb8957c0cff1e3a1b23991d4ea1776c492"}, + {file = "greenlet-3.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:cd6f9e2bbd46321ba3bbb4c8a15794d32960e3b0ae2cc4d49a1a53d314805d71"}, + {file = "greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54"}, + {file = "greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4"}, + {file = "greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff"}, + {file = "greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf"}, + {file = "greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4"}, + {file = "greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727"}, + {file = "greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e"}, + {file = "greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a"}, + {file = "greenlet-3.3.2.tar.gz", hash = "sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2"}, +] + +[package.extras] +docs = ["Sphinx", "furo"] +test = ["objgraph", "psutil", "setuptools"] + [[package]] name = "h11" version = "0.14.0" @@ -4156,6 +4241,108 @@ lint = ["mypy", "ruff (==0.5.5)", "types-docutils"] standalone = ["Sphinx (>=5)"] test = ["pytest"] +[[package]] +name = "sqlalchemy" +version = "2.0.48" +description = "Database Abstraction Library" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "sqlalchemy-2.0.48-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7001dc9d5f6bb4deb756d5928eaefe1930f6f4179da3924cbd95ee0e9f4dce89"}, + {file = "sqlalchemy-2.0.48-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1a89ce07ad2d4b8cfc30bd5889ec40613e028ed80ef47da7d9dd2ce969ad30e0"}, + {file = "sqlalchemy-2.0.48-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10853a53a4a00417a00913d270dddda75815fcb80675874285f41051c094d7dd"}, + {file = "sqlalchemy-2.0.48-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:fac0fa4e4f55f118fd87177dacb1c6522fe39c28d498d259014020fec9164c29"}, + {file = "sqlalchemy-2.0.48-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3713e21ea67bca727eecd4a24bf68bcd414c403faae4989442be60994301ded0"}, + {file = "sqlalchemy-2.0.48-cp310-cp310-win32.whl", hash = "sha256:d404dc897ce10e565d647795861762aa2d06ca3f4a728c5e9a835096c7059018"}, + {file = "sqlalchemy-2.0.48-cp310-cp310-win_amd64.whl", hash = "sha256:841a94c66577661c1f088ac958cd767d7c9bf507698f45afffe7a4017049de76"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:1b4c575df7368b3b13e0cebf01d4679f9a28ed2ae6c1cd0b1d5beffb6b2007dc"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e83e3f959aaa1c9df95c22c528096d94848a1bc819f5d0ebf7ee3df0ca63db6c"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6f7b7243850edd0b8b97043f04748f31de50cf426e939def5c16bedb540698f7"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:82745b03b4043e04600a6b665cb98697c4339b24e34d74b0a2ac0a2488b6f94d"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e5e088bf43f6ee6fec7dbf1ef7ff7774a616c236b5c0cb3e00662dd71a56b571"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-win32.whl", hash = "sha256:9c7d0a77e36b5f4b01ca398482230ab792061d243d715299b44a0b55c89fe617"}, + {file = "sqlalchemy-2.0.48-cp311-cp311-win_amd64.whl", hash = "sha256:583849c743e0e3c9bb7446f5b5addeacedc168d657a69b418063dfdb2d90081c"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:348174f228b99f33ca1f773e85510e08927620caa59ffe7803b37170df30332b"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53667b5f668991e279d21f94ccfa6e45b4e3f4500e7591ae59a8012d0f010dcb"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34634e196f620c7a61d18d5cf7dc841ca6daa7961aed75d532b7e58b309ac894"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:546572a1793cc35857a2ffa1fe0e58571af1779bcc1ffa7c9fb0839885ed69a9"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07edba08061bc277bfdc772dd2a1a43978f5a45994dd3ede26391b405c15221e"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-win32.whl", hash = 
"sha256:908a3fa6908716f803b86896a09a2c4dde5f5ce2bb07aacc71ffebb57986ce99"}, + {file = "sqlalchemy-2.0.48-cp312-cp312-win_amd64.whl", hash = "sha256:68549c403f79a8e25984376480959975212a670405e3913830614432b5daa07a"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e3070c03701037aa418b55d36532ecb8f8446ed0135acb71c678dbdf12f5b6e4"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2645b7d8a738763b664a12a1542c89c940daa55196e8d73e55b169cc5c99f65f"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b19151e76620a412c2ac1c6f977ab1b9fa7ad43140178345136456d5265b32ed"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b193a7e29fd9fa56e502920dca47dffe60f97c863494946bd698c6058a55658"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:36ac4ddc3d33e852da9cb00ffb08cea62ca05c39711dc67062ca2bb1fae35fd8"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-win32.whl", hash = "sha256:389b984139278f97757ea9b08993e7b9d1142912e046ab7d82b3fbaeb0209131"}, + {file = "sqlalchemy-2.0.48-cp313-cp313-win_amd64.whl", hash = "sha256:d612c976cbc2d17edfcc4c006874b764e85e990c29ce9bd411f926bbfb02b9a2"}, + {file = "sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69f5bc24904d3bc3640961cddd2523e361257ef68585d6e364166dfbe8c78fae"}, + {file = "sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd08b90d211c086181caed76931ecfa2bdfc83eea3cfccdb0f82abc6c4b876cb"}, + {file = "sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1ccd42229aaac2df431562117ac7e667d702e8e44afdb6cf0e50fa3f18160f0b"}, + {file = "sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:f0dcbc588cd5b725162c076eb9119342f6579c7f7f55057bb7e3c6ff27e13121"}, + {file = "sqlalchemy-2.0.48-cp313-cp313t-win32.whl", hash = "sha256:9764014ef5e58aab76220c5664abb5d47d5bc858d9debf821e55cfdd0f128485"}, + {file = "sqlalchemy-2.0.48-cp313-cp313t-win_amd64.whl", hash = "sha256:e2f35b4cccd9ed286ad62e0a3c3ac21e06c02abc60e20aa51a3e305a30f5fa79"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e2d0d88686e3d35a76f3e15a34e8c12d73fc94c1dea1cd55782e695cc14086dd"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49b7bddc1eebf011ea5ab722fdbe67a401caa34a350d278cc7733c0e88fecb1f"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:426c5ca86415d9b8945c7073597e10de9644802e2ff502b8e1f11a7a2642856b"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:288937433bd44e3990e7da2402fabc44a3c6c25d3704da066b85b89a85474ae0"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8183dc57ae7d9edc1346e007e840a9f3d6aa7b7f165203a99e16f447150140d2"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-win32.whl", hash = "sha256:1182437cb2d97988cfea04cf6cdc0b0bb9c74f4d56ec3d08b81e23d621a28cc6"}, + {file = "sqlalchemy-2.0.48-cp314-cp314-win_amd64.whl", hash = "sha256:144921da96c08feb9e2b052c5c5c1d0d151a292c6135623c6b2c041f2a45f9e0"}, + {file = "sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5aee45fd2c6c0f2b9cdddf48c48535e7471e42d6fb81adfde801da0bd5b93241"}, + {file = "sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cddca31edf8b0653090cbb54562ca027c421c58ddde2c0685f49ff56a1690e0"}, + {file = "sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = 
"sha256:7a936f1bb23d370b7c8cc079d5fce4c7d18da87a33c6744e51a93b0f9e97e9b3"}, + {file = "sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e004aa9248e8cb0a5f9b96d003ca7c1c0a5da8decd1066e7b53f59eb8ce7c62b"}, + {file = "sqlalchemy-2.0.48-cp314-cp314t-win32.whl", hash = "sha256:b8438ec5594980d405251451c5b7ea9aa58dda38eb7ac35fb7e4c696712ee24f"}, + {file = "sqlalchemy-2.0.48-cp314-cp314t-win_amd64.whl", hash = "sha256:d854b3970067297f3a7fbd7a4683587134aa9b3877ee15aa29eea478dc68f933"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f8649a14caa5f8a243628b1d61cf530ad9ae4578814ba726816adb1121fc493e"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6bb85c546591569558571aa1b06aba711b26ae62f111e15e56136d69920e1616"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a6b764fb312bd35e47797ad2e63f0d323792837a6ac785a4ca967019357d2bc7"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:7c998f2ace8bf76b453b75dbcca500d4f4b9dd3908c13e89b86289b37784848b"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d64177f443594c8697369c10e4bbcac70ef558e0f7921a1de7e4a3d1734bcf67"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-win32.whl", hash = "sha256:01f6bbd4308b23240cf7d3ef117557c8fd097ec9549d5d8a52977544e35b40ad"}, + {file = "sqlalchemy-2.0.48-cp38-cp38-win_amd64.whl", hash = "sha256:858e433f12b0e5b3ed2f8da917433b634f4937d0e8793e5cb33c54a1a01df565"}, + {file = "sqlalchemy-2.0.48-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4599a95f9430ae0de82b52ff0d27304fe898c17cb5f4099f7438a51b9998ac77"}, + {file = "sqlalchemy-2.0.48-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f27f9da0a7d22b9f981108fd4b62f8b5743423388915a563e651c20d06c1f457"}, + {file = 
"sqlalchemy-2.0.48-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d8fcccbbc0c13c13702c471da398b8cd72ba740dca5859f148ae8e0e8e0d3e7e"}, + {file = "sqlalchemy-2.0.48-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a5b429eb84339f9f05e06083f119ad814e6d85e27ecbdf9c551dfdbb128eaf8a"}, + {file = "sqlalchemy-2.0.48-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bcb8ebbf2e2c36cfe01a94f2438012c6a9d494cf80f129d9753bcdf33bfc35a6"}, + {file = "sqlalchemy-2.0.48-cp39-cp39-win32.whl", hash = "sha256:e214d546c8ecb5fc22d6e6011746082abf13a9cf46eefb45769c7b31407c97b5"}, + {file = "sqlalchemy-2.0.48-cp39-cp39-win_amd64.whl", hash = "sha256:b8fc3454b4f3bd0a368001d0e968852dad45a873f8b4babd41bc302ec851a099"}, + {file = "sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096"}, + {file = "sqlalchemy-2.0.48.tar.gz", hash = "sha256:5ca74f37f3369b45e1f6b7b06afb182af1fd5dde009e4ffd831830d98cbe5fe7"}, +] + +[package.dependencies] +greenlet = {version = ">=1", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +typing-extensions = ">=4.6.0" + +[package.extras] +aiomysql = ["aiomysql (>=0.2.0)", "greenlet (>=1)"] +aioodbc = ["aioodbc", "greenlet (>=1)"] +aiosqlite = ["aiosqlite", "greenlet (>=1)", "typing_extensions (!=3.10.0.1)"] +asyncio = ["greenlet (>=1)"] +asyncmy = ["asyncmy (>=0.2.3,!=0.2.4,!=0.2.6)", "greenlet (>=1)"] +mariadb-connector = ["mariadb (>=1.0.1,!=1.1.2,!=1.1.5,!=1.1.10)"] +mssql = ["pyodbc"] +mssql-pymssql = ["pymssql"] +mssql-pyodbc = ["pyodbc"] +mypy = ["mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0)"] +mysql-connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=8)"] +oracle-oracledb = ["oracledb (>=1.0.1)"] +postgresql = ["psycopg2 (>=2.7)"] 
+postgresql-asyncpg = ["asyncpg", "greenlet (>=1)"] +postgresql-pg8000 = ["pg8000 (>=1.29.1)"] +postgresql-psycopg = ["psycopg (>=3.0.7)"] +postgresql-psycopg2binary = ["psycopg2-binary"] +postgresql-psycopg2cffi = ["psycopg2cffi"] +postgresql-psycopgbinary = ["psycopg[binary] (>=3.0.7)"] +pymysql = ["pymysql"] +sqlcipher = ["sqlcipher3_binary"] + [[package]] name = "stack-data" version = "0.6.3" @@ -4595,4 +4782,4 @@ preprocessing = [] [metadata] lock-version = "2.1" python-versions = ">=3.10,<3.14" -content-hash = "4b551ecb1dddda94c2ea6579463188e6aa0ab5da486b63dd8bed941ce9a4d7db" +content-hash = "47cffe061807056ea49f027be88f4c848bd92c22bd4f45054d5b0b3896ae2e87" diff --git a/pyproject.toml b/pyproject.toml index 781738c0..f9504e76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,8 @@ humanize = "^4.8.0" typing-extensions = ">=4.10.0" pydantic = "^2.12.5" duckdb = "^1.4.4" +duckdb-engine = "^0.17.0" +sqlalchemy = "^2.0.48" [tool.poetry.group.dev.dependencies] pytest = ">=6.1.0" diff --git a/pysus/__init__.py b/pysus/__init__.py index 1cfef31f..1d64ab45 100644 --- a/pysus/__init__.py +++ b/pysus/__init__.py @@ -1,8 +1,16 @@ -# type: ignore[attr-defined] """PySUS Python package""" +import os +import pathlib +from typing import Final from importlib import metadata as importlib_metadata + +CACHEPATH: Final[str] = os.getenv( + "PYSUS_CACHEPATH", + os.path.join(str(pathlib.Path.home()), "pysus"), +) + from pysus.api.ftp.databases import * # noqa @@ -10,14 +18,8 @@ def get_version() -> str: try: return importlib_metadata.version(__name__) except importlib_metadata.PackageNotFoundError: # pragma: no cover - return "1.0.1" # changed by semantic-release + return "1.0.1" version: str = get_version() __version__: str = version - -__all__ = [ - "AVAILABLE_DATABASES", - "version", - "__version__", -] diff --git a/pysus/api/ducklake/catalog/models.py b/pysus/api/__init__.py similarity index 100% rename from pysus/api/ducklake/catalog/models.py rename to 
pysus/api/__init__.py diff --git a/pysus/api/dadosgov/models.py b/pysus/api/dadosgov/models.py index 149cb0fb..407e3560 100644 --- a/pysus/api/dadosgov/models.py +++ b/pysus/api/dadosgov/models.py @@ -1,6 +1,10 @@ -from pydantic import BaseModel, Field, BeforeValidator +import requests +from pathlib import Path from datetime import datetime as dt -from typing import Optional, List, Any, Annotated +from typing import Optional, List, Any, Annotated, Union +from pydantic import BaseModel, Field, BeforeValidator + +from pysus import CACHEPATH def to_datetime(value: Any) -> Optional[dt]: @@ -30,6 +34,9 @@ class Tag(BaseModel): name: str display_name: Optional[str] = None + def __str__(self): + return self.name + class Resource(BaseModel): id: str @@ -38,14 +45,37 @@ class Resource(BaseModel): url: str = Field(alias="link") format: str = Field(alias="formato") size: int = Field(alias="tamanho") - cataloging_date: DateTime = Field(None, alias="dataCatalogacao") - last_modified: DateTime = Field(None, alias="dataUltimaAtualizacaoArquivo") + cataloging_date: Optional[str] = Field(None, alias="dataCatalogacao") + last_modified: Optional[str] = Field( + None, + alias="dataUltimaAtualizacaoArquivo", + ) download_count: Optional[int] = Field(None, alias="quantidadeDownloads") file_name: Optional[str] = Field(None, alias="nomeArquivo") resource_type: Optional[str] = Field(None, alias="tipo") order_number: Optional[int] = Field(None, alias="numOrdem") dataset_id: Optional[str] = Field(None, alias="idConjuntoDados") + def __str__(self): + return self.file_name + + def download(self, target_dir: Union[str, Path] = CACHEPATH) -> Path: + target_path = Path(target_dir) + target_path.mkdir(parents=True, exist_ok=True) + + output_file = target_path / ( + self.file_name or f"{self.id}.{self.format.lower()}" + ) + + response = requests.get(self.url, stream=True) + response.raise_for_status() + + with open(output_file, "wb") as f: + for chunk in response.iter_content(chunk_size=8192): + 
f.write(chunk) + + return output_file + class DatasetDetail(BaseModel): id: str @@ -63,7 +93,8 @@ class DatasetDetail(BaseModel): is_open_data: Bool = Field(alias="dadosAbertos") is_discontinued: Bool = Field(alias="descontinuado") is_private: Bool = Field(False, alias="privado") - metadata_updated: DateTime = Field(None, alias="dataUltimaAtualizacaoMetadados") + metadata_updated: DateTime = Field( + None, alias="dataUltimaAtualizacaoMetadados") file_updated: DateTime = Field(None, alias="dataUltimaAtualizacaoArquivo") cataloging_date: DateTime = Field(None, alias="dataCatalogacao") visibility: str = Field(alias="visibilidade") @@ -71,6 +102,9 @@ class DatasetDetail(BaseModel): seal: Optional[str] = Field(None, alias="selo") source: Optional[str] = Field(None, alias="origemCadastro") + def __str__(self): + return self.id + class DatasetSummary(BaseModel): id: str @@ -81,3 +115,6 @@ class DatasetSummary(BaseModel): cataloging_date: DateTime = Field(None, alias="catalogacao") metadata_modified: DateTime = Field(None, alias="ultimaAlteracaoMetadados") last_update: DateTime = Field(None, alias="ultimaAtualizacaoDados") + + def __str__(self): + return self.name diff --git a/pysus/api/ducklake/catalog.py b/pysus/api/ducklake/catalog.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ducklake/client.py b/pysus/api/ducklake/client.py index e69de29b..6818ce3b 100644 --- a/pysus/api/ducklake/client.py +++ b/pysus/api/ducklake/client.py @@ -0,0 +1,56 @@ +import requests +from pathlib import Path + +import duckdb + +from pysus import CACHEPATH + + +class DuckLake: + def __init__(self): + self.endpoint = "nbg1.your-objectstorage.com" + self.remote_url = f"https://{self.endpoint}/pysus/public/catalog.db" + self.cache_dir = Path(CACHEPATH) / "ducklake" + self.cache_dir.mkdir(parents=True, exist_ok=True) + self.catalog_local = self.cache_dir / "catalog.db" + self._ensure_catalog() + self.con = self._connect() + + def _remote_size(self): + r = 
requests.head(self.remote_url) + r.raise_for_status() + return int(r.headers.get("content-length", 0)) + + def _local_size(self): + if not self.catalog_local.exists(): + return None + return self.catalog_local.stat().st_size + + def _download_catalog(self): + r = requests.get(self.remote_url, stream=True) + r.raise_for_status() + with open(self.catalog_local, "wb") as f: + for chunk in r.iter_content(chunk_size=1024 * 1024): + f.write(chunk) + + def _ensure_catalog(self): + if self._remote_size() != self._local_size(): + self._download_catalog() + + def _connect(self): + con = duckdb.connect() + con.execute( + f""" + SET s3_endpoint='{self.endpoint}'; + SET s3_region='nbg1'; + SET s3_url_style='path'; + SET s3_use_ssl=true; + """ + ) + con.execute( + f""" + ATTACH 'ducklake:{self.catalog_local}' AS pysus; + USE pysus; + """ + ) + return con diff --git a/pysus/api/ducklake/models.py b/pysus/api/ducklake/models.py new file mode 100644 index 00000000..54d6850c --- /dev/null +++ b/pysus/api/ducklake/models.py @@ -0,0 +1,167 @@ +import enum + +from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy import ( + Column, + Integer, + String, + ForeignKey, + Date, + Boolean, + Index, + Enum, +) + +Base = declarative_base() + + +class Catalog(Base): + __abstract__ = True + __table_args__ = {"schema": "pysus"} + + +class Dataset(Catalog): + __tablename__ = "datasets" + + id = Column(Integer, primary_key=True) + name = Column(String, nullable=False, unique=True, index=True) + metadata_id = Column( + Integer, + ForeignKey("pysus.dataset_metadata.id"), + index=True, + ) + + dataset_metadata = relationship( + "DatasetMetadata", + back_populates="datasets", + ) + + groups = relationship( + "DatasetGroup", + back_populates="dataset", + cascade="all, delete-orphan", + ) + + columns = relationship( + "ColumnDefinition", + back_populates="dataset", + cascade="all, delete-orphan", + ) + + +class ColumnDefinition(Catalog): + __tablename__ = "dataset_columns" + + id = 
Column(Integer, primary_key=True) + dataset_id = Column( + Integer, + ForeignKey("pysus.datasets.id"), + nullable=False, + index=True, + ) + name = Column(String, nullable=False) + type = Column(String, nullable=False) + description = Column(String, nullable=True) + nullable = Column(Boolean, nullable=False, default=True) + position = Column(Integer, nullable=False, index=True) + + dataset = relationship("Dataset", back_populates="columns") + + __table_args__ = ( + Index("ix_columns_dataset_name", "dataset_id", "name"), + {"schema": "pysus"}, + ) + + +class DatasetGroup(Catalog): + __tablename__ = "dataset_groups" + + id = Column(Integer, primary_key=True) + name = Column(String, nullable=False) + dataset_id = Column( + Integer, + ForeignKey("pysus.datasets.id"), + nullable=False, + index=True, + ) + metadata_id = Column( + Integer, + ForeignKey("pysus.dataset_group_metadata.id"), + index=True, + ) + + dataset = relationship( + "Dataset", + back_populates="groups", + ) + + group_metadata = relationship( + "DatasetGroupMetadata", + back_populates="groups", + ) + + files = relationship( + "File", + back_populates="group", + cascade="all, delete-orphan", + ) + + __table_args__ = ( + Index("ix_groups_dataset_name", "dataset_id", "name"), + {"schema": "pysus"}, + ) + + +class File(Catalog): + __tablename__ = "files" + + id = Column(Integer, primary_key=True) + + group_id = Column( + Integer, + ForeignKey("pysus.dataset_groups.id"), + nullable=False, + index=True, + ) + path = Column(String, nullable=False, unique=True) + size = Column(Integer, nullable=False) + rows = Column(Integer, nullable=False) + + modified = Column(Date, nullable=False) + + group = relationship( + "DatasetGroup", + back_populates="files", + ) + + +class DatasetMetadata(Catalog): + class Origin(enum.Enum): + FTP = "ftp" + API = "api" + + __tablename__ = "dataset_metadata" + + id = Column(Integer, primary_key=True) + long_name = Column(String, nullable=False) + description = Column(String, 
nullable=True) + source = Column(String, nullable=True) + origin = Column(Enum(Origin), nullable=False) + + datasets = relationship( + "Dataset", + back_populates="dataset_metadata", + ) + + +class DatasetGroupMetadata(Catalog): + __tablename__ = "dataset_group_metadata" + + id = Column(Integer, primary_key=True) + long_name = Column(String, nullable=False) + description = Column(String, nullable=True) + + groups = relationship( + "DatasetGroup", + back_populates="group_metadata", + ) diff --git a/pysus/api/ducklake/storage.py b/pysus/api/ducklake/storage.py new file mode 100644 index 00000000..e69de29b diff --git a/pysus/api/ftp/__init__.py b/pysus/api/ftp/__init__.py index 65944e50..852efe38 100644 --- a/pysus/api/ftp/__init__.py +++ b/pysus/api/ftp/__init__.py @@ -1,3 +1,28 @@ from .client import * # noqa from .databases import * # noqa + +AVAILABLE_DATABASES = [ + CIHA, + CNES, + IBGEDATASUS, + PNI, + SIA, + SIH, + SIM, + SINAN, + SINASC, +] + +__all__ = [ + "CIHA", + "CNES", + "IBGEDATASUS", + "PNI", + "SIA", + "SIH", + "SIM", + "SINAN", + "SINASC", + "AVAILABLE_DATABASES", +] diff --git a/pysus/api/ftp/client.py b/pysus/api/ftp/client.py index 453ed3f4..88c46e1b 100644 --- a/pysus/api/ftp/client.py +++ b/pysus/api/ftp/client.py @@ -1,6 +1,6 @@ from __future__ import annotations -__all__ = ["File", "Directory", "Database", "CACHEPATH"] +__all__ = ["File", "Directory", "Database"] import asyncio import os @@ -23,19 +23,18 @@ import humanize from aioftp import Client from loguru import logger -from pysus.data.local import Data -from pysus.utils import to_list from tqdm import tqdm from typing_extensions import Self +from pysus import CACHEPATH +from pysus.data.local import Data +from pysus.utils import to_list + # Type aliases PathLike = Union[str, pathlib.Path] FileContent = Dict[str, Union["Directory", "File"]] # Constants -CACHEPATH: Final[str] = os.getenv( - "PYSUS_CACHEPATH", os.path.join(str(pathlib.Path.home()), "pysus") -) __cachepath__: 
Final[pathlib.Path] = pathlib.Path(CACHEPATH) __cachepath__.mkdir(exist_ok=True) diff --git a/pysus/api/ftp/databases.py b/pysus/api/ftp/databases.py new file mode 100644 index 00000000..c2dcf47c --- /dev/null +++ b/pysus/api/ftp/databases.py @@ -0,0 +1,892 @@ +__all__ = [ + "CIHA", + "CNES", + "IBGEDATASUS", + "PNI", + "SIA", + "SIH", + "SIM", + "SINAN", + "SINASC", +] + +from typing import List, Optional, Union, Literal + +from pysus.api.ftp import Database, Directory, File +from pysus.utils import UFs, parse_UFs, to_list, zfill_year, MONTHS + + +class CIHA(Database): + name = "CIHA" + paths = (Directory("/dissemin/publicos/CIHA/201101_/Dados"),) + metadata = { + "long_name": "Comunicação de Internação Hospitalar e Ambulatorial", + "source": "http://ciha.datasus.gov.br/CIHA/index.php", + "description": ( + "A CIHA foi criada para ampliar o processo de planejamento, " + "programação, controle, avaliação e regulação da assistência à " + "saúde permitindo um conhecimento mais abrangente e profundo dos " + "perfis nosológico e epidemiológico da população brasileira, da " + "capacidade instalada e do potencial de produção de serviços do " + "conjunto de estabelecimentos de saúde do País. O sistema permite " + "o acompanhamento das ações e serviços de saúde custeados " + "por: planos privados de assistência à saúde; planos públicos; " + "pagamento particular por pessoa física; pagamento particular por " + "pessoa jurídica; programas e projetos federais (PRONON, PRONAS, " + "PROADI); recursos próprios das secretarias municipais e estaduais" + " de saúde; DPVAT; gratuidade e, a partir da publicação da " + "Portaria GM/MS nº 2.905/2022, consórcios públicos. 
As " + "informações registradas na CIHA servem como base para o processo " + "de Certificação de Entidades Beneficentes de Assistência Social " + "em Saúde (CEBAS) e para monitoramento dos programas PRONAS e " + "PRONON" + ), + } + groups = { + "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial", + } + + def describe(self, file: File): + if not isinstance(file, File): + return file + + if file.extension.upper() in [".DBC", ".DBF"]: + group, _uf, year, month = self.format(file) + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": str(file.basename), + "group": self.groups[group], + "uf": uf, + "month": MONTHS[int(month)], + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + return file + + def format(self, file: File) -> tuple: + group, _uf = file.name[:4].upper(), file.name[4:6].upper() + year, month = file.name[-4:-2], file.name[-2:] + return group, _uf, zfill_year(year), month + + def get_files( + self, + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + group: Union[List[str], str] = "CIHA", + ) -> List[File]: + files = list( + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + ) + + groups = [gr.upper() for gr in to_list(group)] + + if not all(gr in list(self.groups) for gr in groups): + raise ValueError( + f"Unknown CIHA Group(s): {set(groups).difference(list(self.groups))}" + ) + + files = list(filter(lambda f: self.format(f)[0] in groups, files)) + + if uf: + ufs = parse_UFs(uf) + files = list(filter(lambda f: self.format(f)[1] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = [zfill_year(str(m)[-2:]) for m in to_list(year)] + files = list(filter(lambda f: self.format(f)[2] in years, files)) + + if month: + months = [str(y)[-2:].zfill(2) for y in to_list(month)] + files = list(filter(lambda f: self.format(f)[3] in 
months, files)) + + return files + + +class CNES(Database): + name = "CNES" + paths = (Directory("/dissemin/publicos/CNES/200508_/Dados"),) + metadata = { + "long_name": "Cadastro Nacional de Estabelecimentos de Saúde", + "source": "https://cnes.datasus.gov.br/", + "description": ( + "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o " + "sistema de informação oficial de cadastramento de informações " + "de todos os estabelecimentos de saúde no país, independentemente " + "de sua natureza jurídica ou de integrarem o Sistema Único de " + "Saúde (SUS). Trata-se do cadastro oficial do Ministério da " + "Saúde (MS) no tocante à realidade da capacidade instalada e " + "mão-de-obra assistencial de saúde no Brasil em estabelecimentos " + "de saúde públicos ou privados, com convênio SUS ou não." + ), + } + groups = { + "DC": "Dados Complementares", + "EE": "Estabelecimento de Ensino", + "EF": "Estabelecimento Filantrópico", + "EP": "Equipes", + "EQ": "Equipamentos", + "GM": "Gestão e Metas", + "HB": "Habilitação", + "IN": "Incentivos", + "LT": "Leitos", + "PF": "Profissional", + "RC": "Regra Contratual", + "SR": "Serviço Especializado", + "ST": "Estabelecimentos", + } + __loaded__ = set() + + def load( + self, + groups: Union[str, List[str]] = None, + ): + """ + Loads CNES Groups into content. 
Will convert the files and directories + found within FTP Directories into self.content + """ + if not self.__content__: + self.paths[0].load() + self.__content__ |= self.paths[0].__content__ + + if groups: + groups = to_list(groups) + + if not all(group in self.groups for group in [gr.upper() for gr in groups]): + raise ValueError( + f"Unknown CNES group(s): {set(groups).difference(self.groups)}" + ) + + for group in groups: + group = group.upper() + if group not in self.__loaded__: + directory = self.__content__[group] + directory.load() + self.__content__ |= directory.__content__ + self.__loaded__.add(directory.name) + return self + + def describe(self, file: File) -> dict: + if not isinstance(file, File): + return {} + + if file.name == "GMufAAmm": + # Leftover + return {} + + if file.extension.upper() in [".DBC", ".DBF"]: + group, _uf, year, month = self.format(file) + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": str(file.basename), + "group": self.groups[group], + "uf": uf, + "month": MONTHS[int(month)], + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + return {} + + def format(self, file: File) -> tuple: + group, _uf = file.name[:2].upper(), file.name[2:4].upper() + year, month = file.name[-4:-2], file.name[-2:] + return group, _uf, zfill_year(year), month + + def get_files( + self, + group: Union[List[str], str], + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + ) -> List[File]: + if not group: + raise ValueError("At least one CNES group is required") + + groups = [gr.upper() for gr in to_list(group)] + + self.load(groups) + + files = list(filter(lambda f: f.name[:2] in groups, self.files)) + + if uf: + ufs = parse_UFs(uf) + files = list(filter(lambda f: f.name[2:4] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = [str(m)[-2:].zfill(2) 
for m in to_list(year)] + files = list(filter(lambda f: f.name[-4:-2] in years, files)) + + if month: + months = [str(y)[-2:].zfill(2) for y in to_list(month)] + files = list(filter(lambda f: f.name[-2:] in months, files)) + + return files + + +class IBGEDATASUS(Database): + name = "IBGE-DataSUS" + paths = ( + Directory("/dissemin/publicos/IBGE/POP"), + Directory("/dissemin/publicos/IBGE/censo"), + Directory("/dissemin/publicos/IBGE/POPTCU"), + Directory("/dissemin/publicos/IBGE/projpop"), + # Directory("/dissemin/publicos/IBGE/Auxiliar") # this has a different file name pattern # noqa + ) + metadata = { + "long_name": "Populaçao Residente, Censos, Contagens " + "Populacionais e Projeçoes Intercensitarias", + "source": "ftp://ftp.datasus.gov.br/dissemin/publicos/IBGE", + "description": ( + "São aqui apresentados informações sobre a população residente, " + "estratificadas por município, faixas etárias e sexo, obtidas a " + "partir dos Censos Demográficos, Contagens Populacionais " + "e Projeções Intercensitárias." 
+ ), + } + + def describe(self, file: File) -> dict: + if file.extension.upper() in [".ZIP"]: + year = file.name.split(".")[0][-2:] + description = { + "name": str(file.basename), + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + return description + elif file.extension.upper() == ".DBF": + year = file.name[-2:] + description = { + "name": str(file.basename), + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + return description + return {} + + def format(self, file: File) -> tuple: + return (file.name[-2:],) + + def get_files( + self, + source: Literal["POP", "censo", "POPTCU", "projpop"] = "POPTCU", + year: Optional[Union[str, int, list]] = None, + *args, + **kwargs, + ) -> List[File]: + sources = ["POP", "censo", "POPTCU", "projpop"] + source_dir = None + + for dir in self.paths: + if source in sources and source in dir.path: + source_dir = dir + + if not source_dir: + raise ValueError(f"Unkown source {source}. 
Options: {sources}") + + files = source_dir.content + + if year: + if isinstance(year, (str, int)): + files = [ + f for f in files if self.describe(f)["year"] == zfill_year(year) + ] + elif isinstance(year, list): + files = [ + f + for f in files + if str(self.describe(f)["year"]) + in [str(zfill_year(y)) for y in year] + ] + + return files + + +class PNI(Database): + name = "PNI" + paths = (Directory("/dissemin/publicos/PNI/DADOS"),) + metadata = { + "long_name": ("Sistema de Informações do Programa Nacional de Imunizações"), + "source": ( + "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa + "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa + ), + "description": ( + "O SI-PNI é um sistema desenvolvido para possibilitar aos " + "gestores envolvidos no Programa Nacional de Imunização, a " + "avaliação dinâmica do risco quanto à ocorrência de surtos ou " + "epidemias, a partir do registro dos imunobiológicos aplicados e " + "do quantitativo populacional vacinado, agregados por faixa " + "etária, período de tempo e área geográfica. Possibilita também " + "o controle do estoque de imunobiológicos necessário aos " + "administradores que têm a incumbência de programar sua aquisição " + "e distribuição. Controla as indicações de aplicação de " + "vacinas de imunobiológicos especiais e seus eventos adversos, " + "dentro dos Centros de Referências em imunobiológicos especiais." 
+ ), + } + groups = { + "CPNI": "Cobertura Vacinal", # TODO: may be incorrect + "DPNI": "Doses Aplicadas", # TODO: may be incorrect + } + + def describe(self, file: File) -> dict: + if file.extension.upper() in [".DBC", ".DBF"]: + group, _uf, year = self.format(file) + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": file.basename, + "group": self.groups[group], + "uf": uf, + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + return {} + + def format(self, file: File) -> tuple: + if len(file.name) != 8: + raise ValueError(f"Can't format {file.name}") + + n = file.name + group, _uf, year = n[:4], n[4:6], n[-2:] + return group, _uf, zfill_year(year) + + def get_files( + self, + group: Union[list, Literal["CNPI", "DPNI"]], + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + ) -> List[File]: + files = list( + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + ) + + groups = [gr.upper() for gr in to_list(group)] + + if not all(gr in list(self.groups) for gr in groups): + raise ValueError( + f"Unknown PNI Group(s): {set(groups).difference(list(self.groups))}" + ) + + files = list(filter(lambda f: self.format(f)[0] in groups, files)) + + if uf: + ufs = parse_UFs(uf) + files = list(filter(lambda f: self.format(f)[1] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = [zfill_year(str(m)[-2:]) for m in to_list(year)] + files = list(filter(lambda f: self.format(f)[2] in years, files)) + + return files + + +class SIA(Database): + name = "SIA" + paths = ( + Directory("/dissemin/publicos/SIASUS/199407_200712/Dados"), + Directory("/dissemin/publicos/SIASUS/200801_/Dados"), + ) + metadata = { + "long_name": "Sistema de Informações Ambulatoriais", + "source": "http://sia.datasus.gov.br/principal/index.php", + "description": ( + "O Sistema de Informação Ambulatorial (SIA) foi instituído pela " + 
"Portaria GM/MS n.º 896 de 29 de junho de 1990. Originalmente, o " + "SIA foi concebido a partir do projeto SICAPS (Sistema de " + "Informação e Controle Ambulatorial da Previdência Social), em " + "que os conceitos, os objetivos e as diretrizes criados para o " + "desenvolvimento do SICAPS foram extremamente importantes e " + "amplamente utilizados para o desenvolvimento do SIA, tais" + " como: (i) o acompanhamento das programações físicas e " + "orçamentárias; (ii) o acompanhamento das ações de saúde " + "produzidas; (iii) a agilização do pagamento e controle " + "orçamentário e financeiro; e (iv) a formação de banco de dados " + "para contribuir com a construção do SUS." + ), + } + groups = { + "AB": "APAC de Cirurgia Bariátrica", + "ABO": "APAC de Acompanhamento Pós Cirurgia Bariátrica", + "ACF": "APAC de Confecção de Fístula", + "AD": "APAC de Laudos Diversos", + "AM": "APAC de Medicamentos", + "AMP": "APAC de Acompanhamento Multiprofissional", + "AN": "APAC de Nefrologia", + "AQ": "APAC de Quimioterapia", + "AR": "APAC de Radioterapia", + "ATD": "APAC de Tratamento Dialítico", + "BI": "Boletim de Produção Ambulatorial individualizado", + "IMPBO": "", # TODO + "PA": "Produção Ambulatorial", + "PAM": "", # TODO + "PAR": "", # TODO + "PAS": "", # TODO + "PS": "RAAS Psicossocial", + "SAD": "RAAS de Atenção Domiciliar", + } + + def describe(self, file: File) -> dict: + if file.extension.upper() == ".DBC": + group, _uf, year, month = self.format(file) + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": str(file.basename), + "group": self.groups[group], + "uf": uf, + "month": MONTHS[int(month)], + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + return {} + + def format(self, file: File) -> tuple: + if file.extension.upper() in [".DBC", ".DBF"]: + digits = "".join([d for d in file.name if d.isdigit()]) + if "_" in file.name: + name, _ = file.name.split("_") + 
digits = "".join([d for d in name if d.isdigit()]) + chars, _ = file.name.split(digits) + year, month = digits[:2], digits[2:] + group, uf = chars[:-2].upper(), chars[-2:].upper() + return group, uf, zfill_year(year), month + return () + + def get_files( + self, + group: Union[List[str], str], + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + ) -> List[File]: + files = list( + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + ) + + groups = [gr.upper() for gr in to_list(group)] + + if not all(gr in list(self.groups) for gr in groups): + raise ValueError( + f"Unknown SIA Group(s): {set(groups).difference(list(self.groups))}" + ) + + files = list(filter(lambda f: self.format(f)[0] in groups, files)) + + if uf: + ufs = parse_UFs(uf) + files = list(filter(lambda f: self.format(f)[1] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = [zfill_year(str(m)[-2:]) for m in to_list(year)] + files = list(filter(lambda f: self.format(f)[2] in years, files)) + + if month: + months = [str(y)[-2:].zfill(2) for y in to_list(month)] + files = list(filter(lambda f: self.format(f)[3] in months, files)) + + return files + + +class SIH(Database): + name = "SIH" + paths = ( + Directory("/dissemin/publicos/SIHSUS/199201_200712/Dados"), + Directory("/dissemin/publicos/SIHSUS/200801_/Dados"), + ) + metadata = { + "long_name": "Sistema de Informações Hospitalares", + "source": ( + "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa + "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa + ), + "description": ( + "A finalidade do AIH (Sistema SIHSUS) é a de transcrever todos os " + "atendimentos que provenientes de internações hospitalares que " + "foram financiadas pelo SUS, e após o processamento, gerarem " + "relatórios para os gestores que lhes possibilitem fazer os " + "pagamentos 
dos estabelecimentos de saúde. Além disso, o nível " + "Federal recebe mensalmente uma base de dados de todas as " + "internações autorizadas (aprovadas ou não para pagamento) para " + "que possam ser repassados às Secretarias de Saúde os valores de " + "Produção de Média e Alta complexidade além dos valores de CNRAC, " + "FAEC e de Hospitais Universitários – em suas variadas formas de " + "contrato de gestão." + ), + } + groups = { + "RD": "AIH Reduzida", + "RJ": "AIH Rejeitada", + "ER": "AIH Rejeitada com erro", + "SP": "Serviços Profissionais", + "CH": "Cadastro Hospitalar", + "CM": "", # TODO + } + + def describe(self, file: File) -> dict: + if file.extension.upper() in [".DBC", ".DBF"]: + group, _uf, year, month = self.format(file) + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": file.basename, + "group": self.groups[group], + "uf": uf, + "month": MONTHS[int(month)], + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + return {} + + def format(self, file: File) -> tuple: + group, _uf = file.name[:2].upper(), file.name[2:4].upper() + year, month = file.name[-4:-2], file.name[-2:] + return group, _uf, zfill_year(year), month + + def get_files( + self, + group: Union[List[str], str], + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + ) -> List[File]: + files = list( + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + ) + + groups = [gr.upper() for gr in to_list(group)] + + if not all(gr in list(self.groups) for gr in groups): + raise ValueError( + f"Unknown SIH Group(s): {set(groups).difference(list(self.groups))}" + ) + + files = list(filter(lambda f: self.format(f)[0] in groups, files)) + + if uf: + ufs = parse_UFs(uf) + files = list(filter(lambda f: self.format(f)[1] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = 
[zfill_year(str(m)[-2:]) for m in to_list(year)] + files = list(filter(lambda f: self.format(f)[2] in years, files)) + + if month: + months = [str(y)[-2:].zfill(2) for y in to_list(month)] + files = list(filter(lambda f: self.format(f)[3] in months, files)) + + return files + + +class SIM(Database): + name = "SIM" + paths = ( + Directory("/dissemin/publicos/SIM/CID10/DORES"), + Directory("/dissemin/publicos/SIM/CID9/DORES"), + ) + metadata = { + "long_name": "Sistema de Informação sobre Mortalidade", + "source": "http://sim.saude.gov.br", + "description": "", + } + groups = {"CID10": "DO", "CID9": "DOR"} + + def describe(self, file: File) -> dict: + group, _uf, year = self.format(file) + _groups = {v: k for k, v in self.groups.items()} + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": str(file.basename), + "uf": uf, + "year": year, + "group": _groups[group], + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + + def format(self, file: File) -> tuple: + if "CID9" in str(file.path): + group, _uf, year = file.name[:-4], file.name[-4:-2], file.name[-2:] + else: + group, _uf, year = file.name[:-6], file.name[-6:-4], file.name[-4:] + return group, _uf, zfill_year(year) + + def get_files( + self, + group: Union[list[str], str], + uf: Optional[Union[list[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + ) -> List[File]: + files = self.files + + groups = [self.groups[g.upper()] for g in to_list(group)] + + files = list(filter(lambda f: self.format(f)[0] in groups, files)) + + if uf: + ufs = parse_UFs(uf) + files = list(filter(lambda f: self.format(f)[1] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = [zfill_year(y) for y in to_list(year)] + files = list(filter(lambda f: self.format(f)[2] in years, files)) + + return files + + +class SINAN(Database): + name = "SINAN" + paths = ( + Directory("/dissemin/publicos/SINAN/DADOS/FINAIS"), + 
Directory("/dissemin/publicos/SINAN/DADOS/PRELIM"), + ) + metadata = { + "long_name": "Doenças e Agravos de Notificação", + "source": "https://portalsinan.saude.gov.br/", + "description": ( + "The Notifiable Diseases Information System - Sinan is primarily" + "fed by the notification and investigation of cases of diseases " + "and conditions listed in the national list of compulsorily " + "notifiable diseases (Consolidation Ordinance No. 4, September 28," + " 2017, Annex). However, states and municipalities are allowed to " + "include other important health problems in their region, such as " + "difilobotriasis in the municipality of São Paulo. Its effective " + "use enables the dynamic diagnosis of the occurrence of an event " + "in the population, providing evidence for causal explanations of " + "compulsorily notifiable diseases and indicating risks to which " + "people are exposed. This contributes to identifying the " + "epidemiological reality of a specific geographical area. Its " + "systematic, decentralized use contributes to the democratization " + "of information, allowing all healthcare professionals to access " + "and make it available to the community. Therefore, it is a " + "relevant tool to assist in health planning, define intervention " + "priorities, and evaluate the impact of interventions." 
+ ), + } + + diseases = { + "ACBI": "Acidente de trabalho com material biológico", + "ACGR": "Acidente de trabalho", + "ANIM": "Acidente por Animais Peçonhentos", + "ANTR": "Atendimento Antirrabico", + "BOTU": "Botulismo", + "CANC": "Cancêr relacionado ao trabalho", + "CHAG": "Doença de Chagas Aguda", + "CHIK": "Febre de Chikungunya", + "COLE": "Cólera", + "COQU": "Coqueluche", + "DENG": "Dengue", + "DERM": "Dermatoses ocupacionais", + "DIFT": "Difteria", + "ESQU": "Esquistossomose", + "EXAN": "Doença exantemáticas", + "FMAC": "Febre Maculosa", + "FTIF": "Febre Tifóide", + "HANS": "Hanseníase", + "HANT": "Hantavirose", + "HEPA": "Hepatites Virais", + "IEXO": "Intoxicação Exógena", + "INFL": "Influenza Pandêmica", + "LEIV": "Leishmaniose Visceral", + "LEPT": "Leptospirose", + "LERD": "LER/Dort", + "LTAN": "Leishmaniose Tegumentar Americana", + "MALA": "Malária", + "MENI": "Meningite", + "MENT": "Transtornos mentais relacionados ao trabalho", + "NTRA": "Notificação de Tracoma", + "PAIR": "Perda auditiva por ruído relacionado ao trabalho", + "PEST": "Peste", + "PFAN": "Paralisia Flácida Aguda", + "PNEU": "Pneumoconioses realacionadas ao trabalho", + "RAIV": "Raiva", + "SDTA": "Surto Doenças Transmitidas por Alimentos", + "SIFA": "Sífilis Adquirida", + "SIFC": "Sífilis Congênita", + "SIFG": "Sífilis em Gestante", + "SRC": "Síndrome da Rubéola Congênia", + "TETA": "Tétano Acidental", + "TETN": "Tétano Neonatal", + "TOXC": "Toxoplasmose Congênita", + "TOXG": "Toxoplasmose Gestacional", + "TRAC": "Inquérito de Tracoma", + "TUBE": "Tuberculose", + "VARC": "Varicela", + "VIOL": "Violência doméstica, sexual e/ou outras violências", + "ZIKA": "Zika Vírus", + } + + def describe(self, file: File) -> dict: + if file.extension.upper() == ".DBC": + dis_code, year = self.format(file) + + description = { + "name": str(file.basename), + "disease": self.diseases[dis_code], + "year": zfill_year(year), + "size": file.info["size"], + "last_update": file.info["modify"], + } + return 
description + return {} + + def format(self, file: File) -> tuple: + year = file.name[-2:] + + if file.name.startswith("SRC"): + dis_code = file.name[:3] + elif file.name == "LEIBR22": + dis_code = "LEIV" # MISPELLED FILE NAME + elif file.name == "LERBR19": + dis_code = "LERD" # ANOTHER ONE + else: + dis_code = file.name[:4] + + return dis_code, zfill_year(year) + + def get_files( + self, + dis_code: Optional[Union[str, list]] = None, + year: Optional[Union[str, int, list]] = None, + ) -> List[File]: + files = list( + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + ) + + if dis_code: + codes = [c.upper() for c in to_list(dis_code)] + + if codes and not all(code in self.diseases for code in codes): + raise ValueError( + f"Unknown disease(s): {set(codes).difference(set(self.diseases))}" + ) + + files = list(filter(lambda f: self.format(f)[0] in codes, files)) + + if year or str(year) in ["0", "00"]: + years = [zfill_year(str(y)[-2:]) for y in to_list(year)] + files = list(filter(lambda f: self.format(f)[1] in years, files)) + + return files + + +class SINASC(Database): + name = "SINASC" + paths = ( + Directory("/dissemin/publicos/SINASC/NOV/DNRES"), + Directory("/dissemin/publicos/SINASC/ANT/DNRES"), + ) + metadata = { + "long_name": "Sistema de Informações sobre Nascidos Vivos", + "source": "http://sinasc.saude.gov.br/", + "description": "", + } + groups = { + "DN": "Declarações de Nascidos Vivos", + "DNR": "Dados dos Nascidos Vivos por UF de residência", + } + + def describe(self, file: File) -> dict: + if file.extension.upper() == ".DBC": + group, _uf, year = self.format(file) + + try: + uf = UFs[_uf] + except KeyError: + uf = _uf + + description = { + "name": file.basename, + "group": self.groups[group], + "uf": uf, + "year": year, + "size": file.info["size"], + "last_update": file.info["modify"], + } + + return description + return {} + + def format(self, file: File) -> tuple: + if file.name == "DNEX2021": + pass + + year = 
zfill_year(file.name[-2:]) + charname = "".join([c for c in file.name if not c.isnumeric()]) + group, _uf = charname[:-2], charname[-2:] + return group, _uf, zfill_year(year) + + def get_files( + self, + group: Union[List[str], str], + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[List, str, int]] = None, + ) -> List[File]: + files = self.files + + groups = to_list(group) + + files = list(filter(lambda f: self.format(f)[0] in groups, files)) + + if uf: + if "EX" in to_list(uf): + # DNEX2021 + if len(to_list(uf)) == 1: + return [] + + to_list(uf).remove("EX") + + ufs = parse_UFs(uf) + files = list(filter(lambda f: self.format(f)[1] in ufs, files)) + + if year or str(year) in ["0", "00"]: + years = [zfill_year(str(y)[-2:]) for y in to_list(year)] + files = list(filter(lambda f: self.format(f)[2] in years, files)) + + return files diff --git a/pysus/api/ftp/databases/__init__.py b/pysus/api/ftp/databases/__init__.py deleted file mode 100644 index 8ad52e98..00000000 --- a/pysus/api/ftp/databases/__init__.py +++ /dev/null @@ -1,34 +0,0 @@ -from .ciha import CIHA -from .cnes import CNES -from .ibge_datasus import IBGEDATASUS -from .pni import PNI -from .sia import SIA -from .sih import SIH -from .sim import SIM -from .sinan import SINAN -from .sinasc import SINASC - -AVAILABLE_DATABASES = [ - CIHA, - CNES, - IBGEDATASUS, - PNI, - SIA, - SIH, - SIM, - SINAN, - SINASC, -] - -__all__ = [ - "CIHA", - "CNES", - "IBGEDATASUS", - "PNI", - "SIA", - "SIH", - "SIM", - "SINAN", - "SINASC", - "AVAILABLE_DATABASES", -] diff --git a/pysus/api/ftp/databases/ciha.py b/pysus/api/ftp/databases/ciha.py deleted file mode 100644 index b84d18ab..00000000 --- a/pysus/api/ftp/databases/ciha.py +++ /dev/null @@ -1,103 +0,0 @@ -__all__ = ["CIHA"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year - - -class CIHA(Database): - name = "CIHA" - paths = 
(Directory("/dissemin/publicos/CIHA/201101_/Dados"),) - metadata = { - "long_name": "Comunicação de Internação Hospitalar e Ambulatorial", - "source": "http://ciha.datasus.gov.br/CIHA/index.php", - "description": ( - "A CIHA foi criada para ampliar o processo de planejamento, " - "programação, controle, avaliação e regulação da assistência à " - "saúde permitindo um conhecimento mais abrangente e profundo dos " - "perfis nosológico e epidemiológico da população brasileira, da " - "capacidade instalada e do potencial de produção de serviços do " - "conjunto de estabelecimentos de saúde do País. O sistema permite " - "o acompanhamento das ações e serviços de saúde custeados " - "por: planos privados de assistência à saúde; planos públicos; " - "pagamento particular por pessoa física; pagamento particular por " - "pessoa jurídica; programas e projetos federais (PRONON, PRONAS, " - "PROADI); recursos próprios das secretarias municipais e estaduais" - " de saúde; DPVAT; gratuidade e, a partir da publicação da " - "Portaria GM/MS nº 2.905/2022, consórcios públicos. 
As " - "informações registradas na CIHA servem como base para o processo " - "de Certificação de Entidades Beneficentes de Assistência Social " - "em Saúde (CEBAS) e para monitoramento dos programas PRONAS e " - "PRONON" - ), - } - groups = { - "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial", - } - - def describe(self, file: File): - if not isinstance(file, File): - return file - - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return file - - def format(self, file: File) -> tuple: - group, _uf = file.name[:4].upper(), file.name[4:6].upper() - year, month = file.name[-4:-2], file.name[-2:] - return group, _uf, zfill_year(year), month - - def get_files( - self, - uf: Optional[Union[List[str], str]] = None, - year: Optional[Union[list, str, int]] = None, - month: Optional[Union[list, str, int]] = None, - group: Union[List[str], str] = "CIHA", - ) -> List[File]: - files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) - ) - - groups = [gr.upper() for gr in to_list(group)] - - if not all(gr in list(self.groups) for gr in groups): - raise ValueError( - f"Unknown CIHA Group(s): {set( - groups).difference(list(self.groups))}" - ) - - files = list(filter(lambda f: self.format(f)[0] in groups, files)) - - if uf: - ufs = parse_UFs(uf) - files = list(filter(lambda f: self.format(f)[1] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(str(m)[-2:]) for m in to_list(year)] - files = list(filter(lambda f: self.format(f)[2] in years, files)) - - if month: - months = [str(y)[-2:].zfill(2) for y in to_list(month)] - files = list(filter(lambda f: self.format(f)[3] in 
months, files)) - - return files diff --git a/pysus/api/ftp/databases/cnes.py b/pysus/api/ftp/databases/cnes.py deleted file mode 100644 index 61235fba..00000000 --- a/pysus/api/ftp/databases/cnes.py +++ /dev/null @@ -1,135 +0,0 @@ -__all__ = ["CNES"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year - - -class CNES(Database): - name = "CNES" - paths = (Directory("/dissemin/publicos/CNES/200508_/Dados"),) - metadata = { - "long_name": "Cadastro Nacional de Estabelecimentos de Saúde", - "source": "https://cnes.datasus.gov.br/", - "description": ( - "O Cadastro Nacional de Estabelecimentos de Saúde (CNES) é o " - "sistema de informação oficial de cadastramento de informações " - "de todos os estabelecimentos de saúde no país, independentemente " - "de sua natureza jurídica ou de integrarem o Sistema Único de " - "Saúde (SUS). Trata-se do cadastro oficial do Ministério da " - "Saúde (MS) no tocante à realidade da capacidade instalada e " - "mão-de-obra assistencial de saúde no Brasil em estabelecimentos " - "de saúde públicos ou privados, com convênio SUS ou não." - ), - } - groups = { - "DC": "Dados Complementares", - "EE": "Estabelecimento de Ensino", - "EF": "Estabelecimento Filantrópico", - "EP": "Equipes", - "EQ": "Equipamentos", - "GM": "Gestão e Metas", - "HB": "Habilitação", - "IN": "Incentivos", - "LT": "Leitos", - "PF": "Profissional", - "RC": "Regra Contratual", - "SR": "Serviço Especializado", - "ST": "Estabelecimentos", - } - __loaded__ = set() - - def load( - self, - groups: Union[str, List[str]] = None, - ): - """ - Loads CNES Groups into content. 
Will convert the files and directories - found within FTP Directories into self.content - """ - if not self.__content__: - self.paths[0].load() - self.__content__ |= self.paths[0].__content__ - - if groups: - groups = to_list(groups) - - if not all(group in self.groups for group in [gr.upper() for gr in groups]): - raise ValueError( - f"Unknown CNES group(s): {set( - groups).difference(self.groups)}" - ) - - for group in groups: - group = group.upper() - if group not in self.__loaded__: - directory = self.__content__[group] - directory.load() - self.__content__ |= directory.__content__ - self.__loaded__.add(directory.name) - return self - - def describe(self, file: File) -> dict: - if not isinstance(file, File): - return {} - - if file.name == "GMufAAmm": - # Leftover - return {} - - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} - - def format(self, file: File) -> tuple: - group, _uf = file.name[:2].upper(), file.name[2:4].upper() - year, month = file.name[-4:-2], file.name[-2:] - return group, _uf, zfill_year(year), month - - def get_files( - self, - group: Union[List[str], str], - uf: Optional[Union[List[str], str]] = None, - year: Optional[Union[list, str, int]] = None, - month: Optional[Union[list, str, int]] = None, - ) -> List[File]: - if not group: - raise ValueError("At least one CNES group is required") - - groups = [gr.upper() for gr in to_list(group)] - - self.load(groups) - - files = list(filter(lambda f: f.name[:2] in groups, self.files)) - - if uf: - ufs = parse_UFs(uf) - files = list(filter(lambda f: f.name[2:4] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = 
[str(m)[-2:].zfill(2) for m in to_list(year)] - files = list(filter(lambda f: f.name[-4:-2] in years, files)) - - if month: - months = [str(y)[-2:].zfill(2) for y in to_list(month)] - files = list(filter(lambda f: f.name[-2:] in months, files)) - - return files diff --git a/pysus/api/ftp/databases/ibge_datasus.py b/pysus/api/ftp/databases/ibge_datasus.py deleted file mode 100644 index 39fa6c02..00000000 --- a/pysus/api/ftp/databases/ibge_datasus.py +++ /dev/null @@ -1,86 +0,0 @@ -__all__ = ["IBGEDATASUS"] - -from typing import List, Literal, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import zfill_year - - -class IBGEDATASUS(Database): - name = "IBGE-DataSUS" - paths = ( - Directory("/dissemin/publicos/IBGE/POP"), - Directory("/dissemin/publicos/IBGE/censo"), - Directory("/dissemin/publicos/IBGE/POPTCU"), - Directory("/dissemin/publicos/IBGE/projpop"), - # Directory("/dissemin/publicos/IBGE/Auxiliar") # this has a different file name pattern # noqa - ) - metadata = { - "long_name": "Populaçao Residente, Censos, Contagens " - "Populacionais e Projeçoes Intercensitarias", - "source": "ftp://ftp.datasus.gov.br/dissemin/publicos/IBGE", - "description": ( - "São aqui apresentados informações sobre a população residente, " - "estratificadas por município, faixas etárias e sexo, obtidas a " - "partir dos Censos Demográficos, Contagens Populacionais " - "e Projeções Intercensitárias." 
- ), - } - - def describe(self, file: File) -> dict: - if file.extension.upper() in [".ZIP"]: - year = file.name.split(".")[0][-2:] - description = { - "name": str(file.basename), - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - return description - elif file.extension.upper() == ".DBF": - year = file.name[-2:] - description = { - "name": str(file.basename), - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - return description - return {} - - def format(self, file: File) -> tuple: - return (file.name[-2:],) - - def get_files( - self, - source: Literal["POP", "censo", "POPTCU", "projpop"] = "POPTCU", - year: Optional[Union[str, int, list]] = None, - *args, - **kwargs, - ) -> List[File]: - sources = ["POP", "censo", "POPTCU", "projpop"] - source_dir = None - - for dir in self.paths: - if source in sources and source in dir.path: - source_dir = dir - - if not source_dir: - raise ValueError(f"Unkown source {source}. 
Options: {sources}") - - files = source_dir.content - - if year: - if isinstance(year, (str, int)): - files = [ - f for f in files if self.describe(f)["year"] == zfill_year(year) - ] - elif isinstance(year, list): - files = [ - f - for f in files - if str(self.describe(f)["year"]) - in [str(zfill_year(y)) for y in year] - ] - - return files diff --git a/pysus/api/ftp/databases/pni.py b/pysus/api/ftp/databases/pni.py deleted file mode 100644 index ef154287..00000000 --- a/pysus/api/ftp/databases/pni.py +++ /dev/null @@ -1,95 +0,0 @@ -__all__ = ["PNI"] - -from typing import List, Literal, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import UFs, parse_UFs, to_list, zfill_year - - -class PNI(Database): - name = "PNI" - paths = (Directory("/dissemin/publicos/PNI/DADOS"),) - metadata = { - "long_name": ("Sistema de Informações do Programa Nacional de Imunizações"), - "source": ( - "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa - "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa - ), - "description": ( - "O SI-PNI é um sistema desenvolvido para possibilitar aos " - "gestores envolvidos no Programa Nacional de Imunização, a " - "avaliação dinâmica do risco quanto à ocorrência de surtos ou " - "epidemias, a partir do registro dos imunobiológicos aplicados e " - "do quantitativo populacional vacinado, agregados por faixa " - "etária, período de tempo e área geográfica. Possibilita também " - "o controle do estoque de imunobiológicos necessário aos " - "administradores que têm a incumbência de programar sua aquisição " - "e distribuição. Controla as indicações de aplicação de " - "vacinas de imunobiológicos especiais e seus eventos adversos, " - "dentro dos Centros de Referências em imunobiológicos especiais." 
- ), - } - groups = { - "CPNI": "Cobertura Vacinal", # TODO: may be incorrect - "DPNI": "Doses Aplicadas", # TODO: may be incorrect - } - - def describe(self, file: File) -> dict: - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": file.basename, - "group": self.groups[group], - "uf": uf, - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} - - def format(self, file: File) -> tuple: - if len(file.name) != 8: - raise ValueError(f"Can't format {file.name}") - - n = file.name - group, _uf, year = n[:4], n[4:6], n[-2:] - return group, _uf, zfill_year(year) - - def get_files( - self, - group: Union[list, Literal["CNPI", "DPNI"]], - uf: Optional[Union[List[str], str]] = None, - year: Optional[Union[list, str, int]] = None, - ) -> List[File]: - files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) - ) - - groups = [gr.upper() for gr in to_list(group)] - - if not all(gr in list(self.groups) for gr in groups): - raise ValueError( - f"Unknown PNI Group(s): {set( - groups).difference(list(self.groups))}" - ) - - files = list(filter(lambda f: self.format(f)[0] in groups, files)) - - if uf: - ufs = parse_UFs(uf) - files = list(filter(lambda f: self.format(f)[1] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(str(m)[-2:]) for m in to_list(year)] - files = list(filter(lambda f: self.format(f)[2] in years, files)) - - return files diff --git a/pysus/api/ftp/databases/sia.py b/pysus/api/ftp/databases/sia.py deleted file mode 100644 index 3f28d809..00000000 --- a/pysus/api/ftp/databases/sia.py +++ /dev/null @@ -1,122 +0,0 @@ -__all__ = ["SIA"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year - - -class 
SIA(Database): - name = "SIA" - paths = ( - Directory("/dissemin/publicos/SIASUS/199407_200712/Dados"), - Directory("/dissemin/publicos/SIASUS/200801_/Dados"), - ) - metadata = { - "long_name": "Sistema de Informações Ambulatoriais", - "source": "http://sia.datasus.gov.br/principal/index.php", - "description": ( - "O Sistema de Informação Ambulatorial (SIA) foi instituído pela " - "Portaria GM/MS n.º 896 de 29 de junho de 1990. Originalmente, o " - "SIA foi concebido a partir do projeto SICAPS (Sistema de " - "Informação e Controle Ambulatorial da Previdência Social), em " - "que os conceitos, os objetivos e as diretrizes criados para o " - "desenvolvimento do SICAPS foram extremamente importantes e " - "amplamente utilizados para o desenvolvimento do SIA, tais" - " como: (i) o acompanhamento das programações físicas e " - "orçamentárias; (ii) o acompanhamento das ações de saúde " - "produzidas; (iii) a agilização do pagamento e controle " - "orçamentário e financeiro; e (iv) a formação de banco de dados " - "para contribuir com a construção do SUS." 
- ), - } - groups = { - "AB": "APAC de Cirurgia Bariátrica", - "ABO": "APAC de Acompanhamento Pós Cirurgia Bariátrica", - "ACF": "APAC de Confecção de Fístula", - "AD": "APAC de Laudos Diversos", - "AM": "APAC de Medicamentos", - "AMP": "APAC de Acompanhamento Multiprofissional", - "AN": "APAC de Nefrologia", - "AQ": "APAC de Quimioterapia", - "AR": "APAC de Radioterapia", - "ATD": "APAC de Tratamento Dialítico", - "BI": "Boletim de Produção Ambulatorial individualizado", - "IMPBO": "", # TODO - "PA": "Produção Ambulatorial", - "PAM": "", # TODO - "PAR": "", # TODO - "PAS": "", # TODO - "PS": "RAAS Psicossocial", - "SAD": "RAAS de Atenção Domiciliar", - } - - def describe(self, file: File) -> dict: - if file.extension.upper() == ".DBC": - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} - - def format(self, file: File) -> tuple: - if file.extension.upper() in [".DBC", ".DBF"]: - digits = "".join([d for d in file.name if d.isdigit()]) - if "_" in file.name: - name, _ = file.name.split("_") - digits = "".join([d for d in name if d.isdigit()]) - chars, _ = file.name.split(digits) - year, month = digits[:2], digits[2:] - group, uf = chars[:-2].upper(), chars[-2:].upper() - return group, uf, zfill_year(year), month - return () - - def get_files( - self, - group: Union[List[str], str], - uf: Optional[Union[List[str], str]] = None, - year: Optional[Union[list, str, int]] = None, - month: Optional[Union[list, str, int]] = None, - ) -> List[File]: - files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) - ) - - groups = [gr.upper() for gr in to_list(group)] - - if not all(gr in list(self.groups) for gr in groups): - raise ValueError( - 
f"Unknown SIA Group(s): {set( - groups).difference(list(self.groups))}" - ) - - files = list(filter(lambda f: self.format(f)[0] in groups, files)) - - if uf: - ufs = parse_UFs(uf) - files = list(filter(lambda f: self.format(f)[1] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(str(m)[-2:]) for m in to_list(year)] - files = list(filter(lambda f: self.format(f)[2] in years, files)) - - if month: - months = [str(y)[-2:].zfill(2) for y in to_list(month)] - files = list(filter(lambda f: self.format(f)[3] in months, files)) - - return files diff --git a/pysus/api/ftp/databases/sih.py b/pysus/api/ftp/databases/sih.py deleted file mode 100644 index 0c28400d..00000000 --- a/pysus/api/ftp/databases/sih.py +++ /dev/null @@ -1,105 +0,0 @@ -__all__ = ["SIH"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import MONTHS, UFs, parse_UFs, to_list, zfill_year - - -class SIH(Database): - name = "SIH" - paths = ( - Directory("/dissemin/publicos/SIHSUS/199201_200712/Dados"), - Directory("/dissemin/publicos/SIHSUS/200801_/Dados"), - ) - metadata = { - "long_name": "Sistema de Informações Hospitalares", - "source": ( - "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa - "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa - ), - "description": ( - "A finalidade do AIH (Sistema SIHSUS) é a de transcrever todos os " - "atendimentos que provenientes de internações hospitalares que " - "foram financiadas pelo SUS, e após o processamento, gerarem " - "relatórios para os gestores que lhes possibilitem fazer os " - "pagamentos dos estabelecimentos de saúde. 
Além disso, o nível " - "Federal recebe mensalmente uma base de dados de todas as " - "internações autorizadas (aprovadas ou não para pagamento) para " - "que possam ser repassados às Secretarias de Saúde os valores de " - "Produção de Média e Alta complexidade além dos valores de CNRAC, " - "FAEC e de Hospitais Universitários – em suas variadas formas de " - "contrato de gestão." - ), - } - groups = { - "RD": "AIH Reduzida", - "RJ": "AIH Rejeitada", - "ER": "AIH Rejeitada com erro", - "SP": "Serviços Profissionais", - "CH": "Cadastro Hospitalar", - "CM": "", # TODO - } - - def describe(self, file: File) -> dict: - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": file.basename, - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} - - def format(self, file: File) -> tuple: - group, _uf = file.name[:2].upper(), file.name[2:4].upper() - year, month = file.name[-4:-2], file.name[-2:] - return group, _uf, zfill_year(year), month - - def get_files( - self, - group: Union[List[str], str], - uf: Optional[Union[List[str], str]] = None, - year: Optional[Union[list, str, int]] = None, - month: Optional[Union[list, str, int]] = None, - ) -> List[File]: - files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) - ) - - groups = [gr.upper() for gr in to_list(group)] - - if not all(gr in list(self.groups) for gr in groups): - raise ValueError( - f"Unknown SIH Group(s): {set( - groups).difference(list(self.groups))}" - ) - - files = list(filter(lambda f: self.format(f)[0] in groups, files)) - - if uf: - ufs = parse_UFs(uf) - files = list(filter(lambda f: self.format(f)[1] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(str(m)[-2:]) for m in 
to_list(year)] - files = list(filter(lambda f: self.format(f)[2] in years, files)) - - if month: - months = [str(y)[-2:].zfill(2) for y in to_list(month)] - files = list(filter(lambda f: self.format(f)[3] in months, files)) - - return files diff --git a/pysus/api/ftp/databases/sim.py b/pysus/api/ftp/databases/sim.py deleted file mode 100644 index 0a85aa1f..00000000 --- a/pysus/api/ftp/databases/sim.py +++ /dev/null @@ -1,69 +0,0 @@ -__all__ = ["SIM"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import UFs, parse_UFs, to_list, zfill_year - - -class SIM(Database): - name = "SIM" - paths = ( - Directory("/dissemin/publicos/SIM/CID10/DORES"), - Directory("/dissemin/publicos/SIM/CID9/DORES"), - ) - metadata = { - "long_name": "Sistema de Informação sobre Mortalidade", - "source": "http://sim.saude.gov.br", - "description": "", - } - groups = {"CID10": "DO", "CID9": "DOR"} - - def describe(self, file: File) -> dict: - group, _uf, year = self.format(file) - _groups = {v: k for k, v in self.groups.items()} - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "uf": uf, - "year": year, - "group": _groups[group], - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - - def format(self, file: File) -> tuple: - if "CID9" in str(file.path): - group, _uf, year = file.name[:-4], file.name[-4:-2], file.name[-2:] - else: - group, _uf, year = file.name[:-6], file.name[-6:-4], file.name[-4:] - return group, _uf, zfill_year(year) - - def get_files( - self, - group: Union[list[str], str], - uf: Optional[Union[list[str], str]] = None, - year: Optional[Union[list, str, int]] = None, - ) -> List[File]: - files = self.files - - groups = [self.groups[g.upper()] for g in to_list(group)] - - files = list(filter(lambda f: self.format(f)[0] in groups, files)) - - if uf: - ufs = parse_UFs(uf) - files = list(filter(lambda f: 
self.format(f)[1] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(y) for y in to_list(year)] - files = list(filter(lambda f: self.format(f)[2] in years, files)) - - return files diff --git a/pysus/api/ftp/databases/sinan.py b/pysus/api/ftp/databases/sinan.py deleted file mode 100644 index f272d016..00000000 --- a/pysus/api/ftp/databases/sinan.py +++ /dev/null @@ -1,144 +0,0 @@ -__all__ = ["SINAN"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import to_list, zfill_year - - -class SINAN(Database): - name = "SINAN" - paths = ( - Directory("/dissemin/publicos/SINAN/DADOS/FINAIS"), - Directory("/dissemin/publicos/SINAN/DADOS/PRELIM"), - ) - metadata = { - "long_name": "Doenças e Agravos de Notificação", - "source": "https://portalsinan.saude.gov.br/", - "description": ( - "The Notifiable Diseases Information System - Sinan is primarily" - "fed by the notification and investigation of cases of diseases " - "and conditions listed in the national list of compulsorily " - "notifiable diseases (Consolidation Ordinance No. 4, September 28," - " 2017, Annex). However, states and municipalities are allowed to " - "include other important health problems in their region, such as " - "difilobotriasis in the municipality of São Paulo. Its effective " - "use enables the dynamic diagnosis of the occurrence of an event " - "in the population, providing evidence for causal explanations of " - "compulsorily notifiable diseases and indicating risks to which " - "people are exposed. This contributes to identifying the " - "epidemiological reality of a specific geographical area. Its " - "systematic, decentralized use contributes to the democratization " - "of information, allowing all healthcare professionals to access " - "and make it available to the community. 
Therefore, it is a " - "relevant tool to assist in health planning, define intervention " - "priorities, and evaluate the impact of interventions." - ), - } - - diseases = { - "ACBI": "Acidente de trabalho com material biológico", - "ACGR": "Acidente de trabalho", - "ANIM": "Acidente por Animais Peçonhentos", - "ANTR": "Atendimento Antirrabico", - "BOTU": "Botulismo", - "CANC": "Cancêr relacionado ao trabalho", - "CHAG": "Doença de Chagas Aguda", - "CHIK": "Febre de Chikungunya", - "COLE": "Cólera", - "COQU": "Coqueluche", - "DENG": "Dengue", - "DERM": "Dermatoses ocupacionais", - "DIFT": "Difteria", - "ESQU": "Esquistossomose", - "EXAN": "Doença exantemáticas", - "FMAC": "Febre Maculosa", - "FTIF": "Febre Tifóide", - "HANS": "Hanseníase", - "HANT": "Hantavirose", - "HEPA": "Hepatites Virais", - "IEXO": "Intoxicação Exógena", - "INFL": "Influenza Pandêmica", - "LEIV": "Leishmaniose Visceral", - "LEPT": "Leptospirose", - "LERD": "LER/Dort", - "LTAN": "Leishmaniose Tegumentar Americana", - "MALA": "Malária", - "MENI": "Meningite", - "MENT": "Transtornos mentais relacionados ao trabalho", - "NTRA": "Notificação de Tracoma", - "PAIR": "Perda auditiva por ruído relacionado ao trabalho", - "PEST": "Peste", - "PFAN": "Paralisia Flácida Aguda", - "PNEU": "Pneumoconioses realacionadas ao trabalho", - "RAIV": "Raiva", - "SDTA": "Surto Doenças Transmitidas por Alimentos", - "SIFA": "Sífilis Adquirida", - "SIFC": "Sífilis Congênita", - "SIFG": "Sífilis em Gestante", - "SRC": "Síndrome da Rubéola Congênia", - "TETA": "Tétano Acidental", - "TETN": "Tétano Neonatal", - "TOXC": "Toxoplasmose Congênita", - "TOXG": "Toxoplasmose Gestacional", - "TRAC": "Inquérito de Tracoma", - "TUBE": "Tuberculose", - "VARC": "Varicela", - "VIOL": "Violência doméstica, sexual e/ou outras violências", - "ZIKA": "Zika Vírus", - } - - def describe(self, file: File) -> dict: - if file.extension.upper() == ".DBC": - dis_code, year = self.format(file) - - description = { - "name": str(file.basename), - 
"disease": self.diseases[dis_code], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - return description - return {} - - def format(self, file: File) -> tuple: - year = file.name[-2:] - - if file.name.startswith("SRC"): - dis_code = file.name[:3] - elif file.name == "LEIBR22": - dis_code = "LEIV" # MISPELLED FILE NAME - elif file.name == "LERBR19": - dis_code = "LERD" # ANOTHER ONE - else: - dis_code = file.name[:4] - - return dis_code, zfill_year(year) - - def get_files( - self, - dis_code: Optional[Union[str, list]] = None, - year: Optional[Union[str, int, list]] = None, - ) -> List[File]: - files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) - ) - - if dis_code: - codes = [c.upper() for c in to_list(dis_code)] - - if codes and not all(code in self.diseases for code in codes): - raise ValueError( - f"Unknown disease(s): {set( - codes).difference(set(self.diseases))}" - ) - - files = list(filter(lambda f: self.format(f)[0] in codes, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(str(y)[-2:]) for y in to_list(year)] - files = list(filter(lambda f: self.format(f)[1] in years, files)) - - return files diff --git a/pysus/api/ftp/databases/sinasc.py b/pysus/api/ftp/databases/sinasc.py deleted file mode 100644 index f7e73c29..00000000 --- a/pysus/api/ftp/databases/sinasc.py +++ /dev/null @@ -1,82 +0,0 @@ -__all__ = ["SINASC"] - -from typing import List, Optional, Union - -from pysus.api.ftp import Database, Directory, File -from pysus.utils import UFs, parse_UFs, to_list, zfill_year - - -class SINASC(Database): - name = "SINASC" - paths = ( - Directory("/dissemin/publicos/SINASC/NOV/DNRES"), - Directory("/dissemin/publicos/SINASC/ANT/DNRES"), - ) - metadata = { - "long_name": "Sistema de Informações sobre Nascidos Vivos", - "source": "http://sinasc.saude.gov.br/", - "description": "", - } - groups = { - "DN": "Declarações de Nascidos Vivos", - "DNR": "Dados dos 
Nascidos Vivos por UF de residência", - } - - def describe(self, file: File) -> dict: - if file.extension.upper() == ".DBC": - group, _uf, year = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": file.basename, - "group": self.groups[group], - "uf": uf, - "year": year, - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} - - def format(self, file: File) -> tuple: - if file.name == "DNEX2021": - pass - - year = zfill_year(file.name[-2:]) - charname = "".join([c for c in file.name if not c.isnumeric()]) - group, _uf = charname[:-2], charname[-2:] - return group, _uf, zfill_year(year) - - def get_files( - self, - group: Union[List[str], str], - uf: Optional[Union[List[str], str]] = None, - year: Optional[Union[List, str, int]] = None, - ) -> List[File]: - files = self.files - - groups = to_list(group) - - files = list(filter(lambda f: self.format(f)[0] in groups, files)) - - if uf: - if "EX" in to_list(uf): - # DNEX2021 - if len(to_list(uf)) == 1: - return [] - - to_list(uf).remove("EX") - - ufs = parse_UFs(uf) - files = list(filter(lambda f: self.format(f)[1] in ufs, files)) - - if year or str(year) in ["0", "00"]: - years = [zfill_year(str(y)[-2:]) for y in to_list(year)] - files = list(filter(lambda f: self.format(f)[2] in years, files)) - - return files From bea300d3b1d94b06fe85f387706f78a1f5320dd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Mon, 16 Mar 2026 12:30:13 -0300 Subject: [PATCH 5/6] normalize FileDescription to prepare for ducklake implementation --- pyproject.toml | 11 +- pysus/api/ducklake/models.py | 2 - pysus/api/ftp/client.py | 2 +- pysus/api/ftp/databases.py | 337 ++++++++++++++++------------------- pysus/api/ftp/models.py | 29 +++ pysus/management/__init__.py | 0 6 files changed, 188 insertions(+), 193 deletions(-) create mode 100644 pysus/api/ftp/models.py create mode 100644 pysus/management/__init__.py diff 
--git a/pyproject.toml b/pyproject.toml index f9504e76..f3df5be4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,10 +2,17 @@ name = "pysus" version = "1.0.1" # changed by semantic-release description = "Tools for dealing with Brazil's Public health data" -authors = ["Flavio Codeco Coelho "] +authors = ["Flavio Codeco Coelho ", "Luã Bida Vacaro "] license = "GPL" -packages = [{include='pysus'}] +packages = [{ include = "pysus"}] + +exclude = [ + "pysus/tests", + "pysus/tests/**", + "pysus/management", + "pysus/management/**" +] [tool.poetry.dependencies] python = ">=3.10,<3.14" diff --git a/pysus/api/ducklake/models.py b/pysus/api/ducklake/models.py index 54d6850c..1cbfb4b6 100644 --- a/pysus/api/ducklake/models.py +++ b/pysus/api/ducklake/models.py @@ -116,7 +116,6 @@ class File(Catalog): __tablename__ = "files" id = Column(Integer, primary_key=True) - group_id = Column( Integer, ForeignKey("pysus.dataset_groups.id"), @@ -126,7 +125,6 @@ class File(Catalog): path = Column(String, nullable=False, unique=True) size = Column(Integer, nullable=False) rows = Column(Integer, nullable=False) - modified = Column(Date, nullable=False) group = relationship( diff --git a/pysus/api/ftp/client.py b/pysus/api/ftp/client.py index 88c46e1b..14eec50e 100644 --- a/pysus/api/ftp/client.py +++ b/pysus/api/ftp/client.py @@ -133,7 +133,7 @@ def __init__(self, path: str, name: str, info: FileInfo) -> None: def info(self) -> Dict[str, str]: """Returns a dictionary with human-readable file information""" return { - "size": humanize.naturalsize(self.__info["size"]), + "size": self.__info["size"], "type": f"{self.extension[1:].upper()} file", "modify": self.__info["modify"].strftime("%Y-%m-%d %I:%M%p"), } diff --git a/pysus/api/ftp/databases.py b/pysus/api/ftp/databases.py index c2dcf47c..e77cb709 100644 --- a/pysus/api/ftp/databases.py +++ b/pysus/api/ftp/databases.py @@ -14,6 +14,7 @@ from pysus.api.ftp import Database, Directory, File from pysus.utils import UFs, parse_UFs, 
to_list, zfill_year, MONTHS +from .models import FileDescription class CIHA(Database): @@ -46,30 +47,22 @@ class CIHA(Database): "CIHA": "Comunicação de Internação Hospitalar e Ambulatorial", } - def describe(self, file: File): - if not isinstance(file, File): - return file - - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return file + def describe(self, file: File) -> Optional[FileDescription]: + if not isinstance(file, File) or file.extension.upper() not in [".DBC", ".DBF"]: + return None + + group, _uf, year, month = self.format(file) + uf = UFs.get(_uf, _uf) + + return FileDescription( + name=str(file.basename), + group=self.groups[group], + uf=uf, + month=MONTHS[int(month)], + year=zfill_year(year), + size=file.info["size"], + last_update=file.info["modify"], + ) def format(self, file: File) -> tuple: group, _uf = file.name[:4].upper(), file.name[4:6].upper() @@ -84,14 +77,16 @@ def get_files( group: Union[List[str], str] = "CIHA", ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown CIHA Group(s): {set(groups).difference(list(self.groups))}" + f"Unknown CIHA Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -162,7 +157,8 @@ def load( if not all(group in self.groups for group in [gr.upper() for gr in groups]): raise ValueError( - f"Unknown CNES group(s): 
{set(groups).difference(self.groups)}" + f"Unknown CNES group(s): {set( + groups).difference(self.groups)}" ) for group in groups: @@ -174,34 +170,24 @@ def load( self.__loaded__.add(directory.name) return self - def describe(self, file: File) -> dict: - if not isinstance(file, File): - return {} + def describe(self, file: File) -> Optional[FileDescription]: + if not isinstance(file, File) or file.name == "GMufAAmm": + return None - if file.name == "GMufAAmm": - # Leftover - return {} + if file.extension.upper() not in [".DBC", ".DBF"]: + return None - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} + group, _uf, year, month = self.format(file) + + return FileDescription( + name=str(file.basename), + group=self.groups.get(group, group), + uf=UFs.get(_uf, _uf), + month=MONTHS.get(int(month), month), + year=zfill_year(year), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: group, _uf = file.name[:2].upper(), file.name[2:4].upper() @@ -260,26 +246,23 @@ class IBGEDATASUS(Database): ), } - def describe(self, file: File) -> dict: - if file.extension.upper() in [".ZIP"]: + def describe(self, file: File) -> Optional[FileDescription]: + ext = file.extension.upper() + + if ext == ".ZIP": year = file.name.split(".")[0][-2:] - description = { - "name": str(file.basename), - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - return description - elif file.extension.upper() == ".DBF": + elif ext == ".DBF": year = file.name[-2:] - description = { - "name": str(file.basename), - "year": zfill_year(year), - "size": 
file.info["size"], - "last_update": file.info["modify"], - } - return description - return {} + else: + return None + + return FileDescription( + name=str(file.basename), + group="Population", + year=zfill_year(year), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: return (file.name[-2:],) @@ -323,7 +306,7 @@ class PNI(Database): name = "PNI" paths = (Directory("/dissemin/publicos/PNI/DADOS"),) metadata = { - "long_name": ("Sistema de Informações do Programa Nacional de Imunizações"), + "long_name": ("Sistema de Informações do Programa Nacional de Imunizações"), # noqa "source": ( "https://datasus.saude.gov.br/acesso-a-informacao/morbidade-hospitalar-do-sus-sih-sus/", # noqa "https://datasus.saude.gov.br/acesso-a-informacao/producao-hospitalar-sih-sus/", # noqa @@ -347,26 +330,20 @@ class PNI(Database): "DPNI": "Doses Aplicadas", # TODO: may be incorrect } - def describe(self, file: File) -> dict: - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf + def describe(self, file: File) -> Optional[FileDescription]: + if not isinstance(file, File) or file.extension.upper() not in [".DBC", ".DBF"]: + return None - description = { - "name": file.basename, - "group": self.groups[group], - "uf": uf, - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } + group, _uf, year = self.format(file) - return description - return {} + return FileDescription( + name=str(file.basename), + group=self.groups.get(group, group), + uf=UFs.get(_uf, _uf), + year=zfill_year(year), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: if len(file.name) != 8: @@ -383,14 +360,16 @@ def get_files( year: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], 
self.files) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown PNI Group(s): {set(groups).difference(list(self.groups))}" + f"Unknown PNI Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -451,27 +430,21 @@ class SIA(Database): "SAD": "RAAS de Atenção Domiciliar", } - def describe(self, file: File) -> dict: - if file.extension.upper() == ".DBC": - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf + def describe(self, file: File) -> Optional[FileDescription]: + if file.extension.upper() != ".DBC": + return None - description = { - "name": str(file.basename), - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } + group_code, _uf, year, month = self.format(file) - return description - return {} + return FileDescription( + name=str(file.basename), + group=self.groups.get(group_code, group_code), + uf=UFs.get(_uf, _uf), + month=MONTHS.get(int(month), str(month)), + year=zfill_year(year), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: if file.extension.upper() in [".DBC", ".DBF"]: @@ -493,14 +466,16 @@ def get_files( month: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown SIA Group(s): {set(groups).difference(list(self.groups))}" + f"Unknown SIA Group(s): {set( + groups).difference(list(self.groups))}" ) files = 
list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -555,27 +530,21 @@ class SIH(Database): "CM": "", # TODO } - def describe(self, file: File) -> dict: - if file.extension.upper() in [".DBC", ".DBF"]: - group, _uf, year, month = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": file.basename, - "group": self.groups[group], - "uf": uf, - "month": MONTHS[int(month)], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} + def describe(self, file: File) -> Optional[FileDescription]: + if not isinstance(file, File) or file.extension.upper() not in [".DBC", ".DBF"]: + return None + + group_code, _uf, year, month = self.format(file) + + return FileDescription( + name=str(file.basename), + group=self.groups.get(group_code, group_code), + uf=UFs.get(_uf, _uf), + month=MONTHS.get(int(month), str(month)), + year=zfill_year(year), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: group, _uf = file.name[:2].upper(), file.name[2:4].upper() @@ -590,14 +559,16 @@ def get_files( month: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown SIH Group(s): {set(groups).difference(list(self.groups))}" + f"Unknown SIH Group(s): {set( + groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -630,25 +601,18 @@ class SIM(Database): } groups = {"CID10": "DO", "CID9": "DOR"} - def describe(self, file: File) -> dict: + def describe(self, file: File) -> Optional[FileDescription]: group, _uf, year = self.format(file) - _groups = {v: k for k, v 
in self.groups.items()} - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": str(file.basename), - "uf": uf, - "year": year, - "group": _groups[group], - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description + groups = {v: k for k, v in self.groups.items()} + + return FileDescription( + name=str(file.basename), + uf=UFs.get(_uf, _uf), + year=year, + group=groups.get(group, group), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: if "CID9" in str(file.path): @@ -762,19 +726,20 @@ class SINAN(Database): "ZIKA": "Zika Vírus", } - def describe(self, file: File) -> dict: - if file.extension.upper() == ".DBC": - dis_code, year = self.format(file) + def describe(self, file: File) -> Optional[FileDescription]: + if not isinstance(file, File) or file.extension.upper() != ".DBC": + return None - description = { - "name": str(file.basename), - "disease": self.diseases[dis_code], - "year": zfill_year(year), - "size": file.info["size"], - "last_update": file.info["modify"], - } - return description - return {} + dis_code, year = self.format(file) + + return FileDescription( + name=str(file.basename), + disease=self.diseases.get(dis_code, "Unknown"), + group=dis_code, + year=zfill_year(year), + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: year = file.name[-2:] @@ -796,7 +761,8 @@ def get_files( year: Optional[Union[str, int, list]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() + in [".DBC", ".DBF"], self.files) ) if dis_code: @@ -804,7 +770,8 @@ def get_files( if codes and not all(code in self.diseases for code in codes): raise ValueError( - f"Unknown disease(s): {set(codes).difference(set(self.diseases))}" + f"Unknown disease(s): {set( + 
codes).difference(set(self.diseases))}" ) files = list(filter(lambda f: self.format(f)[0] in codes, files)) @@ -832,26 +799,20 @@ class SINASC(Database): "DNR": "Dados dos Nascidos Vivos por UF de residência", } - def describe(self, file: File) -> dict: - if file.extension.upper() == ".DBC": - group, _uf, year = self.format(file) - - try: - uf = UFs[_uf] - except KeyError: - uf = _uf - - description = { - "name": file.basename, - "group": self.groups[group], - "uf": uf, - "year": year, - "size": file.info["size"], - "last_update": file.info["modify"], - } - - return description - return {} + def describe(self, file: File) -> Optional[FileDescription]: + if not isinstance(file, File) or file.extension.upper() != ".DBC": + return None + + group_code, _uf, year = self.format(file) + + return FileDescription( + name=str(file.basename), + group=self.groups.get(group_code, group_code), + uf=UFs.get(_uf, _uf), + year=year, + size=file.info.get("size", 0), + last_update=file.info.get("modify"), + ) def format(self, file: File) -> tuple: if file.name == "DNEX2021": diff --git a/pysus/api/ftp/models.py b/pysus/api/ftp/models.py new file mode 100644 index 00000000..56632e34 --- /dev/null +++ b/pysus/api/ftp/models.py @@ -0,0 +1,29 @@ +import dateparser +from pydantic import BaseModel, ConfigDict, field_validator +from typing import Optional, Union +from datetime import datetime + + +class FileDescription(BaseModel): + model_config = ConfigDict(coerce_numbers_to_str=True) + + name: str + group: str + year: int + size: int + last_update: datetime + uf: Optional[str] = None + month: Optional[str] = None + disease: Optional[str] = None + + @field_validator("last_update", mode="before") + @classmethod + def parse_modify_date(cls, v: Union[str, datetime]) -> datetime: + if isinstance(v, datetime): + return v + + parsed = dateparser.parse(str(v)) + if parsed: + return parsed + + return datetime.now() diff --git a/pysus/management/__init__.py b/pysus/management/__init__.py new file 
mode 100644 index 00000000..e69de29b From 2b1b88845569f0b41bc2faba079138680e62221e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Thu, 19 Mar 2026 13:33:21 -0300 Subject: [PATCH 6/6] start building the dadosgov models to extract & describe the files within the datasets --- pysus/api/dadosgov/client.py | 6 +- pysus/api/dadosgov/databases.py | 90 ++++++ pysus/api/dadosgov/models.py | 92 +++++- pysus/api/ducklake/catalog.py | 24 ++ pysus/api/ducklake/storage.py | 15 + pysus/api/ftp/__init__.py | 1 + pysus/api/ftp/client.py | 519 ------------------------------ pysus/api/ftp/databases.py | 37 +-- pysus/api/ftp/models.py | 537 ++++++++++++++++++++++++++++++-- pysus/api/models.py | 29 ++ pysus/management/ingest.py | 116 +++++++ pysus/management/utils.py | 16 + 12 files changed, 901 insertions(+), 581 deletions(-) create mode 100644 pysus/api/dadosgov/databases.py create mode 100644 pysus/api/models.py create mode 100644 pysus/management/ingest.py create mode 100644 pysus/management/utils.py diff --git a/pysus/api/dadosgov/client.py b/pysus/api/dadosgov/client.py index 54b45691..77aae96c 100644 --- a/pysus/api/dadosgov/client.py +++ b/pysus/api/dadosgov/client.py @@ -2,7 +2,7 @@ from typing import List, Optional from pydantic import TypeAdapter from pysus.api.dadosgov.models import ( - DatasetDetail, + Dataset, DatasetSummary, ) from pysus import __version__ @@ -48,6 +48,6 @@ def list_datasets( adapter = TypeAdapter(List[DatasetSummary]) return adapter.validate_python(data) - def get_dataset(self, id: str) -> DatasetDetail: + def get_dataset(self, id: str) -> Dataset: data = self._get(f"/publico/conjuntos-dados/{id}") - return DatasetDetail.model_validate(data) + return Dataset.model_validate(data) diff --git a/pysus/api/dadosgov/databases.py b/pysus/api/dadosgov/databases.py new file mode 100644 index 00000000..8d9e3561 --- /dev/null +++ b/pysus/api/dadosgov/databases.py @@ -0,0 +1,90 @@ +__all__ = [ + "CNES", + "PNI", + "SIA", + "SINAN", +] + 
+from typing import List, Optional, Union + +from .models import Dataset, Resource +from pysus.utils import UFs, parse_UFs, to_list, zfill_year, MONTHS +from pysus.api.models import FileDescription + + +class CNES(Dataset): + name = "CNES" + ids = ( + "40a0d093-b12f-44a4-bdc7-bae8eb54dd04", + "9455b341-b06e-408e-8e10-54b32b3d74ec", + ) + + def describe(self, file: Resource) -> Optional[FileDescription]: ... + + def format(self, file: Resource) -> tuple: ... + + def get_files( + self, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + ) -> List[Resource]: ... + + +class PNI(Dataset): + name = "PNI" + ids = ( + "2989d396-cb09-47e7-a3b8-a4b951ca0200", + "543aa08a-46c4-44e8-802e-198daa30753d", + "04292d08-ee4f-463a-b7b5-76cfb76775b3", + "7ed6eecc-c254-475c-92c5-daba5727596b", + "783b7456-6a6c-4025-a8bd-8e9caa0fb962", + "c6c3c6f3-2026-48a2-84ac-d8039714a0ba", + "9a25b796-80e3-444a-a4e7-405f5596d8ab", + ) + + def describe(self, file: Resource) -> Optional[FileDescription]: ... + + def format(self, file: Resource) -> tuple: ... + + def get_files( + self, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + ) -> List[Resource]: ... + + +class SIA(Dataset): + name = "SIA" + ids = ("9a335cb7-2b4f-4fce-8947-e8441b4a90af",) + + def describe(self, file: Resource) -> Optional[FileDescription]: ... + + def format(self, file: Resource) -> tuple: ... + + def get_files( + self, + group: Union[List[str], str], + uf: Optional[Union[List[str], str]] = None, + year: Optional[Union[list, str, int]] = None, + month: Optional[Union[list, str, int]] = None, + ) -> List[Resource]: ... + + +class SINAN(Dataset): + name = "SINAN" + ids = ( + "4d5e5d44-58a8-4d67-b8aa-4ef1e4b00a1c", + "5699abe0-0510-4da8-b47d-209b3bb32b34", + "4557ba96-7d52-4a56-bd6f-f99a5af09f77", + "740ce8f4-7a5d-4351-aad4-7623f2490ada", + ) + + def describe(self, file: Resource) -> Optional[FileDescription]: ... 
+ + def format(self, file: Resource) -> tuple: ... + + def get_files( + self, + dis_code: Optional[Union[str, list]] = None, + year: Optional[Union[str, int, list]] = None, + ) -> List[Resource]: ... diff --git a/pysus/api/dadosgov/models.py b/pysus/api/dadosgov/models.py index 407e3560..5d388b1d 100644 --- a/pysus/api/dadosgov/models.py +++ b/pysus/api/dadosgov/models.py @@ -1,10 +1,15 @@ +import zipfile import requests +import urllib3 from pathlib import Path from datetime import datetime as dt from typing import Optional, List, Any, Annotated, Union -from pydantic import BaseModel, Field, BeforeValidator +from pydantic import BaseModel, Field, BeforeValidator, field_validator from pysus import CACHEPATH +from pysus.api.models import FileDescription + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def to_datetime(value: Any) -> Optional[dt]: @@ -41,12 +46,12 @@ def __str__(self): class Resource(BaseModel): id: str title: str = Field(alias="titulo") - description: str = Field(alias="descricao") + description: Optional[str] = Field(None, alias="descricao") url: str = Field(alias="link") format: str = Field(alias="formato") - size: int = Field(alias="tamanho") + api_size: int = Field(alias="tamanho") cataloging_date: Optional[str] = Field(None, alias="dataCatalogacao") - last_modified: Optional[str] = Field( + last_modified: Optional[str | dt] = Field( None, alias="dataUltimaAtualizacaoArquivo", ) @@ -59,30 +64,78 @@ class Resource(BaseModel): def __str__(self): return self.file_name + @field_validator("last_modified", mode="before") + @classmethod + def parse_date(cls, v: Optional[str]) -> Optional[dt]: + if not v or isinstance(v, dt): + return v + try: + return dt.strptime(v, "%d/%m/%Y") + except ValueError: + return None + + @property + def basename(self) -> str: + name = self.url.split("/")[-1] + return name.rstrip(".zip").replace("_csv", ".csv") + + @property + def size(self) -> int: + try: + response = requests.head( + self.url, + 
verify=False, + allow_redirects=True, + timeout=5, + ) + return int(response.headers.get("Content-Length", 0)) + except (requests.RequestException, ValueError): + return self.api_size + def download(self, target_dir: Union[str, Path] = CACHEPATH) -> Path: target_path = Path(target_dir) target_path.mkdir(parents=True, exist_ok=True) - output_file = target_path / ( - self.file_name or f"{self.id}.{self.format.lower()}" - ) + tmp_file = target_path / f"{self.id}.download" - response = requests.get(self.url, stream=True) + response = requests.get(self.url, stream=True, verify=False) response.raise_for_status() - with open(output_file, "wb") as f: + with open(tmp_file, "wb") as f: for chunk in response.iter_content(chunk_size=8192): - f.write(chunk) + if chunk: + f.write(chunk) + + if zipfile.is_zipfile(tmp_file): + with zipfile.ZipFile(tmp_file) as z: + members = z.namelist() + + if len(members) == 1: + name = members[0] + output_file = target_path / name + z.extract(name, target_path) + else: + z.extractall(target_path) + output_file = target_path + + tmp_file.unlink() + return output_file + + output_file = target_path / ( + self.file_name or f"{self.id}.{self.format.lower()}" + ) + + tmp_file.rename(output_file) return output_file -class DatasetDetail(BaseModel): +class Dataset(BaseModel): id: str title: str = Field(alias="titulo") slug: str = Field(alias="nome") organization: str = Field(alias="organizacao") - description: str = Field(alias="descricao") + description: Optional[str] = Field(None, alias="descricao") license: Optional[str] = Field(None, alias="licenca") maintainer: Optional[str] = Field(None, alias="responsavel") maintainer_email: Optional[str] = Field(None, alias="emailResponsavel") @@ -93,8 +146,7 @@ class DatasetDetail(BaseModel): is_open_data: Bool = Field(alias="dadosAbertos") is_discontinued: Bool = Field(alias="descontinuado") is_private: Bool = Field(False, alias="privado") - metadata_updated: DateTime = Field( - None, 
alias="dataUltimaAtualizacaoMetadados") + metadata_updated: DateTime = Field(None, alias="dataUltimaAtualizacaoMetadados") file_updated: DateTime = Field(None, alias="dataUltimaAtualizacaoArquivo") cataloging_date: DateTime = Field(None, alias="dataCatalogacao") visibility: str = Field(alias="visibilidade") @@ -105,6 +157,18 @@ class DatasetDetail(BaseModel): def __str__(self): return self.id + def describe(self, resource: Resource) -> FileDescription: + return FileDescription( + name=resource.basename, + group=self.slug, + year=int, + size=resource.size, + last_update=resource.last_modified or self.file_updated or dt.now(), + uf=None, + month=None, + disease=self.title, + ) + class DatasetSummary(BaseModel): id: str diff --git a/pysus/api/ducklake/catalog.py b/pysus/api/ducklake/catalog.py index e69de29b..738de713 100644 --- a/pysus/api/ducklake/catalog.py +++ b/pysus/api/ducklake/catalog.py @@ -0,0 +1,24 @@ +from typing import List + + +class CatalogBrowser: + def __init__(self, client): + self.client = client + + def list_datasets(self) -> List[str]: + res = self.client.con.execute("SELECT name FROM datasets").fetchall() + return [r[0] for r in res] + + def get_groups(self, dataset_name: str): + query = f""" + SELECT g.name, g.id + FROM dataset_groups g + JOIN datasets d ON g.dataset_id = d.id + WHERE d.name = '{dataset_name}' + """ + return self.client.con.execute(query).df() + + def get_files(self, group_id: int): + return self.client.con.execute( + f"SELECT * FROM files WHERE group_id = {group_id}" + ).df() diff --git a/pysus/api/ducklake/storage.py b/pysus/api/ducklake/storage.py index e69de29b..caf36c1e 100644 --- a/pysus/api/ducklake/storage.py +++ b/pysus/api/ducklake/storage.py @@ -0,0 +1,15 @@ +import duckdb + + +class StorageManager: + def __init__(self, connection: duckdb.DuckDBPyConnection): + self.con = connection + + def query(self, sql: str): + return self.con.execute(sql).df() + + def get_file_url(self, path: str) -> str: + return 
f"s3://pysus/public/{path}" + + def list_tables(self): + return self.con.execute("SHOW TABLES").df() diff --git a/pysus/api/ftp/__init__.py b/pysus/api/ftp/__init__.py index 852efe38..af4485c2 100644 --- a/pysus/api/ftp/__init__.py +++ b/pysus/api/ftp/__init__.py @@ -1,5 +1,6 @@ from .client import * # noqa from .databases import * # noqa +from .models import * # noqa AVAILABLE_DATABASES = [ diff --git a/pysus/api/ftp/client.py b/pysus/api/ftp/client.py index 14eec50e..f74598e0 100644 --- a/pysus/api/ftp/client.py +++ b/pysus/api/ftp/client.py @@ -1,56 +1,22 @@ from __future__ import annotations -__all__ = ["File", "Directory", "Database"] - -import asyncio -import os import pathlib -from datetime import datetime from ftplib import FTP from typing import ( - Any, - Dict, Final, - List, Optional, Protocol, - Tuple, - TypedDict, - Union, runtime_checkable, ) -import humanize -from aioftp import Client -from loguru import logger -from tqdm import tqdm -from typing_extensions import Self from pysus import CACHEPATH from pysus.data.local import Data -from pysus.utils import to_list - -# Type aliases -PathLike = Union[str, pathlib.Path] -FileContent = Dict[str, Union["Directory", "File"]] -# Constants __cachepath__: Final[pathlib.Path] = pathlib.Path(CACHEPATH) __cachepath__.mkdir(exist_ok=True) -# Cache storage -DIRECTORY_CACHE: Dict[str, "Directory"] = {} - - -class FileInfo(TypedDict): - """File information dictionary type""" - - size: Union[int, str] - type: str - modify: datetime - - @runtime_checkable class Downloadable(Protocol): async def download(self, local_dir: str) -> Data: @@ -77,488 +43,3 @@ def close(cls) -> None: if cls._instance and cls._instance.sock: cls._instance.close() cls._instance = None - - -class File: - """ - FTP File representation with improved type safety. - - This class provides methods for interacting with files on the DataSUS FTP - server. 
It includes functionality for downloading files synchronously and - asynchronously, as well as retrieving file information in a human-readable - format. - - Attributes: - name (str): The name of the file without the extension. - extension (str): The file extension. - basename (str): The full name of the file including the extension. - path (str): The full path to the file on the FTP server. - parent_path (str): The directory path where the file is located on the - FTP server. - __info (FileInfo): Metadata about the file, including size, type, and - modification date. - - Methods: - info() -> Dict[str, str]: - Returns a dictionary with human-readable file information, - including size, type, and modification date. - - download( - local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None - ) -> Data: - Downloads the file to the specified local directory. If a progress - bar (_pbar) is provided, it updates the progress bar during the - download. - - async_download(local_dir: str = CACHEPATH) -> Data: - Asynchronously downloads the file to the specified local directory. - - _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]: - Static method to parse a line from the FTP LIST command and - extract file information. 
- """ - - def __init__(self, path: str, name: str, info: FileInfo) -> None: - self.name, self.extension = os.path.splitext(name) - self.basename: str = f"{self.name}{self.extension}" - self.path: str = ( - f"{path}/{self.basename}" - if not path.endswith("/") - else f"{path}{self.basename}" - ) - self.parent_path: str = os.path.dirname(self.path) - self.__info: FileInfo = info - - @property - def info(self) -> Dict[str, str]: - """Returns a dictionary with human-readable file information""" - return { - "size": self.__info["size"], - "type": f"{self.extension[1:].upper()} file", - "modify": self.__info["modify"].strftime("%Y-%m-%d %I:%M%p"), - } - - def download( - self, local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None - ) -> Data: - """Downloads the file to the specified local directory""" - target_dir = pathlib.Path(local_dir) - target_dir.mkdir(exist_ok=True, parents=True) - - filepath = target_dir / self.basename - filesize = int(self.__info["size"]) - - # Check for existing files - for ext in (".parquet", ".dbf", ""): - existing = filepath.with_suffix(ext) - if existing.exists(): - if _pbar: - _pbar.update(filesize - _pbar.n) - return Data(str(existing), _pbar=_pbar) # type: ignore - - if _pbar: - _pbar.unit = "B" - _pbar.unit_scale = True - _pbar.reset(total=filesize) - _pbar.set_description(self.basename) - - try: - ftp = FTPSingleton.get_instance() - with open(filepath, "wb") as output: - - def callback(data: bytes) -> None: - output.write(data) - if _pbar: - _pbar.update(len(data)) - - ftp.retrbinary(f"RETR {self.path}", callback) - - except Exception as exc: - if filepath.exists(): - filepath.unlink() - raise exc - finally: - FTPSingleton.close() - - if _pbar: - _pbar.update(filesize - _pbar.n) - return Data(str(filepath), _pbar=_pbar) # type: ignore - - async def async_download(self, local_dir: str = CACHEPATH) -> Data: - """ - Asynchronously downloads the file to the specified local directory - """ - target_dir = pathlib.Path(local_dir) - 
target_dir.mkdir(exist_ok=True, parents=True) - filepath = target_dir / self.basename - - # Check existing files - for ext in (".parquet", ".dbf", ""): - existing = filepath.with_suffix(ext) - if existing.exists(): - return Data(str(existing)) # type: ignore - - async with Client.context( - host="ftp.datasus.gov.br", parse_list_line_custom=self._line_parser - ) as client: - await client.login() - await client.download(self.path, str(filepath), write_into=True) - - return Data(str(filepath)) # type: ignore - - @staticmethod - def _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]: - """Static method to parse a line from the FTP LIST command and extract - file information - """ - line = file_line.decode("utf-8") - if "" in line: - date, time, _, *name = line.strip().split() - info = {"size": 0, "type": "dir"} - name = " ".join(name) - else: - date, time, size, name = line.strip().split() - info = {"size": size, "type": "file"} - - modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") - info["modify"] = modify.strftime("%m/%d/%Y %I:%M%p") - return name, info - - def __str__(self) -> str: - return str(self.basename) - - def __repr__(self) -> str: - return str(self.basename) - - def __hash__(self): - return hash(self.path) - - def __eq__(self, other): - if isinstance(other, File): - return self.path == other.path - return False - - -class Directory: - """ - Directory class with caching and lazy loading. - - The Directory class represents a directory in a file system and includes - mechanisms for caching instances and lazy loading of directory content. - When a Directory instance is created, it normalizes the provided path - and caches the instance. The content of the directory is not loaded - immediately; instead, it is loaded when the `content` property or the - `load` method is accessed or called. - - Attributes: - path (str): The normalized path of the directory. - name (str): The name of the directory. 
- parent (Directory): The parent directory instance. - loaded (bool): Indicates whether the directory content has been loaded. - __content__ (Dict[str, Union[File, Directory]]): A dictionary - containing the directory's content, with names as keys and File or - Directory instances as values. - - Methods: - _normalize_path(path: str) -> str: Normalizes the given path. - _get_root_directory() -> Directory: Returns the root directory - instance, creating it if necessary. - _init_root_child(name: str) -> None: Initializes a root child - directory. - _init_regular(parent_path: str, name: str) -> None: Initializes a - regular directory. - content() -> List[Union[Directory, File]]: Returns the content of the - directory, loading it if necessary. - load() -> Self: Loads the content of the directory and marks it as - loaded. - """ - - name: str - path: str - parent: "Directory" - loaded: bool - __content__: Dict[str, Union[File, "Directory"]] - - def __new__(cls, path: str, _is_root_child: bool = False) -> "Directory": - normalized_path = os.path.normpath(path) - - # Handle root directory case - if normalized_path == "/": - return cls._get_root_directory() - - # Return cached instance if exists - if normalized_path in DIRECTORY_CACHE: - return DIRECTORY_CACHE[normalized_path] - - # Use os.path.split for reliable path splitting - parent_path, name = os.path.split(normalized_path) - - # Handle empty parent path - if not parent_path: - parent_path = "/" - # Handle parent paths that don't start with / - elif not parent_path.startswith("/"): - parent_path = "/" + parent_path - - # Create new instance - instance = super().__new__(cls) - instance.path = normalized_path - - if _is_root_child: - instance._init_root_child(name) - else: - instance._init_regular(parent_path, name) - - DIRECTORY_CACHE[normalized_path] = instance - return instance - - @staticmethod - def _normalize_path(path: str) -> str: - """Normalizes the given path""" - path = f"/{path}" if not path.startswith("/") 
else path - return path.removesuffix("/") - - @classmethod - def _get_root_directory(cls) -> Directory: - """Returns the root directory instance, creating it if necessary""" - if "/" not in DIRECTORY_CACHE: - root = super().__new__(cls) - root.parent = root - root.name = "/" - root.path = "/" - root.loaded = False - root.__content__ = {} - DIRECTORY_CACHE["/"] = root - return DIRECTORY_CACHE["/"] - - def _init_root_child(self, name: str) -> None: - """Initializes a root child directory""" - self.parent = DIRECTORY_CACHE["/"] - self.name = name - self.loaded = False - self.__content__ = {} - - def _init_regular(self, parent_path: str, name: str) -> None: - """Initializes a regular directory""" - self.parent = Directory(parent_path) - self.name = name - self.loaded = False - self.__content__ = {} - - @property - def content(self) -> List[Union[Directory, File]]: - """Returns the content of the directory, loading it if necessary""" - if not self.loaded: - self.load() - return list(self.__content__.values()) - - def load(self) -> Self: - """Loads the content of the directory and marks it as loaded""" - self.__content__ |= load_directory_content(self.path) - self.loaded = True - return self - - def reload(self): - """ - Reloads the content of the Directory - """ - self.loaded = False - return self.load() - - def __str__(self) -> str: - return self.path - - def __repr__(self) -> str: - return self.path - - def __hash__(self): - return hash(self.path) - - def __eq__(self, other): - if isinstance(other, Directory): - return self.path == other.path - return False - - -def load_directory_content(path: str) -> FileContent: - """Directory content loading""" - content: FileContent = {} - - try: - ftp = FTPSingleton.get_instance() - ftp.cwd(path) - path = path.removesuffix("/") - - def line_parser(line: str): - if "" in line: - date, time, _, name = line.strip().split(maxsplit=3) - modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") - info = {"size": 0, "type": 
"dir", "modify": modify} - xpath = f"{path}/{name}" - content[name] = Directory(xpath) - else: - date, time, size, name = line.strip().split(maxsplit=3) - modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") - info: FileInfo = { - "size": size, - "type": "file", - "modify": modify, - } - content[name] = File(path, name, info) - - ftp.retrlines("LIST", line_parser) - except Exception as exc: - raise exc - finally: - FTPSingleton.close() - - to_remove = [ - name - for name in content - if name.upper().endswith(".DBF") - and name.upper().replace(".DBF", ".DBC") in content - ] - - for name in to_remove: - del content[name] - - return content - - -class Database: - """ - Base class for PySUS databases. Contains common functions - for accessing DataSUS FTP server. With this class, it is - possible to construct database classes for different DataSUS - files, sharing state and functionalities. - - Parameters - ftp [FTP]: ftplib.FTP object for connecting in DataSUS server. - name [str]: database name - paths [list[Directory]]: server paths where the files are located - files [list[Files]]: list of parsed Files from Database content - metadata [dict]: dict containing database's metadata information - - Methods - load(): Loads the database paths content to its own content - describe(file): describes a file according to each database's - spec. Returns a dict with file information - format(file): extracts from file name database related info, such as - year, month, UF and/or other useful info for the DB - get_files(Any): filters files using database related format, depending - on the database's files specs - """ - - ftp: FTP - name: str - paths: Tuple[Directory, ...] 
- metadata: dict - __content__: Dict[str, Union[Directory, File]] - - def __init__(self) -> None: - self.ftp = FTP("ftp.datasus.gov.br") - self.__content__ = {} - - def __repr__(self) -> str: - return f"{self.name} - {self.metadata['long_name']}" - - @property - def content(self) -> List[Union[Directory, File]]: - """ - Lists Database content. The `paths` will be loaded if this property is - called or if explicitly using `load()`. To add specific Directory - inside content, `load()` the directory and call `content` again. - """ - if not self.__content__: - logger.info( - "content is not loaded, use `load()` to load default paths") - return [] - return sorted(list(self.__content__.values()), key=str) - - @property - def files(self) -> List[File]: - """ - Lists Files inside content. To load a specific Directory inside - content, just `load()` this directory and list files again. - """ - return [f for f in self.content if isinstance(f, File)] - - def load( - self, - directories: Optional[ - Union[Directory, List[Directory], Tuple[Directory, ...]] - ] = None, - ) -> Database: - """ - Loads specific directories to Database content. Will aggregate the - files found within Directories into Database.content. - """ - if not directories: - directories = list(self.paths) - - directories_list = to_list(directories) - - for directory in directories_list: - if not isinstance(directory, Directory): - raise ValueError("Invalid directory provided.") - - directory.load() - self.__content__.update(directory.__content__) - return self - - def describe(self, file: File) -> dict: - """ - Receives a `File` and returns a dict with its information, - according to the database's specifications. This method is - helpful to return the FTP's file in a humanized format - - Parameters - file [File]: a `File` instance - """ - ... - - def format(self, file: File) -> tuple: - """ - Formats a File based on the database specifications, - extracting its name's parameters given a pattern. 
- - Parameters - file [File]: a `File` instance - """ - ... - - def get_files(self, *args, **kwargs) -> list[File]: - """ - Filters the list of `File`s according to each database file - pattern, as UFs, Groups, Years, Months, etc. This method will - also be responsible to look for wrong values within the file - pattern and possible extra characters in its basename - """ - ... - - def download(self, files: List[File], local_dir: str = CACHEPATH) -> List[str]: - """ - Downloads a list of Files. - """ - files = to_list(files) - pbar = tqdm(total=len(files), dynamic_ncols=True) - dfiles = [] - for file in files: - if isinstance(file, File): - dfiles.append(file.download(local_dir=local_dir, _pbar=pbar)) - pbar.close() - if len(dfiles) == 1: - return dfiles[0] - return dfiles - - async def async_download(self, files: List[File], local_dir: str = CACHEPATH): - """ - Asynchronously downloads a list of files - """ - - async def download_file(file): - if isinstance(file, File): - await file.async_download(local_dir=local_dir) - - tasks = [download_file(file) for file in files] - await asyncio.gather(*tasks) diff --git a/pysus/api/ftp/databases.py b/pysus/api/ftp/databases.py index e77cb709..92c7e387 100644 --- a/pysus/api/ftp/databases.py +++ b/pysus/api/ftp/databases.py @@ -12,9 +12,9 @@ from typing import List, Optional, Union, Literal -from pysus.api.ftp import Database, Directory, File +from pysus.api.ftp.models import Database, Directory, File from pysus.utils import UFs, parse_UFs, to_list, zfill_year, MONTHS -from .models import FileDescription +from pysus.api.models import FileDescription class CIHA(Database): @@ -77,16 +77,14 @@ def get_files( group: Union[List[str], str] = "CIHA", ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise 
ValueError( - f"Unknown CIHA Group(s): {set( - groups).difference(list(self.groups))}" + f"Unknown CIHA Group(s): {set(groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -157,8 +155,7 @@ def load( if not all(group in self.groups for group in [gr.upper() for gr in groups]): raise ValueError( - f"Unknown CNES group(s): {set( - groups).difference(self.groups)}" + f"Unknown CNES group(s): {set(groups).difference(self.groups)}" ) for group in groups: @@ -360,16 +357,14 @@ def get_files( year: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown PNI Group(s): {set( - groups).difference(list(self.groups))}" + f"Unknown PNI Group(s): {set(groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -466,16 +461,14 @@ def get_files( month: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown SIA Group(s): {set( - groups).difference(list(self.groups))}" + f"Unknown SIA Group(s): {set(groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -559,16 +552,14 @@ def get_files( month: Optional[Union[list, str, int]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) ) groups = [gr.upper() for gr in 
to_list(group)] if not all(gr in list(self.groups) for gr in groups): raise ValueError( - f"Unknown SIH Group(s): {set( - groups).difference(list(self.groups))}" + f"Unknown SIH Group(s): {set(groups).difference(list(self.groups))}" ) files = list(filter(lambda f: self.format(f)[0] in groups, files)) @@ -761,8 +752,7 @@ def get_files( year: Optional[Union[str, int, list]] = None, ) -> List[File]: files = list( - filter(lambda f: f.extension.upper() - in [".DBC", ".DBF"], self.files) + filter(lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files) ) if dis_code: @@ -770,8 +760,7 @@ def get_files( if codes and not all(code in self.diseases for code in codes): raise ValueError( - f"Unknown disease(s): {set( - codes).difference(set(self.diseases))}" + f"Unknown disease(s): {set(codes).difference(set(self.diseases))}" ) files = list(filter(lambda f: self.format(f)[0] in codes, files)) diff --git a/pysus/api/ftp/models.py b/pysus/api/ftp/models.py index 56632e34..d6a0bb0e 100644 --- a/pysus/api/ftp/models.py +++ b/pysus/api/ftp/models.py @@ -1,29 +1,524 @@ -import dateparser -from pydantic import BaseModel, ConfigDict, field_validator -from typing import Optional, Union +from __future__ import annotations + +__all__ = ["File", "Directory", "Database"] + +import asyncio +import os +import pathlib from datetime import datetime +from ftplib import FTP +from typing import ( + Any, + Dict, + List, + Optional, + Tuple, + Union, + TypedDict, +) + +from aioftp import Client +from loguru import logger +from tqdm import tqdm +from typing_extensions import Self + +from pysus import CACHEPATH +from pysus.data.local import Data +from pysus.utils import to_list +from .client import FTPSingleton + + +DIRECTORY_CACHE: Dict[str, "Directory"] = {} +FileContent = Dict[str, Union["Directory", "File"]] + + +class FileInfo(TypedDict): + """File information dictionary type""" + + size: Union[int, str] + type: str + modify: datetime + + +class File: + """ + FTP File representation with 
improved type safety. + + This class provides methods for interacting with files on the DataSUS FTP + server. It includes functionality for downloading files synchronously and + asynchronously, as well as retrieving file information in a human-readable + format. + + Attributes: + name (str): The name of the file without the extension. + extension (str): The file extension. + basename (str): The full name of the file including the extension. + path (str): The full path to the file on the FTP server. + parent_path (str): The directory path where the file is located on the + FTP server. + __info (FileInfo): Metadata about the file, including size, type, and + modification date. + + Methods: + info() -> Dict[str, str]: + Returns a dictionary with human-readable file information, + including size, type, and modification date. + + download( + local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None + ) -> Data: + Downloads the file to the specified local directory. If a progress + bar (_pbar) is provided, it updates the progress bar during the + download. + + async_download(local_dir: str = CACHEPATH) -> Data: + Asynchronously downloads the file to the specified local directory. + + _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]: + Static method to parse a line from the FTP LIST command and + extract file information. 
+ """ + + def __init__(self, path: str, name: str, info: FileInfo) -> None: + self.name, self.extension = os.path.splitext(name) + self.basename: str = f"{self.name}{self.extension}" + self.path: str = ( + f"{path}/{self.basename}" + if not path.endswith("/") + else f"{path}{self.basename}" + ) + self.parent_path: str = os.path.dirname(self.path) + self.__info: FileInfo = info + + @property + def info(self) -> Dict[str, str]: + """Returns a dictionary with human-readable file information""" + return { + "size": self.__info["size"], + "type": f"{self.extension[1:].upper()}", + "modify": self.__info["modify"].strftime("%Y-%m-%d %I:%M%p"), + } + + def download( + self, local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None + ) -> Data: + """Downloads the file to the specified local directory""" + target_dir = pathlib.Path(local_dir) + target_dir.mkdir(exist_ok=True, parents=True) + + filepath = target_dir / self.basename + filesize = int(self.__info["size"]) + + # Check for existing files + for ext in (".parquet", ".dbf", ""): + existing = filepath.with_suffix(ext) + if existing.exists(): + if _pbar: + _pbar.update(filesize - _pbar.n) + return Data(str(existing), _pbar=_pbar) # type: ignore + + if _pbar: + _pbar.unit = "B" + _pbar.unit_scale = True + _pbar.reset(total=filesize) + _pbar.set_description(self.basename) + try: + ftp = FTPSingleton.get_instance() + with open(filepath, "wb") as output: -class FileDescription(BaseModel): - model_config = ConfigDict(coerce_numbers_to_str=True) + def callback(data: bytes) -> None: + output.write(data) + if _pbar: + _pbar.update(len(data)) + + ftp.retrbinary(f"RETR {self.path}", callback) + + except Exception as exc: + if filepath.exists(): + filepath.unlink() + raise exc + finally: + FTPSingleton.close() + + if _pbar: + _pbar.update(filesize - _pbar.n) + return Data(str(filepath), _pbar=_pbar) # type: ignore + + async def async_download(self, local_dir: str = CACHEPATH) -> Data: + """ + Asynchronously downloads the file to 
the specified local directory + """ + target_dir = pathlib.Path(local_dir) + target_dir.mkdir(exist_ok=True, parents=True) + filepath = target_dir / self.basename + + # Check existing files + for ext in (".parquet", ".dbf", ""): + existing = filepath.with_suffix(ext) + if existing.exists(): + return Data(str(existing)) # type: ignore + + async with Client.context( + host="ftp.datasus.gov.br", parse_list_line_custom=self._line_parser + ) as client: + await client.login() + await client.download(self.path, str(filepath), write_into=True) + + return Data(str(filepath)) # type: ignore + + @staticmethod + def _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]: + """Static method to parse a line from the FTP LIST command and extract + file information + """ + line = file_line.decode("utf-8") + if "" in line: + date, time, _, *name = line.strip().split() + info = {"size": 0, "type": "dir"} + name = " ".join(name) + else: + date, time, size, name = line.strip().split() + info = {"size": size, "type": "file"} + + modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") + info["modify"] = modify.strftime("%m/%d/%Y %I:%M%p") + return name, info + + def __str__(self) -> str: + return str(self.basename) + + def __repr__(self) -> str: + return str(self.basename) + + def __hash__(self): + return hash(self.path) + + def __eq__(self, other): + if isinstance(other, File): + return self.path == other.path + return False + + +class Directory: + """ + Directory class with caching and lazy loading. + + The Directory class represents a directory in a file system and includes + mechanisms for caching instances and lazy loading of directory content. + When a Directory instance is created, it normalizes the provided path + and caches the instance. The content of the directory is not loaded + immediately; instead, it is loaded when the `content` property or the + `load` method is accessed or called. + + Attributes: + path (str): The normalized path of the directory. 
+ name (str): The name of the directory. + parent (Directory): The parent directory instance. + loaded (bool): Indicates whether the directory content has been loaded. + __content__ (Dict[str, Union[File, Directory]]): A dictionary + containing the directory's content, with names as keys and File or + Directory instances as values. + + Methods: + _normalize_path(path: str) -> str: Normalizes the given path. + _get_root_directory() -> Directory: Returns the root directory + instance, creating it if necessary. + _init_root_child(name: str) -> None: Initializes a root child + directory. + _init_regular(parent_path: str, name: str) -> None: Initializes a + regular directory. + content() -> List[Union[Directory, File]]: Returns the content of the + directory, loading it if necessary. + load() -> Self: Loads the content of the directory and marks it as + loaded. + """ name: str - group: str - year: int - size: int - last_update: datetime - uf: Optional[str] = None - month: Optional[str] = None - disease: Optional[str] = None - - @field_validator("last_update", mode="before") + path: str + parent: "Directory" + loaded: bool + __content__: Dict[str, Union[File, "Directory"]] + + def __new__(cls, path: str, _is_root_child: bool = False) -> "Directory": + normalized_path = os.path.normpath(path) + + # Handle root directory case + if normalized_path == "/": + return cls._get_root_directory() + + # Return cached instance if exists + if normalized_path in DIRECTORY_CACHE: + return DIRECTORY_CACHE[normalized_path] + + # Use os.path.split for reliable path splitting + parent_path, name = os.path.split(normalized_path) + + # Handle empty parent path + if not parent_path: + parent_path = "/" + # Handle parent paths that don't start with / + elif not parent_path.startswith("/"): + parent_path = "/" + parent_path + + # Create new instance + instance = super().__new__(cls) + instance.path = normalized_path + + if _is_root_child: + instance._init_root_child(name) + else: + 
instance._init_regular(parent_path, name) + + DIRECTORY_CACHE[normalized_path] = instance + return instance + + @staticmethod + def _normalize_path(path: str) -> str: + """Normalizes the given path""" + path = f"/{path}" if not path.startswith("/") else path + return path.removesuffix("/") + @classmethod - def parse_modify_date(cls, v: Union[str, datetime]) -> datetime: - if isinstance(v, datetime): - return v + def _get_root_directory(cls) -> Directory: + """Returns the root directory instance, creating it if necessary""" + if "/" not in DIRECTORY_CACHE: + root = super().__new__(cls) + root.parent = root + root.name = "/" + root.path = "/" + root.loaded = False + root.__content__ = {} + DIRECTORY_CACHE["/"] = root + return DIRECTORY_CACHE["/"] + + def _init_root_child(self, name: str) -> None: + """Initializes a root child directory""" + self.parent = DIRECTORY_CACHE["/"] + self.name = name + self.loaded = False + self.__content__ = {} + + def _init_regular(self, parent_path: str, name: str) -> None: + """Initializes a regular directory""" + self.parent = Directory(parent_path) + self.name = name + self.loaded = False + self.__content__ = {} + + @property + def content(self) -> List[Union[Directory, File]]: + """Returns the content of the directory, loading it if necessary""" + if not self.loaded: + self.load() + return list(self.__content__.values()) + + def load(self) -> Self: + """Loads the content of the directory and marks it as loaded""" + self.__content__ |= load_directory_content(self.path) + self.loaded = True + return self + + def reload(self): + """ + Reloads the content of the Directory + """ + self.loaded = False + return self.load() + + def __str__(self) -> str: + return self.path + + def __repr__(self) -> str: + return self.path + + def __hash__(self): + return hash(self.path) + + def __eq__(self, other): + if isinstance(other, Directory): + return self.path == other.path + return False + + +def load_directory_content(path: str) -> FileContent: + 
"""Directory content loading""" + content: FileContent = {} + + try: + ftp = FTPSingleton.get_instance() + ftp.cwd(path) + path = path.removesuffix("/") + + def line_parser(line: str): + if "" in line: + date, time, _, name = line.strip().split(maxsplit=3) + modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") + info = {"size": 0, "type": "dir", "modify": modify} + xpath = f"{path}/{name}" + content[name] = Directory(xpath) + else: + date, time, size, name = line.strip().split(maxsplit=3) + modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") + info: FileInfo = { + "size": size, + "type": "file", + "modify": modify, + } + content[name] = File(path, name, info) + + ftp.retrlines("LIST", line_parser) + except Exception as exc: + raise exc + finally: + FTPSingleton.close() + + to_remove = [ + name + for name in content + if name.upper().endswith(".DBF") + and name.upper().replace(".DBF", ".DBC") in content + ] + + for name in to_remove: + del content[name] + + return content + + +class Database: + """ + Base class for PySUS databases. Contains common functions + for accessing DataSUS FTP server. With this class, it is + possible to construct database classes for different DataSUS + files, sharing state and functionalities. + + Parameters + ftp [FTP]: ftplib.FTP object for connecting in DataSUS server. + name [str]: database name + paths [list[Directory]]: server paths where the files are located + files [list[Files]]: list of parsed Files from Database content + metadata [dict]: dict containing database's metadata information + + Methods + load(): Loads the database paths content to its own content + describe(file): describes a file according to each database's + spec. 
Returns a dict with file information + format(file): extracts from file name database related info, such as + year, month, UF and/or other useful info for the DB + get_files(Any): filters files using database related format, depending + on the database's files specs + """ + + ftp: FTP + name: str + paths: Tuple[Directory, ...] + metadata: dict + __content__: Dict[str, Union[Directory, File]] + + def __init__(self) -> None: + self.ftp = FTP("ftp.datasus.gov.br") + self.__content__ = {} + + def __repr__(self) -> str: + return f"{self.name} - {self.metadata['long_name']}" + + @property + def content(self) -> List[Union[Directory, File]]: + """ + Lists Database content. The `paths` will be loaded if this property is + called or if explicitly using `load()`. To add specific Directory + inside content, `load()` the directory and call `content` again. + """ + if not self.__content__: + logger.info("content is not loaded, use `load()` to load default paths") + return [] + return sorted(list(self.__content__.values()), key=str) + + @property + def files(self) -> List[File]: + """ + Lists Files inside content. To load a specific Directory inside + content, just `load()` this directory and list files again. + """ + return [f for f in self.content if isinstance(f, File)] + + def load( + self, + directories: Optional[ + Union[Directory, List[Directory], Tuple[Directory, ...]] + ] = None, + ) -> Database: + """ + Loads specific directories to Database content. Will aggregate the + files found within Directories into Database.content. 
+ """ + if not directories: + directories = list(self.paths) + + directories_list = to_list(directories) + + for directory in directories_list: + if not isinstance(directory, Directory): + raise ValueError("Invalid directory provided.") + + directory.load() + self.__content__.update(directory.__content__) + return self + + def describe(self, file: File) -> dict: + """ + Receives a `File` and returns a dict with its information, + according to the database's specifications. This method is + helpful to return the FTP's file in a humanized format + + Parameters + file [File]: a `File` instance + """ + ... + + def format(self, file: File) -> tuple: + """ + Formats a File based on the database specifications, + extracting its name's parameters given a pattern. + + Parameters + file [File]: a `File` instance + """ + ... + + def get_files(self, *args, **kwargs) -> list[File]: + """ + Filters the list of `File`s according to each database file + pattern, as UFs, Groups, Years, Months, etc. This method will + also be responsible to look for wrong values within the file + pattern and possible extra characters in its basename + """ + ... + + def download(self, files: List[File], local_dir: str = CACHEPATH) -> List[str]: + """ + Downloads a list of Files. 
+ """ + files = to_list(files) + pbar = tqdm(total=len(files), dynamic_ncols=True) + dfiles = [] + for file in files: + if isinstance(file, File): + dfiles.append(file.download(local_dir=local_dir, _pbar=pbar)) + pbar.close() + if len(dfiles) == 1: + return dfiles[0] + return dfiles + + async def async_download(self, files: List[File], local_dir: str = CACHEPATH): + """ + Asynchronously downloads a list of files + """ - parsed = dateparser.parse(str(v)) - if parsed: - return parsed + async def download_file(file): + if isinstance(file, File): + await file.async_download(local_dir=local_dir) - return datetime.now() + tasks = [download_file(file) for file in files] + await asyncio.gather(*tasks) diff --git a/pysus/api/models.py b/pysus/api/models.py new file mode 100644 index 00000000..56632e34 --- /dev/null +++ b/pysus/api/models.py @@ -0,0 +1,29 @@ +import dateparser +from pydantic import BaseModel, ConfigDict, field_validator +from typing import Optional, Union +from datetime import datetime + + +class FileDescription(BaseModel): + model_config = ConfigDict(coerce_numbers_to_str=True) + + name: str + group: str + year: int + size: int + last_update: datetime + uf: Optional[str] = None + month: Optional[str] = None + disease: Optional[str] = None + + @field_validator("last_update", mode="before") + @classmethod + def parse_modify_date(cls, v: Union[str, datetime]) -> datetime: + if isinstance(v, datetime): + return v + + parsed = dateparser.parse(str(v)) + if parsed: + return parsed + + return datetime.now() diff --git a/pysus/management/ingest.py b/pysus/management/ingest.py new file mode 100644 index 00000000..fc72c3d2 --- /dev/null +++ b/pysus/management/ingest.py @@ -0,0 +1,116 @@ +import requests +from typing import Literal, List +from pathlib import Path + +import boto3 +import duckdb +from sqlalchemy.orm import sessionmaker +from sqlalchemy import create_engine +from botocore.config import Config + +from pysus import CACHEPATH +from pysus.api.ducklake.models 
class S3Client:
    """Context-managed client for the public PySUS object-storage bucket.

    On ``__enter__`` the remote DuckLake catalog is pulled into the local
    cache and attached to a fresh DuckDB connection; on a clean
    ``__exit__`` the (possibly modified) catalog is pushed back.
    """

    def __init__(self, access_key: str, secret_key: str):
        self.access_key = access_key
        self.secret_key = secret_key
        self.bucket = "pysus"
        self.endpoint = "nbg1.your-objectstorage.com"
        self.catalog_local = CACHEPATH / "catalog.db"
        self.catalog_remote = "public/catalog.db"

        self.s3 = boto3.client(
            "s3",
            endpoint_url=f"https://{self.endpoint}",
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key,
            region_name="nbg1",
            # NOTE(review): endpoint appears to require SigV4 signing.
            config=Config(signature_version="s3v4"),
        )
        self.db = None  # DuckDB connection, populated by __enter__

    def __enter__(self):
        self.download_catalog()
        self.db = duckdb.connect()
        self._configure_duckdb()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.db:
            self.db.close()
        # Only publish the catalog when the managed block succeeded, so
        # a failed ingestion never overwrites the remote state.
        if exc_type is None:
            self.upload_catalog()

    @property
    def catalog_url(self) -> str:
        """Public HTTPS URL of the remote catalog object."""
        return f"https://{self.endpoint}/{self.bucket}/{self.catalog_remote}"

    def _configure_duckdb(self):
        """Load the ducklake extension and point DuckDB at the bucket.

        NOTE(review): credentials are interpolated into SQL text because
        DuckDB ``SET`` statements cannot take bound parameters; the SQL
        never leaves the local connection.
        """
        self.db.execute("INSTALL ducklake; LOAD ducklake;")
        self.db.execute(f"""
            SET s3_endpoint='{self.endpoint}';
            SET s3_region='nbg1';
            SET s3_url_style='path';
            SET s3_use_ssl=true;
            SET s3_access_key_id='{self.access_key}';
            SET s3_secret_access_key='{self.secret_key}';
        """)
        self.db.execute(f"ATTACH 'ducklake:{self.catalog_local}' AS pysus;")
        self.db.execute("USE pysus;")

    def download_catalog(self):
        """Fetch the remote catalog into the local cache, best effort.

        A network error is deliberately ignored (e.g. 404 before any
        catalog has ever been uploaded): DuckDB will then initialize a
        fresh catalog locally.
        """
        self.catalog_local.parent.mkdir(parents=True, exist_ok=True)
        try:
            # Timeout added: the original unbounded ``requests.get``
            # could hang the ingestion forever on a stalled connection.
            r = requests.get(self.catalog_url, timeout=60)
            r.raise_for_status()
            self.catalog_local.write_bytes(r.content)
        except requests.exceptions.RequestException:
            pass

    def upload_catalog(self):
        """Push the local catalog file back to the public bucket."""
        self.s3.upload_file(
            str(self.catalog_local),
            self.bucket,
            self.catalog_remote,
        )
bind=create_engine(f"duckdb:///{client.catalog_local}") + ) + + def ingest( + self, + origin: Literal["ftp", "dadosgov"], + file: FTPFile | Resource, + force: bool = False, + ) -> None: ... + + def bulk_ingest( + self, + origin: Literal["ftp", "dadosgov"], + files: List[FTPFile | Resource], + ) -> None: ... + + def _ftp_ingest(self, file: FTPFile) -> None: ... + + def _dadosgov_ingest(self, file: Resource) -> None: ... + + def _should_insert(self, file: FTPFile | Resource) -> bool: ... + + def _download_file(self, file: FTPFile | Resource) -> Path: ... + + def _extract_metadata(self, file: FTPFile | Resource) -> File: ... + + def _upload_parquet(self, parquet: Path, metadata: File) -> None: ... diff --git a/pysus/management/utils.py b/pysus/management/utils.py new file mode 100644 index 00000000..cbe14e9b --- /dev/null +++ b/pysus/management/utils.py @@ -0,0 +1,16 @@ +import duckdb +from pathlib import Path + + +def csv_to_parquet(csv_file: Path) -> Path: + parquet = csv_file.with_suffix(".parquet") + con = duckdb.connect() + con.execute(f""" + COPY ( + SELECT * + FROM read_csv_auto('{csv_file}') + ) + TO '{parquet}' + (FORMAT PARQUET) + """) + return parquet