Skip to content

Commit 3d96d5c

Browse files
authored
Merge pull request #208 from ehinman/add-waterdata-demo
Add waterdata demo
2 parents 67444e5 + 52c6a41 commit 3d96d5c

8 files changed

Lines changed: 701 additions & 39 deletions

File tree

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@
66

77
## Latest Announcements
88

9-
:mega: **12/04/2025:** `dataretrieval` now features the new `waterdata` module,
9+
:mega: **01/16/2026:** `dataretrieval` now features the `waterdata` module,
1010
which provides access to USGS's modernized [Water Data
1111
APIs](https://api.waterdata.usgs.gov/). The Water Data API endpoints include
1212
daily values, **instantaneous values**, field measurements, time series metadata,
13-
and discrete water quality data from the Samples database. This new module will
14-
eventually replace the `nwis` module, which provides access to the legacy [NWIS
15-
Water Services](https://waterservices.usgs.gov/).
13+
and discrete water quality data from the [Samples database](https://waterdata.usgs.gov/download-samples/#dataProfile=site). This new module replaces the `nwis` module, which provides access to the legacy [NWIS
14+
Water Services](https://waterservices.usgs.gov/). Take a look at the new [`waterdata` module demo notebook](demos/WaterData_demo.ipynb), which walks through an extended example using most of the available `waterdata` functions.
1615

1716
Check out the [NEWS](NEWS.md) file for all updates and announcements.
1817

dataretrieval/waterdata/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
# Public API exports
1313
from .api import (
14-
_check_profiles,
1514
get_codes,
1615
get_continuous,
1716
get_daily,
@@ -41,7 +40,6 @@
4140
"get_reference_table",
4241
"get_samples",
4342
"get_time_series_metadata",
44-
"_check_profiles",
4543
"CODE_SERVICES",
4644
"SERVICES",
4745
"PROFILES",

dataretrieval/waterdata/api.py

Lines changed: 26 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@
1717
from dataretrieval.waterdata.types import (
1818
CODE_SERVICES,
1919
METADATA_COLLECTIONS,
20-
PROFILE_LOOKUP,
2120
PROFILES,
2221
SERVICES,
2322
)
2423
from dataretrieval.waterdata.utils import (
2524
SAMPLES_URL,
2625
get_ogc_data,
2726
_construct_api_requests,
28-
_walk_pages
27+
_walk_pages,
28+
_check_profiles
2929
)
3030

3131
# Set up logger for this module
@@ -691,9 +691,13 @@ def get_time_series_metadata(
691691
parameter_name: Optional[Union[str, List[str]]] = None,
692692
properties: Optional[Union[str, List[str]]] = None,
693693
statistic_id: Optional[Union[str, List[str]]] = None,
694+
hydrologic_unit_code: Optional[Union[str, List[str]]] = None,
695+
state_name: Optional[Union[str, List[str]]] = None,
694696
last_modified: Optional[Union[str, List[str]]] = None,
695697
begin: Optional[Union[str, List[str]]] = None,
696698
end: Optional[Union[str, List[str]]] = None,
699+
begin_utc: Optional[Union[str, List[str]]] = None,
700+
end_utc: Optional[Union[str, List[str]]] = None,
697701
unit_of_measure: Optional[Union[str, List[str]]] = None,
698702
computation_period_identifier: Optional[Union[str, List[str]]] = None,
699703
computation_identifier: Optional[Union[str, List[str]]] = None,
@@ -742,6 +746,17 @@ def get_time_series_metadata(
742746
Example codes include 00001 (max), 00002 (min), and 00003 (mean).
743747
A complete list of codes and their descriptions can be found at
744748
https://help.waterdata.usgs.gov/code/stat_cd_nm_query?stat_nm_cd=%25&fmt=html.
749+
hydrologic_unit_code : string or list of strings, optional
750+
The United States is divided and sub-divided into successively smaller
751+
hydrologic units which are classified into four levels: regions,
752+
sub-regions, accounting units, and cataloging units. The hydrologic
753+
units are arranged within each other, from the smallest (cataloging units)
754+
to the largest (regions). Each hydrologic unit is identified by a unique
755+
hydrologic unit code (HUC) consisting of two to eight digits based on the
756+
four levels of classification in the hydrologic unit system.
757+
state_name : string or list of strings, optional
758+
The name of the state or state equivalent in which the monitoring location
759+
is located.
745760
last_modified : string, optional
746761
The last time a record was refreshed in our database. This may happen
747762
due to regular operational processes and does not necessarily indicate
@@ -760,6 +775,14 @@ def get_time_series_metadata(
760775
for the last 36 hours
761776
762777
begin : string or list of strings, optional
778+
This field contains the same information as "begin_utc", but in the
779+
local time of the monitoring location. It is retained for backwards
780+
compatibility, but will be removed in V1 of these APIs.
781+
end : string or list of strings, optional
782+
This field contains the same information as "end_utc", but in the
783+
local time of the monitoring location. It is retained for backwards
784+
compatibility, but will be removed in V1 of these APIs.
785+
begin_utc : string or list of strings, optional
763786
The datetime of the earliest observation in the time series. Together
764787
with end_utc, this field represents the period of record of a time series.
765788
Note that some time series may have large gaps in their collection
@@ -776,7 +799,7 @@ def get_time_series_metadata(
776799
* Half-bounded intervals: "2018-02-12T00:00:00Z/.." or "../2018-03-18T12:31:12Z"
777800
* Duration objects: "P1M" for data from the past month or "PT36H" for the last 36 hours
778801
779-
end : string or list of strings, optional
802+
end_utc : string or list of strings, optional
780803
The datetime of the most recent observation in the time series. Data returned by
781804
this endpoint updates at most once per day, and potentially less frequently than
782805
that, and as such there may be more recent observations within a time series
@@ -1703,31 +1726,3 @@ def get_samples(
17031726

17041727
return df, BaseMetadata(response)
17051728

1706-
1707-
def _check_profiles(
1708-
service: SERVICES,
1709-
profile: PROFILES,
1710-
) -> None:
1711-
"""Check whether a service profile is valid.
1712-
1713-
Parameters
1714-
----------
1715-
service : string
1716-
One of the service names from the "services" list.
1717-
profile : string
1718-
One of the profile names from "results_profiles",
1719-
"locations_profiles", "activities_profiles",
1720-
"projects_profiles" or "organizations_profiles".
1721-
"""
1722-
valid_services = get_args(SERVICES)
1723-
if service not in valid_services:
1724-
raise ValueError(
1725-
f"Invalid service: '{service}'. Valid options are: {valid_services}."
1726-
)
1727-
1728-
valid_profiles = PROFILE_LOOKUP[service]
1729-
if profile not in valid_profiles:
1730-
raise ValueError(
1731-
f"Invalid profile: '{profile}' for service '{service}'. "
1732-
f"Valid options are: {valid_profiles}."
1733-
)

dataretrieval/waterdata/utils.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55
import re
66
from datetime import datetime
7-
from typing import Any, Dict, List, Optional, Tuple, Union
7+
from typing import Any, Dict, List, Optional, Tuple, Union, get_args
88

99
import pandas as pd
1010
import requests
@@ -13,6 +13,12 @@
1313
from dataretrieval.utils import BaseMetadata
1414
from dataretrieval import __version__
1515

16+
from dataretrieval.waterdata.types import (
17+
PROFILE_LOOKUP,
18+
PROFILES,
19+
SERVICES,
20+
)
21+
1622
try:
1723
import geopandas as gpd
1824

@@ -498,6 +504,7 @@ def _get_resp_data(resp: requests.Response, geopd: bool) -> pd.DataFrame:
498504
)
499505
df.columns = [col.replace("properties_", "") for col in df.columns]
500506
df.rename(columns={"geometry_coordinates": "geometry"}, inplace=True)
507+
df = df.loc[:, ~df.columns.duplicated()]
501508
return df
502509

503510
# Organize json into geodataframe and make sure id column comes along.
@@ -824,3 +831,31 @@ def get_ogc_data(
824831
return return_list, metadata
825832

826833

834+
def _check_profiles(
835+
service: SERVICES,
836+
profile: PROFILES,
837+
) -> None:
838+
"""Check whether a service profile is valid.
839+
840+
Parameters
841+
----------
842+
service : string
843+
One of the service names from the "services" list.
844+
profile : string
845+
One of the profile names from "results_profiles",
846+
"locations_profiles", "activities_profiles",
847+
"projects_profiles" or "organizations_profiles".
848+
"""
849+
valid_services = get_args(SERVICES)
850+
if service not in valid_services:
851+
raise ValueError(
852+
f"Invalid service: '{service}'. Valid options are: {valid_services}."
853+
)
854+
855+
valid_profiles = PROFILE_LOOKUP[service]
856+
if profile not in valid_profiles:
857+
raise ValueError(
858+
f"Invalid profile: '{profile}' for service '{service}'. "
859+
f"Valid options are: {valid_profiles}."
860+
)
861+

0 commit comments

Comments
 (0)