Example: Investigating Significant Wave Height - Southern California

Hide code cell content
import intake_erddap
import intake
import numpy as np

import cartopy.crs as ccrs

import pandas as pd

from matplotlib import pyplot as plt
from shapely import geometry

from datetime import datetime

def figure(*args, figsize=(18, 8), facecolor='white', **kwargs):
    return plt.subplots(*args, figsize=figsize, facecolor=facecolor, **kwargs)

Here’s an example of finding all stations that have significant wave height from the main IOOS ERDDAP server.

server = 'https://erddap.sensors.ioos.us/erddap'
cat = intake.open_erddap_cat(
    server=server,
    standard_names=["sea_surface_wind_wave_significant_height"]
)
df = pd.DataFrame([i.metadata for i in cat.values()])
sub_df = df[['datasetID', 'minTime', 'maxTime', 'title']][:5]
sub_df.style.set_table_attributes('class="dataframe docutils"').hide(axis="index")
datasetID minTime maxTime title
wmo_46023 2010-01-01T00:50:00Z 2010-09-08T13:50:00Z 17 NM WNW of Point Arguello, CA (46023)
ism-cencoos-wmo_46059 2021-12-04T03:30:00Z 2023-09-12T11:10:00Z 357 NM West of San Francisco, CA (46059)
wmo_46059 2010-12-12T01:50:00Z 2023-09-12T10:20:00Z 357 NM West of San Francisco, CA (46059)
ism-secoora-wmo_41001 2021-07-29T18:00:00Z 2023-09-12T11:00:00Z 41001 - EAST HATTERAS - 150 NM East of Cape Hatteras
wmo_41001 2015-12-11T02:20:00Z 2023-09-12T11:00:00Z 41001 - EAST HATTERAS - 150 NM East of Cape Hatteras

We can plot the locations of these stations on the globe.

fig, ax = figure(subplot_kw=dict(projection=ccrs.PlateCarree()))
ax.coastlines()
ax.gridlines(draw_labels=True)
ax.scatter(df['minLongitude'], df['minLatitude'])
<matplotlib.collections.PathCollection at 0x7f44dfee1fa0>
/home/docs/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_coastline.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
../_images/75fa2b8a2a40d5ca17ef03eaa316bc78ac9f43b0371a32187e10cbacd255f6a2.png

Since our region of interest is off the coast of Southern California, we’ll specify a bounding box and highlight the stations that intersect our region.

# Southern California Region
bbox = (-122.42, 32.04, -115.40, 35.28)
box = geometry.box(*bbox)

fig, ax = figure(subplot_kw=dict(projection=ccrs.PlateCarree()))
ax.coastlines()
ax.gridlines(draw_labels=True)
ax.scatter(df['minLongitude'], df['minLatitude'])
ax.add_geometries([box], facecolor='red', alpha=0.4, crs=ccrs.PlateCarree())
ax.set_extent([-130., -60., 20., 45.], crs=ccrs.PlateCarree())
/home/docs/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_coastline.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
../_images/f736aa5af350925282d4e1cd63ff4c5f414749d5ab2819206a1d9232d9e250d4.png

We can pass this bounding box directly to the ERDDAP Catalog constructor, as well as limit our query only to stations that contain data after 2014:

cat = intake.open_erddap_cat(
    server=server,
    bbox=bbox,
    start_time=datetime(2014, 1, 1),
    standard_names=["sea_surface_wind_wave_significant_height"]
)

len(cat)
13
df = pd.DataFrame([i.metadata for i in cat.values()])
sub_df = df[['datasetID', 'minTime', 'maxTime', 'title']]
sub_df.style.set_table_attributes('class="dataframe docutils"').hide(axis="index")
datasetID minTime maxTime title
ism-cencoos-wmo_46025 2021-07-29T17:50:00Z 2023-09-12T11:00:00Z 46025 - Santa Monica Basin - 33NM WSW of Santa Monica, CA
ism-cencoos-wmo_46047 2021-07-29T17:50:00Z 2023-09-12T11:40:00Z 46047 - TANNER BANK - 121NM West of San Diego, CA
ism-cencoos-wmo_46086 2021-07-29T17:50:00Z 2023-09-12T11:00:00Z 46086 - SAN CLEMENTE BASIN - 27NM SE OF San Clemente Is, CA
ism-cencoos-edu_ucsd_cdip_46219 2021-07-29T18:26:00Z 2023-02-27T22:26:00Z 46219 - San Nicolas Island, CA (067)
edu_ucsd_cdip_46242 2015-05-05T12:28:00Z 2019-07-27T15:28:00Z 46242 - Camp Pendleton Nearshore, CA (043)
ism-cencoos-edu_ucsd_cdip_46251 2021-07-29T17:56:00Z 2023-02-27T22:26:00Z 46251 - Santa Cruz Basin, CA
edu_ucsd_cdip_46255 2015-12-11T02:07:00Z 2017-04-02T04:16:00Z 46255 - Begg Rock, CA (138)
ism-cencoos-mil_army_usace_46259 2021-07-29T17:56:00Z 2023-02-28T00:56:00Z 46259 - Santa Lucia Escarpment, CA (222)
edu_ucsd_cdip_46217 2015-05-05T14:07:00Z 2019-05-13T02:04:00Z Anacapa Passage, CA - 111 (46217)
ism-cencoos-edu_ucsd_cdip_46215 2021-07-29T17:56:00Z 2023-04-20T15:26:00Z Diablo Canyon, CA - 076 (46215)
ism-cencoos-wmo_46011 2021-07-29T17:50:00Z 2023-09-12T11:00:00Z Santa Maria - 21 NM NW of Point Arguello, CA (46011)
wmo_46011 2010-02-16T22:50:00Z 2023-09-12T10:30:00Z Santa Maria - 21 NM NW of Point Arguello, CA (46011)
ism-cencoos-wmo_46054 2021-07-29T17:50:00Z 2023-09-12T11:30:00Z West Santa Barbara 38 NM West of Santa Barbara, CA (46054)
# Southern California Region
bbox = (-122.42, 32.04, -115.40, 35.28)
box = geometry.box(*bbox)

fig, ax = figure(subplot_kw=dict(projection=ccrs.PlateCarree()))
ax.coastlines()
ax.gridlines(draw_labels=True)
ax.scatter(df['minLongitude'], df['minLatitude'])
ax.set_title("Station Locations")
Text(0.5, 1.0, 'Station Locations')
/home/docs/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/10m_physical/ne_10m_coastline.zip
  warnings.warn(f'Downloading: {url}', DownloadWarning)
../_images/ae98d1519e1611e4cab511e50b114b68d895672d7874e3715b2c5f964c29aa75.png

We can now interrogate each of those stations and get a timeseries for the significant wave height data.

# Just get 4
stations = list(cat)[:4]

fig, axs = figure(nrows=len(stations), figsize=(18,18))

for i, dataset_id in enumerate(stations):
    ax = axs[i]
    source = cat[dataset_id]
    df = source.read()
    t = df['time (UTC)'].astype('M8[s]')
    sig_wave_height = df['sea_surface_wave_significant_height (m)']
    ax.plot(t, sig_wave_height)
    ax.set_title(f'{dataset_id} Significant Wave Height (m)')
    ax.set_xlim(np.datetime64('2014-01-01'), np.datetime64('2022-12-01'))
    ax.grid()
fig.tight_layout(pad=1)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[9], line 10
      8 source = cat[dataset_id]
      9 df = source.read()
---> 10 t = df['time (UTC)'].astype('M8[s]')
     11 sig_wave_height = df['sea_surface_wave_significant_height (m)']
     12 ax.plot(t, sig_wave_height)

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/generic.py:6532, in NDFrame.astype(self, dtype, copy, errors)
   6528     results = [ser.astype(dtype, copy=copy) for _, ser in self.items()]
   6530 else:
   6531     # else, only a single dtype is given
-> 6532     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   6533     res = self._constructor_from_mgr(new_data, axes=new_data.axes)
   6534     return res.__finalize__(self, method="astype")

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/internals/managers.py:414, in BaseBlockManager.astype(self, dtype, copy, errors)
    411 elif using_copy_on_write():
    412     copy = False
--> 414 return self.apply(
    415     "astype",
    416     dtype=dtype,
    417     copy=copy,
    418     errors=errors,
    419     using_cow=using_copy_on_write(),
    420 )

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/internals/managers.py:354, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
    352         applied = b.apply(f, **kwargs)
    353     else:
--> 354         applied = getattr(b, f)(**kwargs)
    355     result_blocks = extend_blocks(applied, result_blocks)
    357 out = type(self).from_blocks(result_blocks, self.axes)

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/internals/blocks.py:616, in Block.astype(self, dtype, copy, errors, using_cow)
    596 """
    597 Coerce to the new dtype.
    598 
   (...)
    612 Block
    613 """
    614 values = self.values
--> 616 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    618 new_values = maybe_coerce_values(new_values)
    620 refs = None

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:238, in astype_array_safe(values, dtype, copy, errors)
    235     dtype = dtype.numpy_dtype
    237 try:
--> 238     new_values = astype_array(values, dtype, copy=copy)
    239 except (ValueError, TypeError):
    240     # e.g. _astype_nansafe can fail on object-dtype of strings
    241     #  trying to convert to float
    242     if errors == "ignore":

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:183, in astype_array(values, dtype, copy)
    180     values = values.astype(dtype, copy=copy)
    182 else:
--> 183     values = _astype_nansafe(values, dtype, copy=copy)
    185 # in pandas we don't store numpy str dtypes, so convert to object
    186 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:112, in _astype_nansafe(arr, dtype, copy, skipna)
    110     dti = to_datetime(arr.ravel())
    111     dta = dti._data.reshape(arr.shape)
--> 112     return dta.astype(dtype, copy=False)._ndarray
    114 elif lib.is_np_dtype(dtype, "m"):
    115     from pandas.core.construction import ensure_wrapped_if_datetimelike

File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:708, in DatetimeArray.astype(self, dtype, copy)
    702     # TODO: preserve freq?
    704 elif self.tz is not None and lib.is_np_dtype(dtype, "M"):
    705     # pre-2.0 behavior for DTA/DTI was
    706     #  values.tz_convert("UTC").tz_localize(None), which did not match
    707     #  the Series behavior
--> 708     raise TypeError(
    709         "Cannot use .astype to convert from timezone-aware dtype to "
    710         "timezone-naive dtype. Use obj.tz_localize(None) or "
    711         "obj.tz_convert('UTC').tz_localize(None) instead."
    712     )
    714 elif (
    715     self.tz is None
    716     and lib.is_np_dtype(dtype, "M")
    717     and dtype != self.dtype
    718     and is_unitless(dtype)
    719 ):
    720     raise TypeError(
    721         "Casting to unit-less dtype 'datetime64' is not supported. "
    722         "Pass e.g. 'datetime64[ns]' instead."
    723     )

TypeError: Cannot use .astype to convert from timezone-aware dtype to timezone-naive dtype. Use obj.tz_localize(None) or obj.tz_convert('UTC').tz_localize(None) instead.
../_images/08133c6cf84a8eb6c53d3609f7d446e81ae23f2b3d0895c71fd4264048d1612b.png