Example: Investigating Significant Wave Height - Southern California¶
Show code cell content
import intake_erddap
import intake
import numpy as np
import cartopy.crs as ccrs
import pandas as pd
from matplotlib import pyplot as plt
from shapely import geometry
from datetime import datetime
def figure(*args, figsize=(18, 8), facecolor='white', **kwargs):
return plt.subplots(*args, figsize=figsize, facecolor=facecolor, **kwargs)
Here’s an example of finding all stations that have significant wave height from the main IOOS ERDDAP server.
server = 'https://erddap.sensors.ioos.us/erddap'
cat = intake.open_erddap_cat(
server=server,
standard_names=["sea_surface_wind_wave_significant_height"]
)
df = pd.DataFrame([i.metadata for i in cat.values()])
sub_df = df[['datasetID', 'minTime', 'maxTime', 'title']][:5]
sub_df.style.set_table_attributes('class="dataframe docutils"').hide(axis="index")
datasetID | minTime | maxTime | title |
---|---|---|---|
wmo_46023 | 2010-01-01T00:50:00Z | 2010-09-08T13:50:00Z | 17 NM WNW of Point Arguello, CA (46023) |
ism-cencoos-wmo_46059 | 2021-12-04T03:30:00Z | 2023-09-12T11:10:00Z | 357 NM West of San Francisco, CA (46059) |
wmo_46059 | 2010-12-12T01:50:00Z | 2023-09-12T10:20:00Z | 357 NM West of San Francisco, CA (46059) |
ism-secoora-wmo_41001 | 2021-07-29T18:00:00Z | 2023-09-12T11:00:00Z | 41001 - EAST HATTERAS - 150 NM East of Cape Hatteras |
wmo_41001 | 2015-12-11T02:20:00Z | 2023-09-12T11:00:00Z | 41001 - EAST HATTERAS - 150 NM East of Cape Hatteras |
We can plot the locations of these stations on the globe.
fig, ax = figure(subplot_kw=dict(projection=ccrs.PlateCarree()))
ax.coastlines()
ax.gridlines(draw_labels=True)
ax.scatter(df['minLongitude'], df['minLatitude'])
<matplotlib.collections.PathCollection at 0x7f44dfee1fa0>
/home/docs/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/110m_physical/ne_110m_coastline.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
Since our region of interest is off the coast of Southern California, we’ll specify a bounding box and highlight the stations that intersect our region.
# Southern California Region
bbox = (-122.42, 32.04, -115.40, 35.28)
box = geometry.box(*bbox)
fig, ax = figure(subplot_kw=dict(projection=ccrs.PlateCarree()))
ax.coastlines()
ax.gridlines(draw_labels=True)
ax.scatter(df['minLongitude'], df['minLatitude'])
ax.add_geometries([box], facecolor='red', alpha=0.4, crs=ccrs.PlateCarree())
ax.set_extent([-130., -60., 20., 45.], crs=ccrs.PlateCarree())
/home/docs/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_coastline.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
We can pass this bounding box directly to the ERDDAP Catalog constructor, as well as limit our query only to stations that contain data after 2014:
cat = intake.open_erddap_cat(
server=server,
bbox=bbox,
start_time=datetime(2014, 1, 1),
standard_names=["sea_surface_wind_wave_significant_height"]
)
len(cat)
13
df = pd.DataFrame([i.metadata for i in cat.values()])
sub_df = df[['datasetID', 'minTime', 'maxTime', 'title']]
sub_df.style.set_table_attributes('class="dataframe docutils"').hide(axis="index")
datasetID | minTime | maxTime | title |
---|---|---|---|
ism-cencoos-wmo_46025 | 2021-07-29T17:50:00Z | 2023-09-12T11:00:00Z | 46025 - Santa Monica Basin - 33NM WSW of Santa Monica, CA |
ism-cencoos-wmo_46047 | 2021-07-29T17:50:00Z | 2023-09-12T11:40:00Z | 46047 - TANNER BANK - 121NM West of San Diego, CA |
ism-cencoos-wmo_46086 | 2021-07-29T17:50:00Z | 2023-09-12T11:00:00Z | 46086 - SAN CLEMENTE BASIN - 27NM SE OF San Clemente Is, CA |
ism-cencoos-edu_ucsd_cdip_46219 | 2021-07-29T18:26:00Z | 2023-02-27T22:26:00Z | 46219 - San Nicolas Island, CA (067) |
edu_ucsd_cdip_46242 | 2015-05-05T12:28:00Z | 2019-07-27T15:28:00Z | 46242 - Camp Pendleton Nearshore, CA (043) |
ism-cencoos-edu_ucsd_cdip_46251 | 2021-07-29T17:56:00Z | 2023-02-27T22:26:00Z | 46251 - Santa Cruz Basin, CA |
edu_ucsd_cdip_46255 | 2015-12-11T02:07:00Z | 2017-04-02T04:16:00Z | 46255 - Begg Rock, CA (138) |
ism-cencoos-mil_army_usace_46259 | 2021-07-29T17:56:00Z | 2023-02-28T00:56:00Z | 46259 - Santa Lucia Escarpment, CA (222) |
edu_ucsd_cdip_46217 | 2015-05-05T14:07:00Z | 2019-05-13T02:04:00Z | Anacapa Passage, CA - 111 (46217) |
ism-cencoos-edu_ucsd_cdip_46215 | 2021-07-29T17:56:00Z | 2023-04-20T15:26:00Z | Diablo Canyon, CA - 076 (46215) |
ism-cencoos-wmo_46011 | 2021-07-29T17:50:00Z | 2023-09-12T11:00:00Z | Santa Maria - 21 NM NW of Point Arguello, CA (46011) |
wmo_46011 | 2010-02-16T22:50:00Z | 2023-09-12T10:30:00Z | Santa Maria - 21 NM NW of Point Arguello, CA (46011) |
ism-cencoos-wmo_46054 | 2021-07-29T17:50:00Z | 2023-09-12T11:30:00Z | West Santa Barbara 38 NM West of Santa Barbara, CA (46054) |
# Southern California Region
bbox = (-122.42, 32.04, -115.40, 35.28)
box = geometry.box(*bbox)
fig, ax = figure(subplot_kw=dict(projection=ccrs.PlateCarree()))
ax.coastlines()
ax.gridlines(draw_labels=True)
ax.scatter(df['minLongitude'], df['minLatitude'])
ax.set_title("Station Locations")
Text(0.5, 1.0, 'Station Locations')
/home/docs/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/10m_physical/ne_10m_coastline.zip
warnings.warn(f'Downloading: {url}', DownloadWarning)
We can now interrogate each of those stations and get a timeseries for the significant wave height data.
# Just get 4
stations = list(cat)[:4]
fig, axs = figure(nrows=len(stations), figsize=(18,18))
for i, dataset_id in enumerate(stations):
ax = axs[i]
source = cat[dataset_id]
df = source.read()
t = df['time (UTC)'].astype('M8[s]')
sig_wave_height = df['sea_surface_wave_significant_height (m)']
ax.plot(t, sig_wave_height)
ax.set_title(f'{dataset_id} Significant Wave Height (m)')
ax.set_xlim(np.datetime64('2014-01-01'), np.datetime64('2022-12-01'))
ax.grid()
fig.tight_layout(pad=1)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[9], line 10
8 source = cat[dataset_id]
9 df = source.read()
---> 10 t = df['time (UTC)'].astype('M8[s]')
11 sig_wave_height = df['sea_surface_wave_significant_height (m)']
12 ax.plot(t, sig_wave_height)
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/generic.py:6532, in NDFrame.astype(self, dtype, copy, errors)
6528 results = [ser.astype(dtype, copy=copy) for _, ser in self.items()]
6530 else:
6531 # else, only a single dtype is given
-> 6532 new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
6533 res = self._constructor_from_mgr(new_data, axes=new_data.axes)
6534 return res.__finalize__(self, method="astype")
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/internals/managers.py:414, in BaseBlockManager.astype(self, dtype, copy, errors)
411 elif using_copy_on_write():
412 copy = False
--> 414 return self.apply(
415 "astype",
416 dtype=dtype,
417 copy=copy,
418 errors=errors,
419 using_cow=using_copy_on_write(),
420 )
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/internals/managers.py:354, in BaseBlockManager.apply(self, f, align_keys, **kwargs)
352 applied = b.apply(f, **kwargs)
353 else:
--> 354 applied = getattr(b, f)(**kwargs)
355 result_blocks = extend_blocks(applied, result_blocks)
357 out = type(self).from_blocks(result_blocks, self.axes)
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/internals/blocks.py:616, in Block.astype(self, dtype, copy, errors, using_cow)
596 """
597 Coerce to the new dtype.
598
(...)
612 Block
613 """
614 values = self.values
--> 616 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
618 new_values = maybe_coerce_values(new_values)
620 refs = None
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:238, in astype_array_safe(values, dtype, copy, errors)
235 dtype = dtype.numpy_dtype
237 try:
--> 238 new_values = astype_array(values, dtype, copy=copy)
239 except (ValueError, TypeError):
240 # e.g. _astype_nansafe can fail on object-dtype of strings
241 # trying to convert to float
242 if errors == "ignore":
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:183, in astype_array(values, dtype, copy)
180 values = values.astype(dtype, copy=copy)
182 else:
--> 183 values = _astype_nansafe(values, dtype, copy=copy)
185 # in pandas we don't store numpy str dtypes, so convert to object
186 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/dtypes/astype.py:112, in _astype_nansafe(arr, dtype, copy, skipna)
110 dti = to_datetime(arr.ravel())
111 dta = dti._data.reshape(arr.shape)
--> 112 return dta.astype(dtype, copy=False)._ndarray
114 elif lib.is_np_dtype(dtype, "m"):
115 from pandas.core.construction import ensure_wrapped_if_datetimelike
File ~/checkouts/readthedocs.org/user_builds/intake-erddap/conda/latest/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:708, in DatetimeArray.astype(self, dtype, copy)
702 # TODO: preserve freq?
704 elif self.tz is not None and lib.is_np_dtype(dtype, "M"):
705 # pre-2.0 behavior for DTA/DTI was
706 # values.tz_convert("UTC").tz_localize(None), which did not match
707 # the Series behavior
--> 708 raise TypeError(
709 "Cannot use .astype to convert from timezone-aware dtype to "
710 "timezone-naive dtype. Use obj.tz_localize(None) or "
711 "obj.tz_convert('UTC').tz_localize(None) instead."
712 )
714 elif (
715 self.tz is None
716 and lib.is_np_dtype(dtype, "M")
717 and dtype != self.dtype
718 and is_unitless(dtype)
719 ):
720 raise TypeError(
721 "Casting to unit-less dtype 'datetime64' is not supported. "
722 "Pass e.g. 'datetime64[ns]' instead."
723 )
TypeError: Cannot use .astype to convert from timezone-aware dtype to timezone-naive dtype. Use obj.tz_localize(None) or obj.tz_convert('UTC').tz_localize(None) instead.