CDIPpy API reference

cdippy.cdipnc

Active

Bases: CDIPnc

Loads an "active" (predeploy, moored, offsite, recovered) rt nc file for the given station and deployment.

E.g. a = Active('100', 6, 'predeploy') # The predeploy data for stn 100 dep 6.

Source code in cdippy/cdipnc.py
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
class Active(CDIPnc):
    """Loads an "active" (predeploy, moored, offsite, recovered) rt nc file
    for the given station and deployment.

    E.g. a = Active('100', 6, 'predeploy')  # The predeploy data for stn 100 dep 6.
    """

    def __init__(
        self,
        stn: str,
        deployment: int,
        active_state_key: str,
        data_dir: str = None,
        org: str = None,
    ):
        """
        PARAMETERS
        ----------
        stn : str
           Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
        deployment : int
            Specific station deployment number. Must be >= 1.
        active_state_key : str
            Values: predeploy|moored|offsite|recovered
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        org : str [optional]
            Organization; passed through to set_dataset_info.
        """
        CDIPnc.__init__(self, data_dir)
        self.set_dataset_info(stn, org, active_state_key, deployment)
        # Active files contain unevaluated/nonpub records, so default to "all"
        # rather than the base-class default of "public".
        self.pub_set_default = "all"

__init__(stn: str, deployment: int, active_state_key: str, data_dir: str = None, org: str = None)

PARAMETERS

stn : str Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2 active_state_key : str Values: predeploy|moored|offsite|recovered deployment : int [optional] Supply this to access specific station deployment data. Must be >= 1. data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server.

Source code in cdippy/cdipnc.py
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
def __init__(
    self,
    stn: str,
    deployment: int,
    active_state_key: str,
    data_dir: str = None,
    org: str = None,
):
    """
    PARAMETERS
    ----------
    stn : str
       Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
    deployment : int
        Specific station deployment number. Must be >= 1.
    active_state_key : str
        Values: predeploy|moored|offsite|recovered
    data_dir : str [optional]
        Either a full path to a directory containing a local directory hierarchy
        of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
    org : str [optional]
        Organization; passed through to set_dataset_info.
    """
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, active_state_key, deployment)
    # Active files contain unevaluated/nonpub records, so default to "all"
    # rather than the base-class default of "public".
    self.pub_set_default = "all"

ActiveXY

Bases: Archive

Loads an "active" (predeploy, moored, offsite, recovered) xy nc file for the given station and deployment.

Source code in cdippy/cdipnc.py
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
class ActiveXY(Archive):
    """Loads an "active" (predeploy, moored, offsite, recovered) xy nc file
    for the given station and deployment.
    """

    def __init__(self, stn, deployment, dataset, data_dir=None, org=None):
        """
        PARAMETERS
        ----------
        dataset : str
            Active dataset name.
            Values are: predeploy|moored|offsite|recovered.

        For other parameters see CDIPnc.set_dataset_info.
        """
        # Call CDIPnc.__init__ directly rather than Archive.__init__ so the
        # dataset name is not forced to "archive" and the caller-supplied
        # deployment is used as-is.
        CDIPnc.__init__(self, data_dir)
        self.set_dataset_info(stn, org, dataset + "xy", deployment)
        # xy records are not quality-evaluated, so default to "all".
        self.pub_set_default = "all"

__init__(stn, deployment, dataset, data_dir=None, org=None)

PARAMETERS ---------- dataset : str Active dataset name. Values are: predeploy|moored|offsite|recovered. For other parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
def __init__(self, stn, deployment, dataset, data_dir=None, org=None):
    """
    PARAMETERS
    ----------
    dataset : str
        Active dataset name.
        Values are: predeploy|moored|offsite|recovered.

    For other parameters see CDIPnc.set_dataset_info.
    """
    # Call CDIPnc.__init__ directly rather than Archive.__init__ so the
    # dataset name is not forced to "archive" and the caller-supplied
    # deployment is used as-is.
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, dataset + "xy", deployment)
    # xy records are not quality-evaluated, so default to "all".
    self.pub_set_default = "all"

Archive

Bases: CDIPnc

Loads an archive (deployment) file for a given station and deployment.

Source code in cdippy/cdipnc.py
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
class Archive(CDIPnc):
    """Loads an archive (deployment) file for a given station and deployment."""

    def __init__(self, stn, deployment=None, data_dir=None, org=None):
        """For parameters see CDIPnc.set_dataset_info."""
        CDIPnc.__init__(self, data_dir)
        # A falsy deployment (None or 0) defaults to the first deployment.
        if not deployment:
            deployment = 1
        self.set_dataset_info(stn, org, "archive", deployment)

    def __get_idx_from_timestamp(self, timestamp: int) -> int:
        """Returns the xyz sample index nearest to the given timestamp.

        Inverse of get_xyz_timestamp: idx = round(rate * (t - t0 + delay)).
        """
        t0 = self.get_var("xyzStartTime")[0]
        r = self.get_var("xyzSampleRate")[0]
        # Mark I will have filter delay set to fill value
        d = self.get_var("xyzFilterDelay")
        d = 0 if d[0] is np.ma.masked else d[0]
        return int(round(r * (timestamp - t0 + d), 0))

    def __make_xyzTime(self, start_idx: int, end_idx: int) -> np.ma.MaskedArray:
        """Returns timestamps for xyz indices in [start_idx, end_idx).

        Each index i maps to t0 - delay + i/rate.
        """
        t0 = np.ma.asarray(self.get_var("xyzStartTime")[0])
        r = np.ma.asarray(self.get_var("xyzSampleRate")[0])
        # Mark I will have filter delay set to fill value
        d = self.get_var("xyzFilterDelay")
        d = 0 if d[0] is np.ma.masked else d[0]
        d = np.ma.asarray(d)
        i = np.ma.asarray(range(start_idx, end_idx))
        return t0 - d + i / r

    def get_xyz_timestamp(self, xyzIndex: int) -> float:
        """Returns the timestamp corresponding to the given xyz array index."""
        t0 = self.get_var("xyzStartTime")[0]
        r = self.get_var("xyzSampleRate")[0]
        # Mark I will have filter delay set to fill value
        d = self.get_var("xyzFilterDelay")
        d = 0 if d[0] is np.ma.masked else d[0]
        # NOTE(review): a t0 of exactly 0 (epoch) would be treated as missing
        # by this truthiness test — presumably never occurs in practice.
        if t0 and r and d >= 0:
            return t0 - d + xyzIndex / r
        else:
            return None

    def get_request(self):
        """Overrides the base class method to handle xyz data requests."""

        # If not an xyz request, use base class version
        if self.get_var_prefix(self.vrs[0]) != "xyz":
            return super(Archive, self).get_request()

        # xyzData is shorthand for all these vars
        if self.vrs[0] == "xyzData":
            self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]

        # Handle the xyz request
        start_idx = self.__get_idx_from_timestamp(self.start_stamp)
        end_idx = self.__get_idx_from_timestamp(self.end_stamp)
        z = self.get_var("xyzZDisplacement")
        # Find out if the request timespan overlaps the data
        ts1 = cdip_utils.Timespan(start_idx, end_idx)
        ts2 = cdip_utils.Timespan(0, len(z) - 1)
        if not ts1.overlap(ts2):
            return {}
        # Make sure the indices will work with the arrays
        start_idx = max(0, start_idx)
        end_idx = min(len(z) - 1, end_idx)
        # Just calculate xyz times for the good indices
        # NOTE(review): python slices exclude end_idx, and end_idx is clamped
        # to len(z) - 1, so the final sample is never returned when the
        # request spans past the end of the array — confirm intended.
        xyzTime = self.__make_xyzTime(start_idx, end_idx)
        result = {"xyzTime": xyzTime}
        for vname in self.vrs:
            result[vname] = self.get_var(vname)[start_idx:end_idx]
        return result

__init__(stn, deployment=None, data_dir=None, org=None)

For parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
947
948
949
950
951
952
def __init__(self, stn, deployment=None, data_dir=None, org=None):
    """For parameters see CDIPnc.set_dataset_info."""
    CDIPnc.__init__(self, data_dir)
    # A falsy deployment (None or 0) falls back to the first deployment.
    deployment = deployment if deployment else 1
    self.set_dataset_info(stn, org, "archive", deployment)

get_request()

Overrides the base class method to handle xyz data requests.

Source code in cdippy/cdipnc.py
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
def get_request(self):
    """Overrides the base class method to handle xyz data requests.

    Returns a dict of masked arrays keyed by variable name, plus a
    computed 'xyzTime' key; returns {} when the requested timespan
    does not overlap the data.
    """

    # If not an xyz request, use base class version
    if self.get_var_prefix(self.vrs[0]) != "xyz":
        return super(Archive, self).get_request()

    # xyzData is shorthand for all these vars
    if self.vrs[0] == "xyzData":
        self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]

    # Handle the xyz request
    start_idx = self.__get_idx_from_timestamp(self.start_stamp)
    end_idx = self.__get_idx_from_timestamp(self.end_stamp)
    z = self.get_var("xyzZDisplacement")
    # Find out if the request timespan overlaps the data
    ts1 = cdip_utils.Timespan(start_idx, end_idx)
    ts2 = cdip_utils.Timespan(0, len(z) - 1)
    if not ts1.overlap(ts2):
        return {}
    # Make sure the indices will work with the arrays
    start_idx = max(0, start_idx)
    end_idx = min(len(z) - 1, end_idx)
    # Just calculate xyz times for the good indices
    # NOTE(review): python slices exclude end_idx, and end_idx is clamped
    # to len(z) - 1, so the final sample is never returned when the
    # request spans past the end of the array — confirm intended.
    xyzTime = self.__make_xyzTime(start_idx, end_idx)
    result = {"xyzTime": xyzTime}
    for vname in self.vrs:
        result[vname] = self.get_var(vname)[start_idx:end_idx]
    return result

get_xyz_timestamp(xyzIndex: int) -> int

Returns the timestamp corresponding to the given xyz array index.

Source code in cdippy/cdipnc.py
972
973
974
975
976
977
978
979
980
981
982
def get_xyz_timestamp(self, xyzIndex: int) -> float:
    """Returns the timestamp corresponding to the given xyz array index.

    Computed as t0 - delay + index/rate; returns None when the
    required metadata is missing.
    """
    t0 = self.get_var("xyzStartTime")[0]
    r = self.get_var("xyzSampleRate")[0]
    # Mark I will have filter delay set to fill value
    d = self.get_var("xyzFilterDelay")
    d = 0 if d[0] is np.ma.masked else d[0]
    # NOTE(review): a t0 of exactly 0 (epoch) would be treated as missing
    # by this truthiness test — presumably never occurs in practice.
    if t0 and r and d >= 0:
        return t0 - d + xyzIndex / r
    else:
        return None

CDIPnc

A base class used by the class StnData for retrieving data from CDIP netCDF (nc) files located either locally or remotely.

Files accessed remotely are served by CDIP's THREDDS server. Files accessed locally need to be located within a specific directory hierarchy.

For each CDIP nc file "type" such as historic.nc or archive.nc, there is a corresponding sub-class, e.g. Historic or Archive. Although the constructors of these classes can be used to access data, StnData is recommended because it seamlessly combines records across multiple files.

Source code in cdippy/cdipnc.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
class CDIPnc:
    """A base class used by the class StnData for retrieving data from
    CDIP netCDF (nc) files located either locally or remotely.

    Files accessed remotely are served by CDIP's THREDDS server.
    Files accessed locally need to be located within a specific
    directory hierarchy.

    For each CDIP nc file "type" such as historic.nc or archive.nc,
    there is a corresponding sub-class, e.g. Historic or Archive.
    Although the constructors of these classes can be used to access
    data, StnData is recommended because it seamlessly combines
    records across multiple files.
    """

    THREDDS_url = "https://thredds.cdip.ucsd.edu"
    dods = "thredds/dodsC"
    url = None

    # - Load_stn_nc_files only checks for this number of deployments
    max_deployments = 99

    # - Top level data dir for nc files. Files must be within subdirectories:
    # - i.e. <data_dir>/REALTIME, <data_dir>/ARCHIVE/201p1
    data_dir = None

    # DATA QUALITY FLAGS AND PUBLIC/NONPUB
    #
    # waveFlagPrimary (WFP): 1-good, 2-not_evaluated, 3-questionable, 4-bad, 9-missing
    # waveFlagSecondary (WFS): 0-unspecified, 1-sensor issues, 2... are specific messages e.g. Hs out of bounds
    #
    # Data for public release is distinguished by WFP=1 and found in all nc files.
    # Data not for public release is distinguished by WFP=4 and found in all files except historic.nc
    #
    # There are cases where WFP=1 and WFS!=0 - e.g. if frequency bands have been reformatted.
    # Records with WFP=4 are not necessarily bad data.
    # All xy records are flagged WFP=2 - not_evaluated.
    #
    # NC files: latest, pre-deploy, moored, offsite, recovered, historic, archive
    #
    pub_set_default = "public"
    # Dashed tags such as public-good are for backwards compatibility
    pub_set_map = {
        "public": "public",
        "nonpub": "nonpub",
        "all": "all",
        "public-good": "public",
        "nonpub-all": "nonpub",
        "both-all": "all",
    }

    # Applies the mask before data is returned
    apply_mask = True

    # Active datasets - deployments that span NOW
    active_datasets = {
        "predeploy": "p0",
        "moored": "p1",
        "offsite": "p2",
        "recovered": "p3",
    }

    # Spectral layout. For each dataset we need to determine if it is mk3 (64 bands)
    # or mk4 (100 bands) spectral layout. Prior to aggregation, if 1 dataset is mk3,
    # all spectral layouts must be converted to mk3 during aggregation.
    spectral_layout = None

    # REQUESTING DATA PROCEDURE
    #
    # HOW TO USE
    # 1. call set_request_info
    # 2. call get_request
    #
    # HOW IT WORKS
    # 1. For a given set of variables of the same type (e.g. 'wave'),
    #   a. determine the dimension var name and if it is a time dimension
    #   b. determine the ancillary variable name (e.g. 'waveFlagPrimary'), if it exists
    # 2. If the dimension is a time dimension, find the start and end indices based on the query
    #    (Use start and end indices to subset all variables henceforth)
    # 3. Create an ancillary variable mask based on the pub set (and start, end indices if applicable)
    # 4. For each variable,
    #    a. use start, end indices to create a masked array
    #    b. union the variable's mask with the ancillary mask
    #    c. set the new masked array variable's mask to the union mask
    # 5. Apply the mask if self.apply_mask set True.

    def __init__(self, data_dir: str = None, deployment: int = None):
        """PARAMETERS
        ----------
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        deployment : int [optional]
            Supply this to access specific station deployment data.
            Must be >= 1.
        """

        self.nc = None
        self.data_dir = data_dir
        self.deployment = deployment

    def set_request_info(
        self,
        start: datetime = None,
        end: datetime = None,
        vrs: list = ["waveHs"],
        pub_set: str = "public",
        apply_mask: bool = True,
    ) -> None:
        """Initializes data request information for get_request.

        PARAMETERS
        ----------
        start : str or datetime [optional] : default Jan 1, 1975
            Start time of data request (UTC). If provided as a string must
            be in the format Y-m-d H:M:S where Y is 4 chars and all others
            are 2 chars. Ex. '2020-03-30 19:32:56'.
        end : str or datetime [optional] : default now
            End time of data request (UTC). If not supplied defaults to now.
        vrs : list [optional] : default ['waveHs']
            A list of the names of variables to retrieve. They all must start
            with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
        pub_set: str [optional] values = public|nonpub|all
            Filters data based on data quality flags.
        apply_mask: bool [optional] default True
            Removes values from the masked array that have a mask value of True.
            Ex. If nonpub data is requested and apply_mask is False, the returned
            array will contain both public and nonpublic data (although public
            data records will have the mask value set to True). If apply_mask
            is set to True, only nonpub records will be returned.
        """
        if start is None:
            start = datetime(1975, 1, 1).replace(tzinfo=timezone.utc)
        if end is None:
            end = datetime.now(timezone.utc)
        self.set_timespan(start, end)
        self.pub_set = self.get_pub_set(pub_set)  # Standardize the set name
        if apply_mask is not None:
            self.apply_mask = apply_mask
        self.vrs = vrs

    def set_timespan(self, start, end):
        """Sets request timespan"""
        if isinstance(start, str):
            self.start_dt = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(
                tzinfo=timezone.utc
            )
        else:
            self.start_dt = start
        if isinstance(end, str):
            self.end_dt = datetime.strptime(end, "%Y-%m-%d %H:%M:%S").replace(
                tzinfo=timezone.utc
            )
        else:
            self.end_dt = end
        self.start_stamp = cdip_utils.datetime_to_timestamp(self.start_dt)

        self.end_stamp = cdip_utils.datetime_to_timestamp(self.end_dt)

    def get_request(self) -> dict:
        """Returns the data specified using set_request_info.

        RETURNS
        -------
        A dictionary containing keys of the requested variables each
        of which is a numpy masked array of data values. In addition,
        the time values are returned as well. For example, if waveHs
        was requested, the dictionary will look like this:
        {'waveHs': <np.masked_array>, 'waveTime': <np.masked_array>}
        """
        mask_results = {}
        save = {}
        result = {}

        # - Check if requested variable 0 exists
        first_var = self.get_var(self.vrs[0])
        if first_var is None:
            return result

        # Use first var to determine the dimension, grab it and find indices
        time_dim = None
        for dim_name in first_var.dimensions:
            nc_var = self.get_var(dim_name)
            if nc_var is None:  # To handle non-existing "count" variables
                continue
            if nc_var.units[0:7] == "seconds":
                time_dim = dim_name
                # dim_data = np.ma.asarray(self.nc.variables[dim_name][:])
                dim_data = self.__make_masked_array(nc_var, 0, nc_var.size)
                # - find time dimension start and end indices
                s_idx, e_idx = self.__get_indices(
                    dim_data[:], self.start_stamp, self.end_stamp
                )
                if s_idx == e_idx:
                    return result
                mask_results[time_dim] = dim_data[s_idx:e_idx]
            else:  # E.g. waveFrequency (Do I want to add to result?
                save[dim_name] = self.nc.variables[dim_name]

        # Grab the time subset of each variable
        for v_name in self.vrs:
            v = self.get_var(v_name)
            if v is None:
                continue
            if v_name == "metaStationName":
                # Use existing byte_arr_to_string method for station name
                result[v_name] = self.byte_arr_to_string(self.nc.variables[v_name][:])
            elif len(v.dimensions) == 1 and v.dimensions[0] == "maxStrlen64":
                arr = self.nc.variables[v_name][:]
                result[v_name] = self.byte_arr_to_string(arr).strip("\x00")
            elif time_dim:
                mask_results[v_name] = self.__make_masked_array(v, s_idx, e_idx)
            else:
                # !!! This could be a problem for 2-d arrays. Specifying end
                # index too large may reshape array?
                #
                # Also, there seems to be a bug for single values such as
                # metaWaterDepth in realtime files. Those variables have
                # no shape (shape is an empty tupble) and len(v) bombs even
                # though v[:] returns an array with one value.
                try:
                    v_len = len(v)
                except Exception:
                    v_len = 1
                result[v_name] = self.__make_masked_array(v, 0, v_len)

        # Use first var to determine the ancillary variable, e.g. waveFlagPrimary
        # If there is an ancillary variable, use pub/nonpub to create a mask
        if hasattr(first_var, "ancillary_variables"):
            anc_names = first_var.ancillary_variables.split(" ")
            anc_name = anc_names[0]
            # Create the variable mask using pub/nonpub choice
            if not time_dim:
                s_idx = None
            anc_mask = self.make_pub_mask(anc_name, s_idx, e_idx)
        else:
            anc_mask = None

        # Still a problem. 2-d vars.
        # Seems to work if the variable has no mask set. But
        # if mask set, returns 1-d var.
        for v_name in mask_results:
            if self.apply_mask and anc_mask is not None:
                v = mask_results[v_name]
                mask_results[v_name] = v[~anc_mask]
            result[v_name] = mask_results[v_name]

        return result

    def __make_masked_array(
        self, nc_var: str, s_idx: int, e_idx: int
    ) -> np.ma.masked_array:
        """Returns a numpy masked array for a given nc variable and indices.

        e_idx is appropriate for python arrays. I.e. one more than last index.
        """
        if len(nc_var.shape) <= 1:
            try:
                data = np.ma.asarray(nc_var[s_idx:e_idx])
            except Exception:
                try:
                    data = np.ma.asarray(nc_var[s_idx:e_idx])
                except Exception:
                    return None
            return data
        elif len(nc_var.shape) == 2:
            try:
                arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
            except Exception:
                try:
                    arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
                except Exception:
                    return None
            return arr

    def make_pub_mask(self, anc_name: str, s_idx: int, e_idx: int) -> np.ndarray:
        """Returns an np.ndarray of bools given pub_set and ancillary var"""

        # No s_idx, use whole array. Otherwise time subset the anc var.
        nc_primary = self.get_var(anc_name)
        if s_idx is None:
            s_idx = 0
            e_idx = len(nc_primary)
        primary_flag_values = nc_primary[s_idx:e_idx]

        if anc_name == "waveFrequencyFlagPrimary":
            return None  # Not sure about this one
        elif anc_name == "gpsStatusFlags":
            return np.ma.make_mask(primary_flag_values < 0, shrink=False)
        elif (
            anc_name == "waveFlagPrimary"
            or anc_name == "sstFlagPrimary"
            or anc_name == "acmFlagPrimary"
            or anc_name == "cat4FlagPrimary"
        ):
            public_mask = primary_flag_values != 1
        elif anc_name == "xyzFlagPrimary":
            public_mask = primary_flag_values != 2
        else:
            return None

        if self.pub_set == "public":
            return np.ma.make_mask(public_mask, shrink=False)
        elif self.pub_set == "nonpub":
            return np.ma.make_mask(~public_mask, shrink=False)
        elif self.pub_set == "all":
            return np.ma.make_mask(primary_flag_values < 0, shrink=False)

    def get_pub_set(self, name: str) -> str:
        """Returns either 'public', 'nonpub' or 'all'.

        Maintains backwards compatibility with prior pub_set names.
        """
        if name is None or name not in self.pub_set_map.keys():
            return self.pub_set_default
        else:
            return self.pub_set_map[name]

    def get_var_prefix(self, var_name: str) -> str:
        """Returns 'wave' part of the string 'waveHs'."""
        s = ""
        for c in var_name:
            if c.isupper():
                break
            s += c
        return s

    def get_flag_meanings(self, flag_name: str) -> list:
        """Returns flag category values and meanings given a flag_name."""
        return self.get_var(flag_name).flag_meanings.split(" ")

    def get_flag_values(self, flag_name: str) -> list:
        """Returns flag category values and meanings given a flag_name."""
        v = self.get_var(flag_name)
        if flag_name[0:3] == "gps":
            return v.flag_masks
        else:
            return v.flag_values

    def get_date_modified(self) -> datetime:
        """Returns the time the nc file was last modified."""
        return datetime.strptime(self.nc.date_modified, "%Y-%m-%dT%H:%M:%SZ")

    def get_coverage_start(self) -> datetime:
        """Returns the start time of the nc file data coverage."""
        return datetime.strptime(self.nc.time_coverage_start, "%Y-%m-%dT%H:%M:%SZ")

    def get_coverage_end(self) -> datetime:
        """Returns the end time of the nc file data coverage."""
        return datetime.strptime(self.nc.time_coverage_end, "%Y-%m-%dT%H:%M:%SZ")

    def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple:
        """Returns start and end indices to include any times that are equal to start_stamp or end_stamp."""
        s_idx = bisect_left(times, start_stamp)  # Will include time if equal
        # Will give e_idx appropriate for python arrays
        e_idx = bisect_right(times, end_stamp, s_idx)
        return s_idx, e_idx

    def get_nc(self, url: str = None, retry: bool = False) -> netCDF4.Dataset:
        if not url:
            url = self.url
        try:
            return netCDF4.Dataset(url)
        except Exception as e:
            # Try again if unsuccessful (nc file not ready? THREDDS problem?)
            if retry:
                logger.warning(
                    msg=f"Retrying to open dataset at {url} due to an unexpected exception: {e}"
                )
                try:
                    return netCDF4.Dataset(url)
                except Exception:
                    pass
            logger.exception(
                msg=f"Failed to open dataset at {url} due to an unexpected exception: {e}"
            )
            return None

    def byte_arr_to_string(self, b_arr: np.ma.masked_array) -> str:
        if np.ma.is_masked(b_arr):
            b_arr = b_arr[~b_arr.mask]
        s = ""
        for c in b_arr[:].astype("U"):
            s += c
        return s

    def metaStationName(self) -> str:
        """Returns the metaStationName."""
        if self.nc is None:
            return None
        return self.byte_arr_to_string(self.nc.variables["metaStationName"][:])

    def get_var(self, var_name: str):
        """Checks if a variable exists then returns a pointer to it."""
        if self.nc is None or var_name not in self.nc.variables:
            return None
        return self.nc.variables[var_name]

    def get_dataset_urls(self) -> dict:
        """Returns a dict of two lists of urls (or paths) to all CDIP station datasets.

        The top level keys are 'realtime' and 'historic'. The urls are retrieved by
        either descending into the THREDDS catalog.xml or recursively walking through data_dir sub
        directories.

        For applications that need to use the data from multiple deployment files for
        a station, stndata:get_nc_files will load those files efficiently.
        """
        if self.data_dir is not None:
            result = {"realtime": [], "archive": []}
            # - Walk through data_dir sub dirs
            for dirpath, dirnames, filenames in os.walk(self.data_dir):
                if dirpath.find("REALTIME") >= 0:
                    for file in filenames:
                        if os.path.splitext(file)[1] == ".nc":
                            result["realtime"].append(os.path.join(dirpath, file))
                elif dirpath.find("ARCHIVE") >= 0:
                    for file in filenames:
                        if os.path.splitext(file)[1] == ".nc":
                            result["archive"].append(os.path.join(dirpath, file))
            return result

        catalog_url = "/".join([self.THREDDS_url, "thredds", "catalog.xml"])

        result = {}
        root = url_utils.load_et_root(catalog_url)
        catalogs = []
        url_utils.rfindta(root, catalogs, "catalogRef", "href")
        for catalog in catalogs:
            # - Archive data sets
            url = self.THREDDS_url + catalog
            cat = url_utils.load_et_root(url)
            if catalog.find("archive") >= 0:
                ar_urls = []
                url_utils.rfindta(cat, ar_urls, "catalogRef", "href")
                b_url = os.path.dirname(url)
                # - Station datasets
                ar_ds_urls = []
                for u in ar_urls:
                    url = b_url + "/" + u
                    ds = url_utils.load_et_root(url)
                    url_utils.rfindta(ds, ar_ds_urls, "dataset", "urlPath")
                full_urls = []
                for url in ar_ds_urls:
                    full_urls.append(
                        "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                    )
                result["archive"] = full_urls
            elif catalog.find("realtime") >= 0:
                rt_ds_urls = []
                url_utils.rfindta(cat, rt_ds_urls, "dataset", "urlPath")
                full_urls = []
                for url in rt_ds_urls:
                    full_urls.append(
                        "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                    )
                result["realtime"] = full_urls
        return result

    def set_dataset_info(
        self, stn: str, org: str, dataset_name: str, deployment: int = None
    ) -> None:
        """Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of
        this is that we are ultimately setting _url_, which can be an actual path to the
        nc file or a url to THREDDS DoDS service.

        PARAMETERS
        ----------
        stn : str
           Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip
        org: str
            (Organization) Values are: cdip|ww3|external
        dataset_name : str
            Values: realtime|historic|archive|realtimexy|archivexy|
                    predeploy|moored|offsite|recovered
        deployment : int [optional]
            Supply this to access specific station deployment data.
            Must be >= 1.

        Paths are:
            <top_dir>/EXTERNAL/WW3/<filename>  [filename=<stn>_<org_dir>_<dataset_name>.nc][CDIP stn like 192w3]
            <top_dir>/REALTIME/<filename> [filename=<stn><p1>_rt.nc]
            <top_dir>/REALTIME/<filename> [filename=<stn><p1>_xy.nc]
            <top_dir>/ARCHIVE/<stn>/<filename> [filename=<stn3><p1>_<deployment>.nc]
            <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_rt.nc]**
            <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_xy.nc]**

            **Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2)  and RECOVERED (p3)
              pX = p0|p1|p2|p3; deployment = dXX e.g. d01

        Urls are:
            http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<org_dir>/<filename>
               [org1=external|cdip,org_dir=WW3|OWI etc]
            http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<dataset_name>/<filename>

            Note:
               Since adding dataset_name, we no longer need the 5char stn id
               for org=cdip datasets. The p_val will be 'p1' for every dataset except
               active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3).
        """
        ext = ".nc"

        # Allowing data_dir to be either url or path
        __using_path = False
        if self.data_dir:
            if self.data_dir[0:4] == "http":
                self.THREDDS_url = self.data_dir
            else:
                __using_path = True

        if org is None:
            org = "cdip"
        if org == "cdip":
            org1 = "cdip"
        else:
            org1 = "external"
        # Org_dir follows 'external' and always uppercase (isn't used when org is cdip)
        org_dir = org.upper()

        # Handle the xy datasets, e.g. 'realtimexy' -> ftype 'xy', name 'realtime'
        if "xy" in dataset_name:
            ftype = "xy"
            dataset_name = dataset_name[0:-2]
        else:
            ftype = "rt"

        # Historic and archive both use archive as a dataset_dir
        # (lowercase for urls, uppercase for local paths)
        if dataset_name == "historic":
            dataset_dir = "archive"
        else:
            dataset_dir = dataset_name

        # Local paths use uppercase
        if __using_path:
            org1 = org1.upper()
            dataset_dir = dataset_dir.upper()
            if org == "cdip":
                url_pre = self.data_dir
            else:
                url_pre = "/".join([self.data_dir, org1])
        else:
            url_pre = "/".join([self.THREDDS_url, self.dods, org1])

        # Set p_val to 'p1' - it will get changed appropriately below
        stn = stn[0:3] + "p1"

        # Make filename and url
        if org == "cdip":
            # NOTE(review): if deployment is None this produces the string
            # 'dNone', which is truthy and would be used by the 'archive'
            # branch below as a real deployment — confirm callers always
            # supply a deployment for archive/active datasets.
            if type(deployment) is not str:
                deployment = "d" + str(deployment).zfill(2)
            if dataset_name in self.active_datasets.keys():
                # Active datasets swap 'p1' for p0/p2/p3 and name the file d<NN>_<rt|xy>
                stn = stn[0:3] + self.active_datasets[dataset_name]
                dataset_name = "_".join([deployment, ftype])
            elif dataset_name == "realtime":
                dataset_name = ftype
            elif dataset_name == "historic":
                dataset_dir = "/".join([dataset_dir, stn])
            elif dataset_name == "archive" and deployment:
                dataset_name = deployment
                dataset_dir = "/".join([dataset_dir, stn])
            self.filename = "_".join([stn, dataset_name + ext])
            self.url = "/".join([url_pre, dataset_dir, self.filename])
        else:
            if stn[3:4] == "p" and org == "ww3":  # Cdip stn id -> look up WMO id
                stn_tmp = ndbc.get_wmo_id(stn[0:3])
            else:
                stn_tmp = stn
            self.filename = "_".join([stn_tmp, org_dir, dataset_name + ext])
            self.url = "/".join([url_pre, org_dir, self.filename])

        self.stn = stn
        self.org = org
        self.nc = self.get_nc()

__get_indices(times: list, start_stamp: int, end_stamp: int) -> tuple

Returns start and end indices to include any times that are equal to start_stamp or end_stamp.

Source code in cdippy/cdipnc.py
370
371
372
373
374
375
def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple:
    """Return (start, end) slice indices into the sorted `times` list.

    Both endpoints are inclusive of exact matches: bisect_left admits a
    time equal to start_stamp and bisect_right admits a time equal to
    end_stamp, so times[start:end] covers [start_stamp, end_stamp].
    """
    start = bisect_left(times, start_stamp)
    # end is a python-style exclusive index; search begins at `start`.
    end = bisect_right(times, end_stamp, start)
    return start, end

__init__(data_dir: str = None, deployment: int = None)

PARAMETERS

data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server. deployment : int [optional] Supply this to access specific station deployment data. Must be >= 1.

Source code in cdippy/cdipnc.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def __init__(self, data_dir: str = None, deployment: int = None):
    """Initialize a CDIPnc accessor with no dataset loaded yet.

    PARAMETERS
    ----------
    data_dir : str [optional]
        Full path to a local directory hierarchy of nc files
        (e.g. '/project/WNC') or a url to a THREDDS server.
    deployment : int [optional]
        Specific station deployment to access. Must be >= 1.
    """
    # self.nc stays None until a dataset is resolved and opened.
    self.deployment = deployment
    self.data_dir = data_dir
    self.nc = None

__make_masked_array(nc_var: str, s_idx: int, e_idx: int) -> np.ma.masked_array

Returns a numpy masked array for a given nc variable and indices.

e_idx is appropriate for python arrays. I.e. one more than last index.

Source code in cdippy/cdipnc.py
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def __make_masked_array(
    self, nc_var: str, s_idx: int, e_idx: int
) -> np.ma.masked_array:
    """Returns a numpy masked array for a given nc variable and indices.

    e_idx is appropriate for python arrays. I.e. one more than last index.

    NOTE(review): each read is attempted twice with identical code —
    presumably a single retry to ride out transient OPeNDAP/network read
    errors; confirm. A second failure returns None rather than raising.
    Variables with more than 2 dimensions fall through and return None.
    """
    if len(nc_var.shape) <= 1:
        try:
            data = np.ma.asarray(nc_var[s_idx:e_idx])
        except Exception:
            # Retry the identical read once before giving up.
            try:
                data = np.ma.asarray(nc_var[s_idx:e_idx])
            except Exception:
                return None
        return data
    elif len(nc_var.shape) == 2:
        try:
            arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
        except Exception:
            # Retry the identical read once before giving up.
            try:
                arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
            except Exception:
                return None
        return arr

get_coverage_end() -> datetime

Returns the end time of the nc file data coverage.

Source code in cdippy/cdipnc.py
366
367
368
def get_coverage_end(self) -> datetime:
    """Return the nc file's time_coverage_end attribute parsed as a datetime."""
    stamp = self.nc.time_coverage_end
    return datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")

get_coverage_start() -> datetime

Returns the start time of the nc file data coverage.

Source code in cdippy/cdipnc.py
362
363
364
def get_coverage_start(self) -> datetime:
    """Return the nc file's time_coverage_start attribute parsed as a datetime."""
    stamp = self.nc.time_coverage_start
    return datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")

get_dataset_urls() -> dict

Returns a dict of two lists of urls (or paths) to all CDIP station datasets.

The top level keys are 'realtime' and 'archive'. The urls are retrieved by either descending into the THREDDS catalog.xml or recursively walking through data_dir sub directories.

For applications that need to use the data from multiple deployment files for a station, stndata:get_nc_files will load those files efficiently.

Source code in cdippy/cdipnc.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def get_dataset_urls(self) -> dict:
    """Returns a dict of two lists of urls (or paths) to all CDIP station datasets.

    The top level keys are 'realtime' and 'archive'. The urls are retrieved by
    either descending into the THREDDS catalog.xml or recursively walking through data_dir sub
    directories.

    For applications that need to use the data from multiple deployment files for
    a station, stndata:get_nc_files will load those files efficiently.
    """
    if self.data_dir is not None:
        result = {"realtime": [], "archive": []}
        # - Walk through data_dir sub dirs, bucketing .nc files by
        #   whether the directory path mentions REALTIME or ARCHIVE.
        for dirpath, dirnames, filenames in os.walk(self.data_dir):
            if dirpath.find("REALTIME") >= 0:
                for file in filenames:
                    if os.path.splitext(file)[1] == ".nc":
                        result["realtime"].append(os.path.join(dirpath, file))
            elif dirpath.find("ARCHIVE") >= 0:
                for file in filenames:
                    if os.path.splitext(file)[1] == ".nc":
                        result["archive"].append(os.path.join(dirpath, file))
        return result

    catalog_url = "/".join([self.THREDDS_url, "thredds", "catalog.xml"])

    result = {}
    root = url_utils.load_et_root(catalog_url)
    catalogs = []
    url_utils.rfindta(root, catalogs, "catalogRef", "href")
    for catalog in catalogs:
        # - Archive data sets
        url = self.THREDDS_url + catalog
        cat = url_utils.load_et_root(url)
        if catalog.find("archive") >= 0:
            ar_urls = []
            url_utils.rfindta(cat, ar_urls, "catalogRef", "href")
            b_url = os.path.dirname(url)
            # - Station datasets (one sub-catalog per station)
            ar_ds_urls = []
            for u in ar_urls:
                url = b_url + "/" + u
                ds = url_utils.load_et_root(url)
                url_utils.rfindta(ds, ar_ds_urls, "dataset", "urlPath")
            full_urls = []
            for url in ar_ds_urls:
                # NOTE(review): url[5:] presumably strips a leading 'cdip/'
                # segment from the THREDDS urlPath — confirm against the
                # server's catalog layout.
                full_urls.append(
                    "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                )
            result["archive"] = full_urls
        elif catalog.find("realtime") >= 0:
            rt_ds_urls = []
            url_utils.rfindta(cat, rt_ds_urls, "dataset", "urlPath")
            full_urls = []
            for url in rt_ds_urls:
                full_urls.append(
                    "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                )
            result["realtime"] = full_urls
    return result

get_date_modified() -> datetime

Returns the time the nc file was last modified.

Source code in cdippy/cdipnc.py
358
359
360
def get_date_modified(self) -> datetime:
    """Return the nc file's date_modified attribute parsed as a datetime."""
    modified = self.nc.date_modified
    return datetime.strptime(modified, "%Y-%m-%dT%H:%M:%SZ")

get_flag_meanings(flag_name: str) -> list

Returns flag category values and meanings given a flag_name.

Source code in cdippy/cdipnc.py
346
347
348
def get_flag_meanings(self, flag_name: str) -> list:
    """Return the list of flag meanings for the named flag variable."""
    meanings = self.get_var(flag_name).flag_meanings
    return meanings.split(" ")

get_flag_values(flag_name: str) -> list

Returns flag category values and meanings given a flag_name.

Source code in cdippy/cdipnc.py
350
351
352
353
354
355
356
def get_flag_values(self, flag_name: str) -> list:
    """Return the flag category values for the named flag variable.

    GPS status flags publish bit masks; every other flag variable
    publishes plain flag values.
    """
    flag_var = self.get_var(flag_name)
    if flag_name.startswith("gps"):
        return flag_var.flag_masks
    return flag_var.flag_values

get_pub_set(name: str) -> str

Returns either 'public', 'nonpub' or 'all'.

Maintains backwards compatibility with prior pub_set names.

Source code in cdippy/cdipnc.py
327
328
329
330
331
332
333
334
335
def get_pub_set(self, name: str) -> str:
    """Return the standardized pub set name: 'public', 'nonpub' or 'all'.

    Known (possibly legacy) spellings are translated via self.pub_set_map;
    a missing or unknown name falls back to self.pub_set_default.
    """
    if name is not None and name in self.pub_set_map.keys():
        return self.pub_set_map[name]
    return self.pub_set_default

get_request() -> dict

Returns the data specified using set_request_info.

RETURNS

A dictionary containing keys of the requested variables each of which is a numpy masked array of data values. In addition, the time values are returned as well. For example, if waveHs was requested, the dictionary will look like this:

Source code in cdippy/cdipnc.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def get_request(self) -> dict:
    """Returns the data specified using set_request_info.

    RETURNS
    -------
    A dictionary containing keys of the requested variables each
    of which is a numpy masked array of data values. In addition,
    the time values are returned as well. For example, if waveHs
    was requested, the dictionary will look like this:
    {'waveHs': <np.masked_array>, 'waveTime': <np.masked_array>}
    """
    mask_results = {}  # Time-indexed arrays; pub-mask applied at the end
    save = {}  # Non-time dimensions (e.g. waveFrequency); not returned
    result = {}

    # - Check if requested variable 0 exists
    first_var = self.get_var(self.vrs[0])
    if first_var is None:
        return result

    # Use first var to determine the dimension, grab it and find indices
    time_dim = None
    for dim_name in first_var.dimensions:
        nc_var = self.get_var(dim_name)
        if nc_var is None:  # To handle non-existing "count" variables
            continue
        if nc_var.units[0:7] == "seconds":
            time_dim = dim_name
            # dim_data = np.ma.asarray(self.nc.variables[dim_name][:])
            dim_data = self.__make_masked_array(nc_var, 0, nc_var.size)
            # - find time dimension start and end indices
            s_idx, e_idx = self.__get_indices(
                dim_data[:], self.start_stamp, self.end_stamp
            )
            if s_idx == e_idx:
                # Empty timespan: no records in [start_stamp, end_stamp]
                return result
            mask_results[time_dim] = dim_data[s_idx:e_idx]
        else:  # E.g. waveFrequency; kept aside, not added to result
            save[dim_name] = self.nc.variables[dim_name]

    # Grab the time subset of each variable
    for v_name in self.vrs:
        v = self.get_var(v_name)
        if v is None:
            continue
        if v_name == "metaStationName":
            # Use existing byte_arr_to_string method for station name
            result[v_name] = self.byte_arr_to_string(self.nc.variables[v_name][:])
        elif len(v.dimensions) == 1 and v.dimensions[0] == "maxStrlen64":
            # Other char-array variables: decode and strip NUL padding
            arr = self.nc.variables[v_name][:]
            result[v_name] = self.byte_arr_to_string(arr).strip("\x00")
        elif time_dim:
            mask_results[v_name] = self.__make_masked_array(v, s_idx, e_idx)
        else:
            # !!! This could be a problem for 2-d arrays. Specifying end
            # index too large may reshape array?
            #
            # Also, there seems to be a bug for single values such as
            # metaWaterDepth in realtime files. Those variables have
            # no shape (shape is an empty tuple) and len(v) bombs even
            # though v[:] returns an array with one value.
            try:
                v_len = len(v)
            except Exception:
                v_len = 1
            result[v_name] = self.__make_masked_array(v, 0, v_len)

    # Use first var to determine the ancillary variable, e.g. waveFlagPrimary
    # If there is an ancillary variable, use pub/nonpub to create a mask
    if hasattr(first_var, "ancillary_variables"):
        anc_names = first_var.ancillary_variables.split(" ")
        anc_name = anc_names[0]
        # Create the variable mask using pub/nonpub choice
        # NOTE(review): if no time dimension was found above, e_idx was
        # never bound and this call would raise NameError — confirm that
        # flagged variables always carry a time dimension.
        if not time_dim:
            s_idx = None
        anc_mask = self.make_pub_mask(anc_name, s_idx, e_idx)
    else:
        anc_mask = None

    # Still a problem. 2-d vars.
    # Seems to work if the variable has no mask set. But
    # if mask set, returns 1-d var.
    for v_name in mask_results:
        if self.apply_mask and anc_mask is not None:
            v = mask_results[v_name]
            mask_results[v_name] = v[~anc_mask]
        result[v_name] = mask_results[v_name]

    return result

get_var(var_name: str)

Checks if a variable exists then returns a pointer to it.

Source code in cdippy/cdipnc.py
411
412
413
414
415
def get_var(self, var_name: str):
    """Checks if a variable exists then returns a pointer to it."""
    if self.nc is None or var_name not in self.nc.variables:
        return None
    return self.nc.variables[var_name]

get_var_prefix(var_name: str) -> str

Returns 'wave' part of the string 'waveHs'.

Source code in cdippy/cdipnc.py
337
338
339
340
341
342
343
344
def get_var_prefix(self, var_name: str) -> str:
    """Returns 'wave' part of the string 'waveHs'."""
    s = ""
    for c in var_name:
        if c.isupper():
            break
        s += c
    return s

make_pub_mask(anc_name: str, s_idx: int, e_idx: int) -> np.ndarray

Returns an np.ndarray of bools given pub_set and ancillary var

Source code in cdippy/cdipnc.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
def make_pub_mask(self, anc_name: str, s_idx: int, e_idx: int) -> np.ndarray:
    """Returns an np.ndarray of bools given pub_set and ancillary var"""

    # No s_idx, use whole array. Otherwise time subset the anc var.
    nc_primary = self.get_var(anc_name)
    if s_idx is None:
        s_idx = 0
        e_idx = len(nc_primary)
    primary_flag_values = nc_primary[s_idx:e_idx]

    if anc_name == "waveFrequencyFlagPrimary":
        return None  # Not sure about this one
    elif anc_name == "gpsStatusFlags":
        return np.ma.make_mask(primary_flag_values < 0, shrink=False)
    elif (
        anc_name == "waveFlagPrimary"
        or anc_name == "sstFlagPrimary"
        or anc_name == "acmFlagPrimary"
        or anc_name == "cat4FlagPrimary"
    ):
        public_mask = primary_flag_values != 1
    elif anc_name == "xyzFlagPrimary":
        public_mask = primary_flag_values != 2
    else:
        return None

    if self.pub_set == "public":
        return np.ma.make_mask(public_mask, shrink=False)
    elif self.pub_set == "nonpub":
        return np.ma.make_mask(~public_mask, shrink=False)
    elif self.pub_set == "all":
        return np.ma.make_mask(primary_flag_values < 0, shrink=False)

metaStationName() -> str

Returns the metaStationName.

Source code in cdippy/cdipnc.py
405
406
407
408
409
def metaStationName(self) -> str:
    """Return the station name decoded from the metaStationName variable."""
    if self.nc is None:
        return None
    raw = self.nc.variables["metaStationName"][:]
    return self.byte_arr_to_string(raw)

set_dataset_info(stn: str, org: str, dataset_name: str, deployment: int = None) -> None

Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of this is that we are ultimately setting url, which can be an actual path to the nc file or a url to THREDDS DoDS service.

PARAMETERS

stn : str Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip org: str (Organization) Values are: cdip|ww3|external dataset_name : str Values: realtime|historic|archive|realtimexy|archivexy| predeploy|moored|offsite|recovered deployment : int [optional] Supply this to access specific station deployment data. Must be >= 1.

Paths are

&lt;top_dir&gt;/EXTERNAL/WW3/&lt;filename&gt; [filename=&lt;stn&gt;_&lt;org_dir&gt;_&lt;dataset_name&gt;.nc] [CDIP stn like 192w3]; &lt;top_dir&gt;/REALTIME/&lt;filename&gt; [filename=&lt;stn&gt;&lt;p1&gt;_rt.nc]; &lt;top_dir&gt;/REALTIME/&lt;filename&gt; [filename=&lt;stn&gt;&lt;p1&gt;_xy.nc]; &lt;top_dir&gt;/ARCHIVE/&lt;stn&gt;/&lt;filename&gt; [filename=&lt;stn3&gt;&lt;p1&gt;_&lt;deployment&gt;.nc]; &lt;top_dir&gt;/PREDEPLOY/&lt;stn&gt;/&lt;filename&gt; [filename=&lt;stn3&gt;&lt;pX&gt;_&lt;deployment&gt;_rt.nc]; &lt;top_dir&gt;/PREDEPLOY/&lt;stn&gt;/&lt;filename&gt; [filename=&lt;stn3&gt;&lt;pX&gt;_&lt;deployment&gt;_xy.nc]

**Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2) and RECOVERED (p3) pX = p0|p1|p2|p3; deployment = dXX e.g. d01

Urls are

http://thredds.cdip.ucsd/thredds/dodsC/&lt;org1&gt;/&lt;org_dir&gt;/&lt;filename&gt; [org1=external|cdip, org_dir=WW3|OWI etc]; http://thredds.cdip.ucsd/thredds/dodsC/&lt;org1&gt;/&lt;dataset_name&gt;/&lt;filename&gt;

Note: Since adding dataset_name, we no longer need the 5char stn id for org=cdip datasets. The p_val will be 'p1' for every dataset except active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3).

Source code in cdippy/cdipnc.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
def set_dataset_info(
    self, stn: str, org: str, dataset_name: str, deployment: int = None
) -> None:
    """Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of
    this is that we are ultimately setting _url_, which can be an actual path to the
    nc file or a url to THREDDS DoDS service.

    PARAMETERS
    ----------
    stn : str
       Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip
    org: str
        (Organization) Values are: cdip|ww3|external
    dataset_name : str
        Values: realtime|historic|archive|realtimexy|archivexy|
                predeploy|moored|offsite|recovered
    deployment : int [optional]
        Supply this to access specific station deployment data.
        Must be >= 1. Required for archive and active datasets.

    Paths are:
        <top_dir>/EXTERNAL/WW3/<filename>  [filename=<stn>_<org_dir>_<dataset_name>.nc][CDIP stn like 192w3]
        <top_dir>/REALTIME/<filename> [filename=<stn><p1>_rt.nc]
        <top_dir>/REALTIME/<filename> [filename=<stn><p1>_xy.nc]
        <top_dir>/ARCHIVE/<stn>/<filename> [filename=<stn3><p1>_<deployment>.nc]
        <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_rt.nc]**
        <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_xy.nc]**

        **Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2)  and RECOVERED (p3)
          pX = p0|p1|p2|p3; deployment = dXX e.g. d01

    Urls are:
        http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<org_dir>/<filename>
           [org1=external|cdip,org_dir=WW3|OWI etc]
        http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<dataset_name>/<filename>

        Note:
           Since adding dataset_name, we no longer need the 5char stn id
           for org=cdip datasets. The p_val will be 'p1' for every dataset except
           active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3).
    """
    ext = ".nc"

    # Allowing data_dir to be either url or path
    using_path = False
    if self.data_dir:
        if self.data_dir[0:4] == "http":
            self.THREDDS_url = self.data_dir
        else:
            using_path = True

    if org is None:
        org = "cdip"
    org1 = "cdip" if org == "cdip" else "external"
    # Org_dir follows 'external' and always uppercase (isn't used when org is cdip)
    org_dir = org.upper()

    # Handle the xy datasets, e.g. 'realtimexy' -> ftype 'xy', name 'realtime'
    if "xy" in dataset_name:
        ftype = "xy"
        dataset_name = dataset_name[0:-2]
    else:
        ftype = "rt"

    # Historic and archive both use archive as a dataset_dir
    # (lowercase for urls, uppercase for local paths)
    if dataset_name == "historic":
        dataset_dir = "archive"
    else:
        dataset_dir = dataset_name

    # Local paths use uppercase
    if using_path:
        org1 = org1.upper()
        dataset_dir = dataset_dir.upper()
        if org == "cdip":
            url_pre = self.data_dir
        else:
            url_pre = "/".join([self.data_dir, org1])
    else:
        url_pre = "/".join([self.THREDDS_url, self.dods, org1])

    # Set p_val to 'p1' - it will get changed appropriately below
    stn = stn[0:3] + "p1"

    # Make filename and url
    if org == "cdip":
        # Format an integer deployment as e.g. 'd05'. A missing (None)
        # deployment is left as None so the 'archive' branch below does
        # not mistake the string 'dNone' for a real deployment.
        if deployment is not None and not isinstance(deployment, str):
            deployment = "d" + str(deployment).zfill(2)
        if dataset_name in self.active_datasets.keys():
            # Active datasets swap 'p1' for p0/p2/p3 and name the file
            # d<NN>_<rt|xy>; a deployment is required here.
            stn = stn[0:3] + self.active_datasets[dataset_name]
            dataset_name = "_".join([deployment, ftype])
        elif dataset_name == "realtime":
            dataset_name = ftype
        elif dataset_name == "historic":
            dataset_dir = "/".join([dataset_dir, stn])
        elif dataset_name == "archive" and deployment:
            dataset_name = deployment
            dataset_dir = "/".join([dataset_dir, stn])
        self.filename = "_".join([stn, dataset_name + ext])
        self.url = "/".join([url_pre, dataset_dir, self.filename])
    else:
        if stn[3:4] == "p" and org == "ww3":  # Cdip stn id -> look up WMO id
            stn_tmp = ndbc.get_wmo_id(stn[0:3])
        else:
            stn_tmp = stn
        self.filename = "_".join([stn_tmp, org_dir, dataset_name + ext])
        self.url = "/".join([url_pre, org_dir, self.filename])

    self.stn = stn
    self.org = org
    self.nc = self.get_nc()

set_request_info(start: datetime = None, end: datetime = None, vrs: list = ['waveHs'], pub_set: str = 'public', apply_mask: bool = True) -> None

Initializes data request information for get_request.

PARAMETERS

start : str or datetime [optional] : default Jan 1, 1975 Start time of data request (UTC). If provided as a string must be in the format Y-m-d H:M:S where Y is 4 chars and all others are 2 chars. Ex. '2020-03-30 19:32:56'. end : str or datetime [optional] : default now End time of data request (UTC). If not supplied defaults to now. vrs : list [optional] : default ['waveHs'] A list of the names of variables to retrieve. They all must start with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp'] pub_set: str [optional] values = public|nonpub|all Filters data based on data quality flags. apply_mask: bool [optional] default True Removes values from the masked array that have a mask value of True. Ex. If nonpub data is requested and apply_mask is False, the returned array will contain both public and nonpublic data (although public data records will have the mask value set to True). If apply_mask is set to True, only nonpub records will be returned.

Source code in cdippy/cdipnc.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def set_request_info(
    self,
    start: datetime = None,
    end: datetime = None,
    vrs: list = None,
    pub_set: str = "public",
    apply_mask: bool = True,
) -> None:
    """Initializes data request information for get_request.

    PARAMETERS
    ----------
    start : str or datetime [optional] : default Jan 1, 1975
        Start time of data request (UTC). If provided as a string must
        be in the format Y-m-d H:M:S where Y is 4 chars and all others
        are 2 chars. Ex. '2020-03-30 19:32:56'.
    end : str or datetime [optional] : default now
        End time of data request (UTC). If not supplied defaults to now.
    vrs : list [optional] : default ['waveHs']
        A list of the names of variables to retrieve. They all must start
        with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
    pub_set: str [optional] values = public|nonpub|all
        Filters data based on data quality flags.
    apply_mask: bool [optional] default True
        Removes values from the masked array that have a mask value of True.
        Ex. If nonpub data is requested and apply_mask is False, the returned
        array will contain both public and nonpublic data (although public
        data records will have the mask value set to True). If apply_mask
        is set to True, only nonpub records will be returned.
    """
    # None (not a mutable list literal) is the default to avoid the
    # shared-mutable-default pitfall; it still means ['waveHs'].
    if vrs is None:
        vrs = ["waveHs"]
    if start is None:
        start = datetime(1975, 1, 1).replace(tzinfo=timezone.utc)
    if end is None:
        end = datetime.now(timezone.utc)
    self.set_timespan(start, end)
    self.pub_set = self.get_pub_set(pub_set)  # Standardize the set name
    if apply_mask is not None:
        self.apply_mask = apply_mask
    self.vrs = vrs

set_timespan(start, end)

Sets request timespan

Source code in cdippy/cdipnc.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
def set_timespan(self, start, end):
    """Sets the request timespan.

    Accepts datetimes or '%Y-%m-%d %H:%M:%S' strings; strings are parsed
    and tagged as UTC. Also derives unix stamps for both endpoints.
    """

    def _as_utc(value):
        # Strings are parsed and marked UTC; datetimes pass through as-is.
        if isinstance(value, str):
            parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
            return parsed.replace(tzinfo=timezone.utc)
        return value

    self.start_dt = _as_utc(start)
    self.end_dt = _as_utc(end)
    self.start_stamp = cdip_utils.datetime_to_timestamp(self.start_dt)
    self.end_stamp = cdip_utils.datetime_to_timestamp(self.end_dt)

Historic

Bases: CDIPnc

Loads the historic nc file for a given station.

Source code in cdippy/cdipnc.py
931
932
933
934
935
936
937
938
class Historic(CDIPnc):
    """CDIPnc subclass bound to a station's "historic" nc dataset."""

    def __init__(self, stn, data_dir=None, org=None):
        """See CDIPnc.set_dataset_info for parameter details."""
        super().__init__(data_dir)
        self.set_dataset_info(stn, org, "historic")

__init__(stn, data_dir=None, org=None)

For parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
934
935
936
937
938
def __init__(self, stn, data_dir=None, org=None):
    """Load the historic dataset for station `stn`.

    For parameters see CDIPnc.set_dataset_info.
    """

    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, "historic")

Latest

Bases: CDIPnc

Loads the latest_3day.nc and has methods for retrieving the data.

Source code in cdippy/cdipnc.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
class Latest(CDIPnc):
    """Loads the latest_3day.nc and has methods for retrieving the data."""

    # Do not apply the mask to get_request calls.
    apply_mask = False

    def __init__(self, data_dir: str = None):
        """PARAMETERS
        ----------
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        """

        CDIPnc.__init__(self, data_dir)
        self.labels = []  # - Holds stn labels, e.g. '100p1' for this instance
        # Set latest timespan (Latest_3day goes up to 30 minutes beyond now)
        now_plus_30min = datetime.now(timezone.utc) + timedelta(minutes=30)
        # Using the unix epoch to catch all data in latest_3day in case the file
        # is very old. Made timezone-aware (UTC) to match now_plus_30min above;
        # comparing naive and aware datetimes raises TypeError.
        epoch = datetime.fromtimestamp(0, tz=timezone.utc)
        self.set_timespan(epoch, now_plus_30min)

        # Set basic information and init self.nc
        self.filename = "latest_3day.nc"
        if self.data_dir:
            self.url = "/".join([self.data_dir, "REALTIME", self.filename])
        else:
            self.url = "/".join(
                [CDIPnc.THREDDS_url, CDIPnc.dods, "cdip/realtime/latest_3day.nc"]
            )
        self.nc = self.get_nc(self.url)

    def metaStationNames(self) -> list:
        """Get list of latest station names."""
        if self.nc is None:
            return None
        return [
            self.byte_arr_to_string(name_arr)
            for name_arr in self.nc.variables["metaStationName"]
        ]

    def metaSiteLabels(self) -> list:
        """Sets and returns self.labels, a list of station labels, e.g. ['100p1',...]."""
        if self.nc is None:
            return None
        # Rebuild self.labels rather than appending so that repeated calls do
        # not accumulate duplicate entries (get_latest calls this a second time
        # after the meta-variable loop has already invoked it).
        self.labels = [
            self.byte_arr_to_string(label_arr)
            for label_arr in self.nc.variables["metaSiteLabel"]
        ]
        return self.labels

    def metaDeployLabels(self) -> list:
        """Returns a list of metaDeployLabels."""
        if self.nc is None:
            return None
        return [
            self.byte_arr_to_string(label_arr)
            for label_arr in self.nc.variables["metaDeployLabel"]
        ]

    def metaDeployNumbers(self) -> list:
        """Returns a list of metaDeployNumbers."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaDeployNumber"])

    def metaWMOids(self) -> list:
        """Returns a list of WMO ids, e.g. ['46225',...]."""
        if self.nc is None:
            return None
        return [
            self.byte_arr_to_string(label_arr)
            for label_arr in self.nc.variables["metaWMOid"]
        ]

    def metaLatitudes(self) -> list:
        """Returns a list of station latitudes, e.g. [23.4,...]."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaLatitude"][:])

    def metaLongitudes(self) -> list:
        """Returns a list of station longitudes, e.g. [23.4,...]."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaLongitude"][:])

    def metaWaterDepths(self) -> list:
        """Returns a list of station water depths."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaWaterDepth"][:])

    def get_latest(
        self,
        pub_set: str = "public",
        meta_vars: list = None,
        params: list = None,
        array_format=True,
    ) -> list:
        """
        By default, array_format = True, it will return a dictionary of numpy masked
        arrays of the latest requested parameters as well as metadata information.
        (Despite the `-> list` annotation, a dict is returned in this case.)

        If array_format = False, it returns a list of dicts. Each dict will contain
        latest station data and metadata.

        Parameter data values that are masked or non-existent are set to np.nan.
        Time values (e.g. 'waveTime') for the wave data if masked or non-existent
        are also set to np.nan.

        Both meta_vars and params if None (or not included in the argument list) will
        return default sets of meta_vars and parameters. If meta_vars and params are set
        just those will be returned.
        """

        # Use these if params (or meta_vars) is None

        default_params_by_type = {
            "wave": ["waveHs", "waveTp", "waveDp", "waveTa"],
            "sst": ["sstSeaSurfaceTemperature"],
            "acm": ["acmSpeed", "acmDirection"],
            "cat4": ["cat4AirTemperature"],
            "gps": ["gpsLongitude", "gpsLatitude"],
            "meta": [
                "metaLongitude",
                "metaLatitude",
                "metaWaterDepth",
                "metaStationName",
                "metaSiteLabel",
                "metaDeployLabel",
                "metaWMOid",
            ],
        }

        if params is None:
            params = []
            for t in default_params_by_type:
                if "meta" not in t:
                    params += default_params_by_type[t]

        # Initialize requested parameters by type. A param belongs to a type
        # when the type name is a substring of the param name, e.g.
        # "sst" in "sstSeaSurfaceTemperature".

        requested_params = {}
        for typ in default_params_by_type:
            for p in params:
                if typ in p:
                    if typ not in requested_params:
                        requested_params[typ] = []
                    requested_params[typ].append(p)
        # Dict keys are already unique; relying on dict insertion order keeps
        # the type iteration order deterministic (list(set(...)) was not).
        requested_types = list(requested_params)

        self.pub_set = self.get_pub_set(pub_set)

        # Load meta variables

        if meta_vars is None:
            meta_vars = default_params_by_type["meta"]

        meta = {}
        for p in meta_vars:
            # e.g. "metaLatitude" -> self.metaLatitudes()
            meta[p] = getattr(self, p + "s")()

        # We always need these to remove duplicates

        site_labels = self.metaSiteLabels()
        deploy_labels = self.metaDeployLabels()

        # Loop through the data types (e.g. 'wave', 'sst', 'acm' ...)
        # and grab data for the parameters requested.

        req = {}
        for typ in requested_types:

            # Add the parameters requested into the request list
            self.vrs = requested_params[typ].copy()

            # Add the necessary time variables into the request list
            self.vrs += [typ + "Time", typ + "TimeOffset", typ + "TimeBounds"]

            # Make the data request for the included parameters and time variables.
            req[typ] = self.get_request()

            # We don't quality check the GPS
            if typ != "gps":
                pub_mask = self.make_pub_mask(typ + "FlagPrimary", None, None)
                mask = np.ma.mask_or(req[typ][typ + "TimeOffset"].mask, pub_mask)
                req[typ][typ + "TimeOffset"].mask = mask

        # NOTE(review): assumes waveTimeOffset always exists in latest_3day.nc
        # — confirm for files without wave data.
        num_stations = self.get_var("waveTimeOffset").shape[1]

        result = {}  # Store station dictionaries
        for s in range(num_stations):
            stn = {}

            # To remove duplicates (p1 usually) use the site label as a key, e.g. 162p1
            # We will be keeping the pX with the greatest deploy label.

            site_label = site_labels[s]
            if site_label in result:
                if deploy_labels[s] < result[site_label]["deploy_label"]:
                    continue

            stn["deploy_label"] = deploy_labels[s]

            latest_timestamp = -1  # To help find a time
            latest_type = None  # for the group of
            waves_included = False  # parameters.
            has_data = False
            for typ in requested_types:
                offsets = req[typ][typ + "TimeOffset"][:, s]
                t_n = typ + "Time"
                tb_n = typ + "TimeBounds"
                # Find the highest data index (latest data) for the type
                # using the TimeOffset.
                idx = -1
                if self.__has_a_number(offsets):
                    idx = np.ma.flatnotmasked_edges(offsets)[1]
                    stn[t_n] = req[typ][t_n][idx] + offsets[idx]
                    stn[tb_n] = np.ma.array([None, None])
                    for i in [0, 1]:
                        stn[tb_n][i] = req[typ][tb_n][idx][i] + offsets[idx]
                    for pm in requested_params[typ]:
                        stn[pm] = req[typ][pm][idx, s]
                    if typ != "gps":
                        has_data = True
                else:
                    stn[t_n] = np.nan
                    stn[tb_n] = np.nan
                    for pm in requested_params[typ]:
                        stn[pm] = np.nan
                if stn[t_n] is not np.nan and typ != "gps":
                    if typ == "wave":
                        waves_included = True
                    if stn[t_n] > latest_timestamp:
                        latest_timestamp = stn[t_n]
                        latest_type = typ
            stn["hasParameterData"] = has_data
            if latest_type is not None:
                group_type = "wave" if waves_included else latest_type
                stn["groupTime"] = stn[group_type + "Time"]
                stn["groupTimeBounds"] = stn[group_type + "TimeBounds"]
                # Drop type times lagging the group time by more than 30 minutes.
                least_timestamp = max(stn["groupTime"] - 1800, 0)
                for typ in requested_types:
                    t_n = typ + "Time"
                    if stn[t_n] is not np.nan and stn[t_n] < least_timestamp:
                        stn[t_n] = np.nan
            else:
                stn["groupTime"] = np.nan
                stn["groupTimeBounds"] = np.nan
            for m in meta_vars:
                stn[m] = meta[m][s]
            if stn["hasParameterData"] or (len(params) == 0 and len(meta_vars) > 0):
                result[site_label] = stn

        # To satisfy the original array_format = False, remove the site Labels

        new_result = []
        for site_label in result:
            new_result.append(result[site_label])
        result = new_result

        # Return parameters as lists in a single dict rather than a list of dicts.

        array_result = {}
        if array_format:
            for r in result:
                for key in r:
                    if key not in array_result:
                        array_result[key] = []
                    array_result[key].append(r[key])
            result = array_result

        return result

    def __has_a_number(self, arr):
        """Test if there is at least one number in the array."""
        return any(isinstance(x, numbers.Number) for x in arr)

__has_a_number(arr)

Test if there is at least one number in the array

Source code in cdippy/cdipnc.py
880
881
882
883
884
885
def __has_a_number(self, arr):
    """Test if there is at least one number in the array."""
    return any(isinstance(item, numbers.Number) for item in arr)

__init__(data_dir: str = None)

PARAMETERS

data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server.

Source code in cdippy/cdipnc.py
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
def __init__(self, data_dir: str = None):
    """PARAMETERS
    ----------
    data_dir : str [optional]
        Either a full path to a directory containing a local directory hierarchy
        of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
    """

    CDIPnc.__init__(self, data_dir)
    self.labels = []  # - Holds stn labels, e.g. '100p1' for this instance
    # Set latest timespan (Latest_3day goes up to 30 minutes beyond now)
    now_plus_30min = datetime.now(timezone.utc) + timedelta(minutes=30)
    # Using the unix epoch to catch all data in latest_3day in case the file is very old
    # NOTE(review): this epoch is naive while now_plus_30min is aware (UTC);
    # confirm set_timespan never compares the two directly.
    epoch = datetime.fromtimestamp(0)
    self.set_timespan(epoch, now_plus_30min)

    # Set basic information and init self.nc
    self.filename = "latest_3day.nc"
    if self.data_dir:
        self.url = "/".join([self.data_dir, "REALTIME", self.filename])
    else:
        # Fall back to CDIP's public THREDDS server when no local data_dir is given.
        self.url = "/".join(
            [CDIPnc.THREDDS_url, CDIPnc.dods, "cdip/realtime/latest_3day.nc"]
        )
    self.nc = self.get_nc(self.url)

get_latest(pub_set: str = 'public', meta_vars: list = None, params: list = None, array_format=True) -> list

By default, array_format = True, it will return a dictionary of numpy masked arrays of the latest requested parameters as well as metadata information.

If array_format = False, it returns a list of dicts. Each dict will contain latest station data and metadata.

Parameter data values that are masked or non-existent are set to np.nan. Time values (e.g. 'waveTime') for the wave data, if masked or non-existent, are also set to np.nan.

Both meta_vars and params if None (or not included in the argument list) will return default sets of meta_vars and parameters. If meta_vars and params are set just those will be returned.

Source code in cdippy/cdipnc.py
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
def get_latest(
    self,
    pub_set: str = "public",
    meta_vars: list = None,
    params: list = None,
    array_format=True,
) -> list:
    """
    By default, array_format = True, it will return a dictionary of numpy masked
    arrays of the latest requested parameters as well as metadata information.
    (Despite the `-> list` annotation, a dict is returned in this case.)

    If array_format = False, it returns a list of dicts. Each dict will contain
    latest station data and metadata.

    Parameter data values that are masked or non-existent are set to np.nan.
    Time values (e.g. 'waveTime') for the wave data if masked or non-existent
    are also set to np.nan.

    Both meta_vars and params if None (or not included in the argument list) will
    return default sets of meta_vars and parameters. If meta_vars and params are set
    just those will be returned.
    """

    # Use these if params (or meta_vars) is None

    default_params_by_type = {
        "wave": ["waveHs", "waveTp", "waveDp", "waveTa"],
        "sst": ["sstSeaSurfaceTemperature"],
        "acm": ["acmSpeed", "acmDirection"],
        "cat4": ["cat4AirTemperature"],
        "gps": ["gpsLongitude", "gpsLatitude"],
        "meta": [
            "metaLongitude",
            "metaLatitude",
            "metaWaterDepth",
            "metaStationName",
            "metaSiteLabel",
            "metaDeployLabel",
            "metaWMOid",
        ],
    }

    if params is None:
        params = []
        for t in default_params_by_type:
            if "meta" not in t:
                params += default_params_by_type[t]

    # Initialize requested parameters by type. A param belongs to a type when
    # the type name is a substring, e.g. "sst" in "sstSeaSurfaceTemperature".

    requested_params = {}
    for typ in default_params_by_type:
        for p in params:
            if typ in p:
                if typ not in requested_params:
                    requested_params[typ] = []
                requested_params[typ].append(p)
    # NOTE(review): keys are already unique; set() only makes the iteration
    # order of the types nondeterministic across runs.
    requested_types = list(set(requested_params.keys()))

    self.pub_set = self.get_pub_set(pub_set)

    # Load meta variables

    if meta_vars is None:
        meta_vars = default_params_by_type["meta"]

    meta = {}
    for p in meta_vars:
        # e.g. "metaLatitude" -> self.metaLatitudes()
        meta[p] = getattr(self, p + "s")()

    # We always need these to remove duplicates

    site_labels = self.metaSiteLabels()
    deploy_labels = self.metaDeployLabels()

    # Loop through the data types (e.g. 'wave', 'sst', 'acm' ...)
    # and grab data for the parameters requested.

    req = {}
    for typ in requested_types:

        # Add the parameters requested into the request list
        self.vrs = requested_params[typ].copy()

        # Add the necessary time variables into the request list
        self.vrs += [typ + "Time", typ + "TimeOffset", typ + "TimeBounds"]

        # Make the data request for the included parameters and time variables.
        req[typ] = self.get_request()

        # We don't quality check the GPS
        if typ != "gps":
            pub_mask = self.make_pub_mask(typ + "FlagPrimary", None, None)
            mask = np.ma.mask_or(req[typ][typ + "TimeOffset"].mask, pub_mask)
            req[typ][typ + "TimeOffset"].mask = mask

    # NOTE(review): assumes waveTimeOffset always exists in latest_3day.nc —
    # confirm for files without wave data.
    num_stations = self.get_var("waveTimeOffset").shape[1]

    result = {}  # Store station dictionaries
    for s in range(num_stations):
        stn = {}

        # To remove duplicates (p1 usually) use the site label as a key, e.g. 162p1
        # We will be keeping the pX with the greatest deploy label.

        site_label = site_labels[s]
        if site_label in result:
            if deploy_labels[s] < result[site_label]["deploy_label"]:
                continue

        stn["deploy_label"] = deploy_labels[s]

        latest_timestamp = -1  # To help find a time
        latest_type = None  # for the group of
        waves_included = False  # parameters.
        has_data = False
        for typ in requested_types:
            offsets = req[typ][typ + "TimeOffset"][:, s]
            t_n = typ + "Time"
            tb_n = typ + "TimeBounds"
            # Find the highest data index (latest data) for the type
            # using the TimeOffset.
            idx = -1
            if self.__has_a_number(offsets):
                idx = np.ma.flatnotmasked_edges(offsets)[1]
                stn[t_n] = req[typ][t_n][idx] + offsets[idx]
                stn[tb_n] = np.ma.array([None, None])
                for i in [0, 1]:
                    stn[tb_n][i] = req[typ][tb_n][idx][i] + offsets[idx]
                for pm in requested_params[typ]:
                    stn[pm] = req[typ][pm][idx, s]
                if typ != "gps":
                    has_data = True
            else:
                stn[t_n] = np.nan
                stn[tb_n] = np.nan
                for pm in requested_params[typ]:
                    stn[pm] = np.nan
            if stn[t_n] is not np.nan and typ != "gps":
                if typ == "wave":
                    waves_included = True
                if stn[t_n] > latest_timestamp:
                    latest_timestamp = stn[t_n]
                    latest_type = typ
        stn["hasParameterData"] = has_data
        if latest_type is not None:
            group_type = "wave" if waves_included else latest_type
            stn["groupTime"] = stn[group_type + "Time"]
            stn["groupTimeBounds"] = stn[group_type + "TimeBounds"]
            # Drop type times lagging the group time by more than 30 minutes.
            least_timestamp = max(stn["groupTime"] - 1800, 0)
            for typ in requested_types:
                t_n = typ + "Time"
                if stn[t_n] is not np.nan and stn[t_n] < least_timestamp:
                    stn[t_n] = np.nan
        else:
            stn["groupTime"] = np.nan
            stn["groupTimeBounds"] = np.nan
        for m in meta_vars:
            stn[m] = meta[m][s]
        if stn["hasParameterData"] or (len(params) == 0 and len(meta_vars) > 0):
            result[site_label] = stn

    # To satisfy the original array_format = False, remove the site Labels

    new_result = []
    for site_label in result:
        new_result.append(result[site_label])
    result = new_result

    # Return parameters as lists in a single dict rather than a list of dicts.

    array_result = {}
    if array_format:
        for r in result:
            for key in r:
                if key not in array_result:
                    array_result[key] = []
                array_result[key].append(r[key])
        result = array_result

    return result

metaDeployLabels() -> list

Returns a list of metaDeployLabels.

Source code in cdippy/cdipnc.py
644
645
646
647
648
649
650
651
def metaDeployLabels(self) -> list:
    """Returns a list of metaDeployLabels."""
    if self.nc is None:
        return None
    return [
        self.byte_arr_to_string(arr)
        for arr in self.nc.variables["metaDeployLabel"]
    ]

metaDeployNumbers() -> list

Returns a list of metaDeployNumbers.

Source code in cdippy/cdipnc.py
653
654
655
656
657
658
659
660
def metaDeployNumbers(self) -> list:
    """Returns a list of metaDeployNumbers."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaDeployNumber"])

metaLatitudes() -> list

Returns a list of station latitudes, e.g. [23.4,...].

Source code in cdippy/cdipnc.py
671
672
673
674
675
676
677
678
def metaLatitudes(self) -> list:
    """Returns a list of station latitudes, e.g. [23.4,...]."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaLatitude"][:])

metaLongitudes() -> list

Returns a list of station longitudes, e.g. [23.4,...].

Source code in cdippy/cdipnc.py
680
681
682
683
684
685
686
687
def metaLongitudes(self) -> list:
    """Returns a list of station longitudes, e.g. [23.4,...]."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaLongitude"][:])

metaSiteLabels() -> list

Sets and returns self.labels, a list of station labels, e.g. ['100p1',...].

Source code in cdippy/cdipnc.py
636
637
638
639
640
641
642
def metaSiteLabels(self) -> list:
    """Sets and returns self.labels, a list of station labels, e.g. ['100p1',...]."""
    if self.nc is None:
        return None
    # Extends self.labels in place (matching the original behavior), so
    # repeated calls accumulate entries.
    self.labels.extend(
        self.byte_arr_to_string(arr) for arr in self.nc.variables["metaSiteLabel"]
    )
    return self.labels

metaStationNames() -> list

Get list of latest station names.

Source code in cdippy/cdipnc.py
627
628
629
630
631
632
633
634
def metaStationNames(self) -> list:
    """Get list of latest station names."""
    if self.nc is None:
        return None
    return [
        self.byte_arr_to_string(name_arr)
        for name_arr in self.nc.variables["metaStationName"]
    ]

metaWMOids() -> list

Returns a list of WMO ids, e.g. ['46225',...].

Source code in cdippy/cdipnc.py
662
663
664
665
666
667
668
669
def metaWMOids(self) -> list:
    """Returns a list of WMO ids, e.g. ['46225',...]."""
    if self.nc is None:
        return None
    return [
        self.byte_arr_to_string(label_arr)
        for label_arr in self.nc.variables["metaWMOid"]
    ]

metaWaterDepths() -> list

Returns a list of station water depths.

Source code in cdippy/cdipnc.py
689
690
691
692
693
694
695
696
def metaWaterDepths(self) -> list:
    """Returns a list of station water depths."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaWaterDepth"][:])

Realtime

Bases: CDIPnc

Loads the realtime nc file for the given station.

Source code in cdippy/cdipnc.py
922
923
924
925
926
927
928
class Realtime(CDIPnc):
    """Reader for a station's realtime nc file."""

    def __init__(self, stn: str, data_dir: str = None, org: str = None):
        """See CDIPnc.set_dataset_info for parameter details."""
        super().__init__(data_dir)
        self.set_dataset_info(stn, org, "realtime")

__init__(stn: str, data_dir: str = None, org: str = None)

For parameters: See CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
925
926
927
928
def __init__(self, stn: str, data_dir: str = None, org: str = None):
    """For parameters: See CDIPnc.set_dataset_info."""
    # Initialize the base reader, then select the "realtime" dataset.
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, "realtime")

RealtimeXY

Bases: Archive

Loads the realtime xy nc file for the given station.

Source code in cdippy/cdipnc.py
1034
1035
1036
1037
1038
1039
1040
class RealtimeXY(Archive):
    """Loads the realtime xy nc file for the given station."""

    def __init__(self, stn, data_dir=None, org=None):
        """For parameters see CDIPnc.set_dataset_info."""
        # NOTE(review): calls CDIPnc.__init__ directly, bypassing
        # Archive.__init__ — presumably intentional; confirm Archive.__init__
        # performs no setup required here.
        CDIPnc.__init__(self, data_dir)
        self.set_dataset_info(stn, org, "realtimexy")

__init__(stn, data_dir=None, org=None)

For parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
1037
1038
1039
1040
def __init__(self, stn, data_dir=None, org=None):
    """For parameters see CDIPnc.set_dataset_info."""
    # Initializes via CDIPnc.__init__ (not the Archive parent), then selects
    # the "realtimexy" dataset — presumably intentional; confirm.
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, "realtimexy")

cdippy.nchashes

NcHashes

A class that checks for changes to datasets by reading the online list of historic netCDF file hashes and comparing it to a previously saved copy.

Source code in cdippy/nchashes.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
class NcHashes:
    """
    Checks for changes to datasets by reading the online list of historic
    netCDF file hashes and comparing it to a locally saved copy.
    """

    # URL of the tab-delimited listing of nc files and their hashes.
    hashes_url = "http://cdip.ucsd.edu/data_access/metadata/wavecdf_by_datemod.txt"

    def __init__(self, hash_file_location=""):
        # Per-instance dict: previously a shared class attribute, so hashes
        # loaded by one instance leaked into every other instance.
        self.new_hashes = {}
        # Path of the pickled copy of the previously seen hash table.
        self.hash_pkl = hash_file_location + "/HASH.pkl"

    def load_hash_table(self):
        """Populate self.new_hashes from the online hash listing.

        Skips the header line and any line with fewer than 7 tab-separated
        fields; maps filename (field 0) to its hash (field 6).
        """
        lines = url_utils.read_url(self.hashes_url).strip().split("\n")
        for line in lines:
            if line.startswith("filename"):
                continue
            fields = line.split("\t")
            if len(fields) < 7:
                continue
            self.new_hashes[fields[0]] = fields[6]

    def compare_hash_tables(self) -> list:
        """
        Compare the current in-memory list of files, loaded by `load_hash_table`,
        to the list saved in HASH.pkl and return files that are new or changed.

        Returns:
            changed ([str]): nc files that are new or whose hash has changed
                since HASH.pkl was last saved. Empty when no saved table
                exists or no new hashes have been loaded.
        """
        old_hashes = self._get_old_hashes()
        if not old_hashes or not self.new_hashes:
            return []
        return [
            key
            for key, new_hash in self.new_hashes.items()
            if key not in old_hashes or old_hashes[key] != new_hash
        ]

    def save_new_hashes(self):
        """Persist self.new_hashes to HASH.pkl for the next comparison."""
        cdip_utils.pkl_dump(self.new_hashes, self.hash_pkl)

    def _get_old_hashes(self):
        """Load the previously saved hash table from HASH.pkl."""
        return cdip_utils.pkl_load(self.hash_pkl)

compare_hash_tables() -> list

Compare the current in-memory list of files, loaded by load_hash_table to the list saved in HASH.pkl and return a list of stations that are new or have changed.

Returns:
  • changed( [str] ) –

    A list of nc files that have changed or are new since HASH.pkl was last saved.

Source code in cdippy/nchashes.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def compare_hash_tables(self) -> list:
    """
    Compare the in-memory hash table (filled by `load_hash_table`) with the
    table previously saved to HASH.pkl.

    Returns:
        changed ([str]): nc files that are new or whose hash differs from
            the saved table. Empty when no saved table exists or nothing
            has been loaded.
    """
    previous = self._get_old_hashes()
    changed = []
    if previous and self.new_hashes:
        changed = [
            name
            for name, digest in self.new_hashes.items()
            if name not in previous or previous[name] != digest
        ]
    return changed

cdippy.ncstats

NcStats

Bases: StnData

Produces data availability statistics for a given station.

This class provides methods to
  • Return counts for the entire station record, intended for use by web applications.
  • Save availability counts (e.g., xyz counts) for individual NetCDF files. Updates to totals are calculated by re-summarizing any files that have changed and aggregating all files to produce new totals.
Source code in cdippy/ncstats.py
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
class NcStats(StnData):
    """Produces data availability statistics for a given station.

    This class provides methods to:
        * Return counts for the entire station record, intended for use by web applications.
        * Save availability counts (e.g., xyz counts) for individual NetCDF files.
          Updates to totals are calculated by re-summarizing any files that have changed
          and aggregating all files to produce new totals.
    """

    # Default quality-control flag variables summarized by flag_counts.
    QC_flags = ["waveFlagPrimary", "sstFlagPrimary", "gpsStatusFlags"]

    def __init__(self, stn: str, data_dir: str = None):
        """Initializes an NcStats instance.

        Args:
            stn (str): Station identifier.
            data_dir (str, optional): Path to the data directory. Defaults to None.
        """
        StnData.__init__(self, stn, data_dir)

        self.date_modifieds = {}
        # Query window spanning the full station record.
        # NOTE(review): self.start is naive while self.end is timezone-aware;
        # assumes get_series tolerates mixing the two -- confirm in StnData.
        self.start = datetime.strptime("1975-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
        self.end = datetime.now(timezone.utc)
        self.pub_set = "all"

    def make_stats(self) -> dict:
        """Computes station-level statistics.

        Returns:
            dict: A dictionary containing:
                - "flag_counts" (dict): Flag count summaries for the station.
                - "deployments" (dict): Deployment summary statistics.
        """
        return {
            "flag_counts": self.flag_counts(),
            "deployments": self.deployment_summary(),
        }

    def deployment_summary(self) -> dict:
        """Generates deployment summary statistics.

        Returns:
            dict: A dictionary containing:
                - Deployment IDs as keys, with values containing start and end coverage times.
                - "number_of_deployments" (int): The number of deployments.
        """
        self.load_nc_files()
        result = {}
        dep_cnt = 0
        for nc_name in self.nc_files:
            # Deployment file names end in "dNNN.nc"; pull out the "dNN" tag.
            dep = nc_name[-6:-3]
            if dep[0:1] == "d":
                dep_cnt += 1
                nc = self.nc_files[nc_name]
                result[dep] = {
                    "time_coverage_start": nc.get_coverage_start(),
                    "time_coverage_end": nc.get_coverage_end(),
                }
        result["number_of_deployments"] = dep_cnt
        return result

    def load_nc_files(self, types: list = None) -> dict:
        """Loads NetCDF files for the station into self.nc_files.

        Args:
            types (list, optional): List of file categories to load. Defaults to
                ["realtime", "historic", "archive"].

        Returns:
            dict: Dictionary of NetCDF file objects keyed by filename.
        """
        # None sentinel avoids the shared-mutable-default-argument pitfall.
        if types is None:
            types = ["realtime", "historic", "archive"]
        self.nc_files = self.get_nc_files(types)
        # Bug fix: the method was annotated "-> dict" but returned None.
        return self.nc_files

    def load_file(self, nc_filename: str):
        """Loads a specific NetCDF file into the instance.

        Args:
            nc_filename (str): Filename of the NetCDF file.

        Sets:
            self.nc: Loaded NetCDF file object.
        """
        if nc_filename in self.nc_files:
            self.nc = self.nc_files[nc_filename]
        else:
            self.nc = self.get_nc(self.filename_to_url(nc_filename))

    def load_date_modifieds(self):
        # Placeholder: per-file modification-date persistence not yet implemented.
        pass

    def store_date_modified(self):
        # Placeholder: per-file modification-date persistence not yet implemented.
        pass

    def nc_file_summaries(self) -> dict:
        """Computes a summary for every NetCDF file of the station.

        Returns:
            dict: Summaries keyed by nc filename.
        """
        self.load_nc_files()
        return {nc_name: self.nc_file_summary(nc_name) for nc_name in self.nc_files}

    def nc_file_summary(self, nc_filename: str) -> dict:
        """Computes a summary for a given NetCDF file.

        Args:
            nc_filename (str): Name of the NetCDF file.

        Returns:
            dict: Summary statistics for the file, including:
                - "flag_counts" (dict): Flag count statistics.
        """
        if self.nc is None:
            self.load_file(nc_filename)
        # Currently flag counts are the only summary produced.
        return {"flag_counts": self.flag_counts()}

    def flag_counts(self, QC_flags: list = None) -> dict:
        """Computes counts of flag variables for the entire station record.

        Args:
            QC_flags (list, optional): List of quality-control flag variable names.
                Defaults to `self.QC_flags`.

        Returns:
            dict: A dictionary containing:
                - "totals" (dict[str, pandas.DataFrame]): Total counts per flag.
                - "by_month" (dict[str, pandas.DataFrame]): Monthly counts per flag.
        """
        result = {"totals": {}, "by_month": {}}
        if not QC_flags:
            QC_flags = self.QC_flags
        for flag_name in QC_flags:
            dim = self.meta.get_var_prefix(flag_name)
            self.data = self.get_series(self.start, self.end, [flag_name], self.pub_set)
            cat_var = self.make_categorical_flag_var(flag_name)
            result["totals"][flag_name] = self.total_count(cat_var)
            result["by_month"][flag_name] = self.by_month_count(cat_var, dim)
        return result

    def total_count(self, cat_var) -> pd.DataFrame:
        """Counts totals for a given categorical flag variable.

        Args:
            cat_var (pandas.Categorical): Categorical flag variable.

        Returns:
            pandas.DataFrame: DataFrame with counts grouped by category.
        """
        return pd.DataFrame({"cnt": cat_var}).groupby(cat_var).count()

    def by_month_count(self, cat_var, dim: str) -> pd.DataFrame:
        """Counts observations by month for a given flag variable.

        Args:
            cat_var (pandas.Categorical): Categorical flag variable.
            dim (str): Dimension name prefix for the time variable.

        Returns:
            pandas.DataFrame: DataFrame with counts grouped by month and flag value.
        """
        df = pd.DataFrame(
            {"cnt": cat_var}, index=pd.to_datetime(self.data[dim + "Time"], unit="s")
        )
        # Group key "YYYYMM", zero-padded month.
        mon_map = df.index.map(lambda x: str(x.year) + str("{:02d}".format(x.month)))
        return df.groupby([mon_map, cat_var]).count().fillna(0).astype(int)

    def make_categorical_flag_var(self, flag_name: str):
        """Builds a pandas.Categorical of self.data[flag_name] whose categories
        are the flag's declared values, renamed to their human-readable meanings."""
        cat = pd.Categorical(
            self.data[flag_name], categories=self.meta.get_flag_values(flag_name)
        )
        return cat.rename_categories(self.meta.get_flag_meanings(flag_name))

__init__(stn: str, data_dir: str = None)

Initializes an NcStats instance.

Parameters:
  • stn (str) –

    Station identifier.

  • data_dir (str, default: None ) –

    Path to the data directory. Defaults to None.

Source code in cdippy/ncstats.py
18
19
20
21
22
23
24
25
26
27
28
29
30
def __init__(self, stn: str, data_dir: str = None):
    """Initializes an NcStats instance.

    Args:
        stn (str): Station identifier.
        data_dir (str, optional): Path to the data directory. Defaults to None.
    """
    StnData.__init__(self, stn, data_dir)

    self.date_modifieds = {}
    self.start = datetime.strptime("1975-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
    self.end = datetime.now(timezone.utc)
    self.pub_set = "all"

by_month_count(cat_var, dim: str) -> pd.DataFrame

Counts observations by month for a given flag variable.

Parameters:
  • cat_var (Categorical) –

    Categorical flag variable.

  • dim (str) –

    Dimension name prefix for the time variable.

Returns:
  • DataFrame –

    pandas.DataFrame: DataFrame with counts grouped by month and flag value.

Source code in cdippy/ncstats.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def by_month_count(self, cat_var, dim: str) -> pd.DataFrame:
    """Counts observations by month for a given flag variable.

    Args:
        cat_var (pandas.Categorical): Categorical flag variable.
        dim (str): Dimension name prefix for the time variable.

    Returns:
        pandas.DataFrame: DataFrame with counts grouped by month and flag value.
    """
    df = pd.DataFrame(
        {"cnt": cat_var}, index=pd.to_datetime(self.data[dim + "Time"], unit="s")
    )
    mon_map = df.index.map(lambda x: str(x.year) + str("{:02d}".format(x.month)))
    return df.groupby([mon_map, cat_var]).count().fillna(0).astype(int)

deployment_summary() -> dict

Generates deployment summary statistics.

Returns:
  • dict( dict ) –

    A dictionary containing: - Deployment IDs as keys, with values containing start and end coverage times. - "number_of_deployments" (int): The number of deployments.

Source code in cdippy/ncstats.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def deployment_summary(self) -> dict:
    """Generates deployment summary statistics.

    Returns:
        dict: A dictionary containing:
            - Deployment IDs as keys, with values containing start and end coverage times.
            - "number_of_deployments" (int): The number of deployments.
    """
    self.load_nc_files()
    result = {}
    dep_cnt = 0
    for nc_name in self.nc_files:
        dep = nc_name[-6:-3]
        if dep[0:1] == "d":
            dep_cnt += 1
            result[dep] = {}
            result[dep]["time_coverage_start"] = self.nc_files[
                nc_name
            ].get_coverage_start()
            result[dep]["time_coverage_end"] = self.nc_files[
                nc_name
            ].get_coverage_end()
    result["number_of_deployments"] = dep_cnt
    return result

flag_counts(QC_flags: list = None) -> dict

Computes counts of flag variables for the entire station record.

Parameters:
  • QC_flags (list, default: None ) –

    List of quality-control flag variable names. Defaults to self.QC_flags.

Returns:
  • dict( dict ) –

    A dictionary containing: - "totals" (dict[str, pandas.DataFrame]): Total counts per flag. - "by_month" (dict[str, pandas.DataFrame]): Monthly counts per flag.

Source code in cdippy/ncstats.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def flag_counts(self, QC_flags: list = None) -> dict:
    """Computes counts of flag variables for the entire station record.

    Args:
        QC_flags (list, optional): List of quality-control flag variable names.
            Defaults to `self.QC_flags`.

    Returns:
        dict: A dictionary containing:
            - "totals" (dict[str, pandas.DataFrame]): Total counts per flag.
            - "by_month" (dict[str, pandas.DataFrame]): Monthly counts per flag.
    """
    result = {"totals": {}, "by_month": {}}
    if not QC_flags:
        QC_flags = self.QC_flags
    for flag_name in QC_flags:
        dim = self.meta.get_var_prefix(flag_name)
        self.data = self.get_series(self.start, self.end, [flag_name], self.pub_set)
        cat_var = self.make_categorical_flag_var(flag_name)
        result["totals"][flag_name] = self.total_count(cat_var)
        result["by_month"][flag_name] = self.by_month_count(cat_var, dim)
    return result

load_file(nc_filename: str)

Loads a specific NetCDF file into the instance.

Parameters:
  • nc_filename (str) –

    Filename of the NetCDF file.

Sets

self.nc: Loaded NetCDF file object.

Source code in cdippy/ncstats.py
82
83
84
85
86
87
88
89
90
91
92
93
94
def load_file(self, nc_filename: str):
    """Loads a specific NetCDF file into the instance.

    Args:
        nc_filename (str): Filename of the NetCDF file.

    Sets:
        self.nc: Loaded NetCDF file object.
    """
    if nc_filename in self.nc_files:
        self.nc = self.nc_files[nc_filename]
    else:
        self.nc = self.get_nc(self.filename_to_url(nc_filename))

load_nc_files(types: list = ['realtime', 'historic', 'archive']) -> dict

Loads NetCDF files for the station.

Parameters:
  • types (list, default: ['realtime', 'historic', 'archive'] ) –

    List of file categories to load. Defaults to ["realtime", "historic", "archive"].

Returns:
  • dict( dict ) –

    Dictionary of NetCDF file objects keyed by filename.

Source code in cdippy/ncstats.py
70
71
72
73
74
75
76
77
78
79
80
def load_nc_files(self, types: list = ["realtime", "historic", "archive"]) -> dict:
    """Loads NetCDF files for the station.

    NOTE(review): despite the ``-> dict`` annotation this implicitly returns
    None; the result is stored on ``self.nc_files``. Also note the mutable
    default argument -- safe only while callees never mutate ``types``.

    Args:
        types (list, optional): List of file categories to load. Defaults to
            ["realtime", "historic", "archive"].
    """
    self.nc_files = self.get_nc_files(types)

make_stats() -> dict

Computes station-level statistics.

Returns:
  • dict( dict ) –

    A dictionary containing: - "flag_counts" (dict): Flag count summaries for the station. - "deployments" (dict): Deployment summary statistics.

Source code in cdippy/ncstats.py
32
33
34
35
36
37
38
39
40
41
42
43
def make_stats(self) -> dict:
    """Computes station-level statistics.

    Returns:
        dict: A dictionary containing:
            - "flag_counts" (dict): Flag count summaries for the station.
            - "deployments" (dict): Deployment summary statistics.
    """
    result = {}
    result["flag_counts"] = self.flag_counts()
    result["deployments"] = self.deployment_summary()
    return result

nc_file_summary(nc_filename: str) -> dict

Computes a summary for a given NetCDF file.

Parameters:
  • nc_filename (str) –

    Name of the NetCDF file.

Returns:
  • dict( dict ) –

    Summary statistics for the file, including: - "flag_counts" (dict): Flag count statistics.

Source code in cdippy/ncstats.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def nc_file_summary(self, nc_filename: str) -> dict:
    """Computes a summary for a given NetCDF file.

    Args:
        nc_filename (str): Name of the NetCDF file.

    Returns:
        dict: Summary statistics for the file, including:
            - "flag_counts" (dict): Flag count statistics.
    """
    if self.nc is None:
        self.load_file(nc_filename)
    result = {}
    # - Currently have just one summary
    result["flag_counts"] = self.flag_counts()
    return result

total_count(cat_var) -> pd.DataFrame

Counts totals for a given categorical flag variable.

Parameters:
  • cat_var (Categorical) –

    Categorical flag variable.

Returns:
  • DataFrame –

    pandas.DataFrame: DataFrame with counts grouped by category.

Source code in cdippy/ncstats.py
149
150
151
152
153
154
155
156
157
158
def total_count(self, cat_var) -> pd.DataFrame:
    """Counts totals for a given categorical flag variable.

    Args:
        cat_var (pandas.Categorical): Categorical flag variable.

    Returns:
        pandas.DataFrame: DataFrame with counts grouped by category.
    """
    return pd.DataFrame({"cnt": cat_var}).groupby(cat_var).count()

cdippy.ndbc

Methods for working with NDBC

get_stn_info(wmo_id)

Work in progress, querying ndbc sos service.

Source code in cdippy/ndbc.py
19
20
21
22
23
24
25
def get_stn_info(wmo_id):
    """Work in progress, querying ndbc sos service.

    Args:
        wmo_id: WMO identifier of the station, appended to the
            DescribeSensor query.

    Returns:
        list: Text gathered from the "description" elements of the SOS
        response (populated by url_utils.rfindt).
    """
    qry = "&".join([request, service, version, outputformat, describe_stn + wmo_id])
    url = "?".join([sos_base, qry])
    root = url_utils.load_et_root(url)
    results = []
    url_utils.rfindt(root, results, "description")
    # Bug fix: the collected results were previously discarded (the
    # function fell off the end and returned None).
    return results

get_wmo_id(stn, store=True, filepath='.')

Queries cdip wmo id table for a given station. Drops pickle file locally.

Source code in cdippy/ndbc.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def get_wmo_id(
    stn,
    store=True,
    filepath=".",
):
    """Queries cdip wmo id table for a given station. Drops pickle file locally.

    Args:
        stn: 3-char station identifier used as the lookup key.
        store: When True, cache the id table in <filepath>/WMO_IDS.pkl.
        filepath: Directory in which the pickle cache is kept.

    Returns:
        The WMO id string for the station, or None if not found.
    """
    pkl_fl = filepath + "/WMO_IDS.pkl" if store else None
    now = datetime.now(timezone.utc)
    # Fetch from the web when: caching is disabled, the cache file does not
    # exist yet, or the current minute is 23 -- a cheap once-an-hour cache
    # refresh window. NOTE(review): calls during minute 23 always re-fetch.
    if not pkl_fl or now.minute == 23 or not os.path.isfile(pkl_fl):
        url = "/".join([cdip_base, "wmo_ids"])
        r = url_utils.read_url(url)
        ids = {}
        for line in r.splitlines():
            # Fixed-width rows: chars 0-2 are the station id, chars 5+ the WMO id.
            ids[line[0:3]] = line[5:].strip()
        if pkl_fl:
            cdip_utils.pkl_dump(ids, pkl_fl)
    else:
        ids = cdip_utils.pkl_load(pkl_fl)
    if stn in ids:
        return ids[stn]
    return None

cdippy.plotting

make_annual_hs_boxplot(stn: str, year: int) -> Figure

Create a boxplot of annual significant wave heights for a station.

Parameters:
  • stn (str) –

    A 5-char station identifier, e.g. '100p1'.

  • year (int) –

    The year to plot.

Returns:
  • fig( Figure ) –

    A matplotlib.pyplot.Figure object for the created plot.

Source code in cdippy/plotting.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
def make_annual_hs_boxplot(stn: str, year: int) -> Figure:
    """
    Create a boxplot of annual significant wave heights for a station.

    Args:
        stn (str): A 5-char station identifier, e.g. '100p1'.
        year (int): The year to plot.

    Returns:
        fig (Figure): A matplotlib.pyplot.Figure object for the created plot.
    """
    # Thin delegation to the plots subpackage.
    fig = plots.annual_hs_boxplot.make_plot(stn, year)
    return fig

make_compendium_plot(stns: str, start: str, end: str, params: str, x_inch: int) -> Figure

CDIP's classic compendium plot for multiple stations and parameters.

Parameters:
  • stns (str) –

    A comma-delimited list of 5-char station identifiers, e.g. '100p1,201p1'.

  • start (str) –

    Start time of data series formatted as 'yyyymm[ddHHMMss]' where 'ddHHMMss' are optional components.

  • end (str) –

    End time of data series ('yyyymm[ddHHMMss]') If 'None' is provided, defaults to the current date and time.

  • params (str) –

    A comma-delimited string of parameter names, e.g. 'waveHs,waveTp'.

Returns:
  • fig( Figure ) –

    A matplotlib.pyplot.Figure object for the created plot.

Source code in cdippy/plotting.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def make_compendium_plot(
    stns: str, start: str, end: str, params: str, x_inch: int
) -> Figure:
    """CDIP's classic compendium plot for multiple stations and parameters.

    Args:
        stns (str): A comma-delimited list of 5-char station identifiers, e.g. '100p1,201p1'.
        start (str): Start time of data series formatted as 'yyyymm[ddHHMMss]' where 'ddHHMMss' are optional components.
        end (str): End time of data series ('yyyymm[ddHHMMss]') If 'None' is provided, defaults to the current date and time.
        params (str): A comma-delimited string of parameter names, e.g. 'waveHs,waveTp'.
        x_inch (int): Figure width in inches -- presumably; confirm against plots.compendium.make_plot.

    Returns:
        fig (Figure): A matplotlib.pyplot.Figure object for the created plot.

    """

    return plots.compendium.make_plot(stns, start, end, params, x_inch)

make_sst_climatology_plot(stn: str, x_inch: int = None, y_inch: int = None) -> Figure

Create a plot of yearly climatology of sea surface temperature at a station for all years of available data.

Parameters:
  • stn (str) –

    A 5-char station identifier, e.g. '100p1'.

Returns:
  • fig( Figure ) –

    A matplotlib.pyplot.Figure object for the created plot.

Source code in cdippy/plotting.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def make_sst_climatology_plot(
    stn: str, x_inch: int = None, y_inch: int = None
) -> Figure:
    """
    Create a plot of yearly climatology of sea surface temperature at a station for all years of available data.

    Args:
        stn (str): A 5-char station identifier, e.g. '100p1'.
        x_inch (int, optional): Figure width in inches -- presumably; confirm against plots.sst_climatology.make_plot.
        y_inch (int, optional): Figure height in inches -- presumably; confirm against plots.sst_climatology.make_plot.

    Returns:
        fig (Figure): A matplotlib.pyplot.Figure object for the created plot.
    """

    return plots.sst_climatology.make_plot(stn, x_inch, y_inch)

cdippy.spectra

Author: Sarah Heim. (Some of which is a port of Corey Olfe's code)

This code was originally taken from the cdip_mobile site.

Spectra

Bases: object

Source code in cdippy/spectra.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
class Spectra(object):
    """Ordered collection of Spectrum subclass objects, one per time step."""

    def __init__(self):
        """Initialize an empty Spectra collection. Meant for using methods
        to create an array of Spectrum subclass objects.

        :ivar specArr: array of Spectrum subClass objects
        """
        self.specArr = []

    def get_spectraNum(self):
        """Return the number of objects (spectrum) in the specArr."""
        return len(self.specArr)

    def __str__(self):
        """Spectra is an array of Spectrum(s)."""
        return "Spectra is an array of {0} Spectrum(s)".format(self.get_spectraNum())

    def get_spectraType(self):
        """Return the class of the first object in specArr (all are assumed
        to be the same type, e.g. Spectrum_64band), or None if empty."""
        if self.get_spectraNum() > 0:
            return type(self.specArr[0])
        return None

    def get_bandSize(self):
        """Return the number of freq/bands of the spectra, or 0 if empty."""
        if self.get_spectraNum() > 0:
            return len(self.specArr[0].freq)
        return 0

    def whichSpecClass(self, length):
        """
        Return the Spectrum subclass (e.g. Spectrum_64band) whose number of
        frequency bands equals ``length``, or None if no subclass matches.

        :var int length: length/count of the number of frequencies
        """
        for sub_cls in Spectrum.__subclasses__():
            if len(sub_cls().freq) == length:
                return sub_cls
        return None

    def set_spectrumArr_fromQuery(self, dataDict):
        """
        Populate the (initially empty) specArr with one Spectrum object per
        waveTime entry.

        :var dataDict: dictionary (output from cdippy.stndata query)
        """
        bandNum = len(dataDict["waveEnergyDensity"][0])
        # NOTE: raises TypeError (None is not callable) when no Spectrum
        # subclass matches bandNum -- same behavior as before.
        specCls = self.whichSpecClass(bandNum)
        for idx in range(len(dataDict["waveTime"])):
            spec = specCls()
            spec.set_specAtts(dataDict, idx)
            self.specArr.append(spec)

    def specArr_ToDict(self):
        """Output the specArr as a dictionary with keys like waveA1Value,
        waveEnergyDensity etc. Returns an empty dict when specArr is empty."""
        newDict = {}
        if not self.specArr:
            return newDict

        # Output key -> Spectrum attribute carrying its per-timestep value.
        key_to_attr = [
            ("waveTime", "wTime"),
            ("waveEnergyDensity", "ener_dens"),
            ("waveMeanDirection", "dMean"),
            ("waveA1Value", "a1"),
            ("waveA2Value", "a2"),
            ("waveB1Value", "b1"),
            ("waveB2Value", "b2"),
        ]
        # Check factors are optional; include them only when present.
        first = self.specArr[0]
        if hasattr(first, "check") and first.check is not None:
            key_to_attr.append(("waveCheckFactor", "check"))

        for key, attr in key_to_attr:
            newDict[key] = np.ma.array([getattr(s, attr) for s in self.specArr])
        return newDict

    def redist_specArr(self, objName):
        """
        Redistribute each spectrum to the given subclass if the current
        spectra type differs.

        :var objName: the subClass to redistribute to, e.g. ``Spectrum_9band``
        """
        if self.get_spectraType() != objName:
            for i, sp in enumerate(self.specArr):
                self.specArr[i] = sp.redistribute_sp(objName)

__init__()

initializing Spectra. Meant for using methods to create array of Spectrum subClass objects

:ivar specArr: array of Spectrum subClass objects

Source code in cdippy/spectra.py
16
17
18
19
20
21
22
def __init__(self):
    """initializing Spectra. Meant for using methods to create array
        of Spectrum subClass objects

    :ivar specArr: array of Spectrum subClass objects
    """
    self.specArr = []

__str__()

Spectra is an array of Spectrum(s)

Source code in cdippy/spectra.py
28
29
30
def __str__(self):
    """Spectra is an array of Spectrum(s)"""
    return "Spectra is an array of {0} Spectrum(s)".format(self.get_spectraNum())

get_bandSize()

returns the size (number of freq/bands) of the spectrum in spectra

Source code in cdippy/spectra.py
40
41
42
43
44
45
def get_bandSize(self):
    """returns the size (number of freq/bands) of the spectrum in spectra"""
    if self.get_spectraNum() > 0:
        return len(self.specArr[0].freq)
    else:
        return 0

get_spectraNum()

return the number of objects (spectrum) in the specArr

Source code in cdippy/spectra.py
24
25
26
def get_spectraNum(self):
    """return the number of objects (spectrum) in the specArr"""
    return len(self.specArr)

get_spectraType()

returns the type of Class of the first object in specArr, all should be the same i.e. Spectrum_64band

Source code in cdippy/spectra.py
32
33
34
35
36
37
38
def get_spectraType(self):
    """returns the type of Class of the first object in specArr,
    all should be the same i.e. Spectrum_64band"""
    if self.get_spectraNum() > 0:
        return type(self.specArr[0])
    else:
        return None

redist_specArr(objName)

Will redistribute spectrum if necessary (if different type)

:var objName: name of the subClass to redistribute to, e.g. Spectrum_9band

Source code in cdippy/spectra.py
120
121
122
123
124
125
126
127
128
def redist_specArr(self, objName):
    """
    Redistribute each spectrum to the given subclass if the current
    spectra type differs.

    :var objName: the subClass to redistribute to, e.g. ``Spectrum_9band``
    """
    if self.get_spectraType() != objName:
        for i, sp in enumerate(self.specArr):
            self.specArr[i] = sp.redistribute_sp(objName)

set_spectrumArr_fromQuery(dataDict)

specArr is empty. Create Spectrum objects and put in specArr

:var dataDict: dictionary (output from cdippy.stndata query)

Source code in cdippy/spectra.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def set_spectrumArr_fromQuery(self, dataDict):
    """
    specArr is empty. Create Spectrum objects and put in specArr

    :var dataDict: dictionary (output from cdippy.stndata query)
    """
    bandNum = len(dataDict["waveEnergyDensity"][0])
    specCls = self.whichSpecClass(bandNum)
    for e, ep in enumerate(dataDict["waveTime"]):
        # create Spectrum object of appropriate type for each time
        # i.e.: spec = Spectrum_64band(stn)
        spec = specCls()
        spec.set_specAtts(dataDict, e)
        self.specArr.append(spec)

specArr_ToDict()

Output the specArr as a dictionary with keys like waveA1Value, waveEnergyDensity etc.

Source code in cdippy/spectra.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def specArr_ToDict(self):
    """Output the specArr as a dictionary with keys like waveA1Value, waveEnergyDensity etc."""
    newDict = {}
    if len(self.specArr) == 0:
        return newDict

    names = [
        "waveTime",
        "waveEnergyDensity",
        "waveMeanDirection",
        "waveA1Value",
        "waveA2Value",
        "waveB1Value",
        "waveB2Value",
    ]
    lists = {}
    for name in names:
        lists[name] = []
    if hasattr(self.specArr[0], "check") and self.specArr[0].check is not None:
        lists["waveCheckFactor"] = []

    for s in self.specArr:
        lists["waveTime"].append(s.wTime)
        lists["waveEnergyDensity"].append(s.ener_dens)
        lists["waveMeanDirection"].append(s.dMean)
        lists["waveA1Value"].append(s.a1)
        lists["waveA2Value"].append(s.a2)
        lists["waveB1Value"].append(s.b1)
        lists["waveB2Value"].append(s.b2)
        if "waveCheckFactor" in lists:
            lists["waveCheckFactor"].append(s.check)

    newDict["waveTime"] = np.ma.array(lists["waveTime"])
    newDict["waveEnergyDensity"] = np.ma.array(lists["waveEnergyDensity"])
    newDict["waveMeanDirection"] = np.ma.array(lists["waveMeanDirection"])
    newDict["waveA1Value"] = np.ma.array(lists["waveA1Value"])
    newDict["waveA2Value"] = np.ma.array(lists["waveA2Value"])
    newDict["waveB1Value"] = np.ma.array(lists["waveB1Value"])
    newDict["waveB2Value"] = np.ma.array(lists["waveB2Value"])
    if "waveCheckFactor" in lists:
        newDict["waveCheckFactor"] = np.ma.array(lists["waveCheckFactor"])

    return newDict

whichSpecClass(length)

Return the Spectrum subclass appropriate for the given number of frequency bands, e.g. Spectrum_64band. :var int length: length/count of the number of frequencies

Source code in cdippy/spectra.py
47
48
49
50
51
52
53
54
55
56
57
58
def whichSpecClass(self, length):
    """
    Return the Spectrum subclass (e.g. Spectrum_64band) whose number of
    frequency bands equals ``length``, or None if no subclass matches.

    :var int length: length/count of the number of frequencies
    """
    specObjs = Spectrum.__subclasses__()
    for sObj in specObjs:
        objNum = len(sObj().freq)
        if objNum == length:
            return sObj
    return None

Spectrum

Bases: object

Source code in cdippy/spectra.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
class Spectrum(object):
    """Base class for a wave energy spectrum at a single timestamp.

    Subclasses are expected to define a frequency layout by populating
    ``freq`` (band-center frequencies) and ``bandwidth`` (band widths)
    masked arrays (see set_FreqBands); per-timestamp data is attached
    with set_specAtts.
    """

    def __init__(self):
        pass

    def __str__(self):
        # Show every attribute currently set on the instance.
        return str(self.__dict__)
        # return "Station %s: \n\tstart: %s \n\tend : %s" % (self.stn, self.start.isoformat(), self.end.isoformat())

    def set_specAtts(self, query, i):
        """Set spectra attributes from cdippy.stndata query

        :var mArr query: multi-dimensional array returned from cdippy.stndata
        :var int i: index of the record (timestamp) to copy
        """
        self.wTime = query["waveTime"][i]
        self.dMean = query["waveMeanDirection"][i]
        self.ener_dens = query["waveEnergyDensity"][i]
        self.a1 = query["waveA1Value"][i]
        self.b1 = query["waveB1Value"][i]
        self.a2 = query["waveA2Value"][i]
        self.b2 = query["waveB2Value"][i]
        # waveCheckFactor is optional in the query result.
        self.check = (
            query["waveCheckFactor"][i] if "waveCheckFactor" in query.keys() else None
        )

    def set_FreqBands(self, num, sz):
        """Makes the frequency layout: ``freq`` holds the band centers
        num, 2*num, ..., sz*num and ``bandwidth`` is a constant num.

        :var num: band width / frequency step
        :var int sz: size, number of bands
        """

        self.freq = np.ma.array(list(map(lambda x: x * num, range(1, sz + 1))))
        self.bandwidth = np.ma.array(([num] * sz), dtype=np.float32)
        # return list(map(lambda x: x*num, range(1, sz+1)))

    def freq_cutoffs(self):
        """returns array of tuples of all the (low, high) frequencies;
        a.k.a. bots/tops"""
        arr = []
        for i, f in enumerate(self.freq):
            b = self.bandwidth[i]
            # if i< 25: print(i, f, b)
            # Each band spans half a bandwidth on either side of its center.
            arr.append((f - b / 2, f + b / 2))
        return arr

    def recip(self, f):
        """returns INTEGER reciprocal of a number.
        Specifically for converting frequency (float) to period (integer)"""
        return round(1 / f)

    def peri_cutoffs(self):
        """returns array of tuples of all the (low, high) periods"""
        return list(map(lambda x: tuple(map(self.recip, x)), self.freq_cutoffs()))

    # def get_center_periods(self):
    #     return list(map(lambda x: "%.1f" % (1/x), self.freq))

    def ma_to_list(self, marray):
        """Return the underlying data of a masked-array attribute as a list.

        :var str marray: string name of attribute that contains a masked array
        """
        return list(np.ma.getdata(getattr(self, marray)))

    def get_Energy(self):
        """units:meters**2 per bandwidth.
        sum(get_Energy()) is Total Energy"""
        return self.ener_dens * self.bandwidth

    def get_SigWaveHt(self):
        """units: meters. Returns a lazy map; wrap in list() to materialize."""
        # return list(map(lambda x: self.calc_Hs(x), self.get_Energy()))
        return map(lambda x: self.calc_Hs(x), self.get_Energy())

    def get_Tp(self):
        # Peak period: reciprocal of the center frequency of the band
        # with the most energy (Hs is monotonic in energy).
        ind = np.argmax(list(self.get_SigWaveHt()))
        return 1 / (self.freq[ind])

    def get_Dp(self):
        # Peak direction: mean direction of the band with the most energy.
        ind = np.argmax(list(self.get_SigWaveHt()))
        return self.dMean[ind]

    def calc_Hs(self, energy):
        """returns the square root of energy x 4"""
        return energy**0.5 * 4

    def total_Hs(self):
        """square root of Total Energy x 4"""
        return self.calc_Hs(np.sum(self.get_Energy()))

    def redistribute_sp(self, specInstClass):
        """
        translation of Corey's redistribute_sp code:
        c  Subroutine that redistributes a spectrum into a new spectral layout.

        :var specInstClass: the class to redistribute to can be instance or name of Class
        """
        # c--   Initialize the new spectral dist (redist_sp)
        try:
            cls = getattr(MODULE, specInstClass)
        except Exception:
            # Unknown target class name: implicitly returns None.
            return
        redist_sp = cls()
        reBands = len(redist_sp.freq)
        redist_sp.wTime = self.wTime
        # dMean of -1 marks "no direction" for a band.
        redist_sp.dMean = np.ma.array(([-1] * reBands), dtype=np.float32)
        redist_sp.ener_dens = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.a1 = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.b1 = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.a2 = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.b2 = np.ma.zeros(reBands, dtype=np.float32)
        if hasattr(self, "check") and self.check is not None:
            # NOTE(review): unmasks the source check factors in place — a
            # side effect on self; confirm this is intended.
            self.check.mask = False
            redist_sp.check = np.ma.zeros(reBands, dtype=np.float32)

        redist_botsTops = redist_sp.freq_cutoffs()
        orig_botsTops = self.freq_cutoffs()

        # c--   Do the business - loop over the new bins, adding in each of the original
        # c--   spectral bands to the appropriate bin. Partition bands where necessary.
        for i in range(reBands):
            cos_sum = 0
            sin_sum = 0
            miss_dir = False
            rBot, rTop = redist_botsTops[i][0], redist_botsTops[i][1]
            # print('%s: (%.3f, %.3f)' % (i, rBot, rTop))
            for j, ob in enumerate(self.freq):
                # minor re-write of bot/top
                # c--   If the full band falls into the current bin, add the entire contents
                # c--   If the bottom of the band falls in the bin, add in the appropriate portion
                # c--   If the top of the band falls in the bin, add in the appropriate portion
                # c--   If the middle of the band falls in the bin, add in the appropriate portion
                oBot, oTop = orig_botsTops[j][0], orig_botsTops[j][1]
                # (bot, top) is the overlap of original band j with new bin i.
                bot = rBot if rBot >= oBot else oBot
                top = rTop if rTop <= oTop else oTop
                if bot < top:
                    # Moved band_calcs here:
                    # c  Helper for REDISTRIBUTE_SP; adds components of original spectral layout
                    # c  into the redistributed layout, weighting by energy
                    curr_energy = self.ener_dens[j] * (top - bot)
                    # [redist_sp, miss_dir, sin_sum, cos_sum] = self.band_calcs(redist_sp, curr_energy, sin_sum, cos_sum, miss_dir, i, j)
                    if curr_energy != 0:
                        redist_sp.ener_dens[i] += curr_energy
                        # print('\tredist(%.3f, %.3f) new(%.3f, %.3f), %f, %f, %f' %
                        # (oBot, oTop, bot, top, self.ener_dens[j],
                        # curr_energy, redist_sp.ener_dens[i]))
                        if self.dMean[j] == -1:
                            # Any contributing band lacking a direction voids
                            # the direction for this whole bin.
                            miss_dir = True
                        else:
                            redist_sp.a1[i] += curr_energy * self.a1[j]
                            redist_sp.b1[i] += curr_energy * self.b1[j]
                            redist_sp.a2[i] += curr_energy * self.a2[j]
                            redist_sp.b2[i] += curr_energy * self.b2[j]
                            if hasattr(self, "check") and self.check is not None:
                                redist_sp.check[i] += curr_energy * self.check[j]
                            sin_sum += curr_energy * math.sin(
                                math.radians(self.dMean[j])
                            )
                            cos_sum += curr_energy * math.cos(
                                math.radians(self.dMean[j])
                            )

            # c--   Calculate direction and calc ener_dens once bin is complete
            if redist_sp.ener_dens[i] > 0:
                redist_sp.ener_dens[i] /= redist_sp.bandwidth[i]
                if not miss_dir:
                    # NOTE(review): divides by the already bandwidth-scaled
                    # ener_dens (line above), not the raw energy sum — confirm
                    # against the Fortran original.
                    sin_avg = sin_sum / redist_sp.ener_dens[i]
                    cos_avg = cos_sum / redist_sp.ener_dens[i]
                    redist_sp.dMean[i] = math.degrees(math.atan2(sin_avg, cos_avg))
                    if redist_sp.dMean[i] < 0:
                        redist_sp.dMean[i] += 360
                    redist_sp.a1[i] /= redist_sp.bandwidth[i]
                    redist_sp.b1[i] /= redist_sp.bandwidth[i]
                    redist_sp.a2[i] /= redist_sp.bandwidth[i]
                    redist_sp.b2[i] /= redist_sp.bandwidth[i]
                    if hasattr(self, "check") and self.check is not None:
                        redist_sp.check[i] /= redist_sp.bandwidth[i]

            # c--   Normalize once energy redistributed
            # c  Subroutine that normalizes the coefficients in a sp_data_block. Direction
            # c  is set to -1 for any band in which the coeffs can't be normalized
            # c  by energy.
            if redist_sp.dMean[i] != -1:
                redist_sp.a1[i] /= redist_sp.ener_dens[i]
                redist_sp.b1[i] /= redist_sp.ener_dens[i]
                redist_sp.a2[i] /= redist_sp.ener_dens[i]
                redist_sp.b2[i] /= redist_sp.ener_dens[i]
                if hasattr(self, "check") and self.check is not None:
                    redist_sp.check[i] /= redist_sp.ener_dens[i]
                    # Clamp check factor to its documented ceiling.
                    if redist_sp.check[i] > 2.55:
                        redist_sp.check[i] = 2.55
                max_coeff = max(
                    redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
                )
                min_coeff = min(
                    redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
                )
                # Coefficients outside [-1, 1] cannot be normalized: drop direction.
                if max_coeff > 1 or min_coeff < -1:
                    redist_sp.dMean[i] = -1

        return redist_sp

calc_Hs(energy)

returns the square root of energy x 4

Source code in cdippy/spectra.py
214
215
216
def calc_Hs(self, energy):
    """Significant wave height from band energy: 4 * sqrt(energy)."""
    return 4 * energy**0.5

freq_cutoffs()

returns array of tuples of all the (low, high) frequencies; a.k.a. bots/tops

Source code in cdippy/spectra.py
166
167
168
169
170
171
172
173
174
def freq_cutoffs(self):
    """Return a list of (low, high) frequency tuples for every band
    (a.k.a. bots/tops)."""
    # Each band spans half a bandwidth on either side of its center.
    return [
        (f - b / 2, f + b / 2)
        for f, b in zip(self.freq, self.bandwidth)
    ]

get_Energy()

units:meters**2 per bandwidth. sum(get_energy) is Total Energy

Source code in cdippy/spectra.py
194
195
196
197
def get_Energy(self):
    """Band energy in meters**2 (energy density scaled by each band's width).
    sum(get_Energy()) yields the Total Energy."""
    density = self.ener_dens
    widths = self.bandwidth
    return density * widths

get_SigWaveHt()

units: meters

Source code in cdippy/spectra.py
199
200
201
202
def get_SigWaveHt(self):
    """Lazily yield the significant wave height (meters) of each band;
    wrap in list() to materialize."""
    return map(self.calc_Hs, self.get_Energy())

ma_to_list(marray)

:var str marray: string name of attribute that contains a masked array

Source code in cdippy/spectra.py
188
189
190
191
192
def ma_to_list(self, marray):
    """Return the raw (unmasked) data of a masked-array attribute as a list.

    :var str marray: string name of attribute that contains a masked array
    """
    masked = getattr(self, marray)
    return list(np.ma.getdata(masked))

peri_cutoffs()

returns array of tuples of all the (low,high) periods

Source code in cdippy/spectra.py
181
182
183
def peri_cutoffs(self):
    """Return a list of (low, high) period tuples for every band,
    derived from the frequency cutoffs via recip()."""
    return [
        tuple(self.recip(f) for f in pair)
        for pair in self.freq_cutoffs()
    ]

recip(f)

returns INTEGER reciprocal of a number. Specifically for converting frequency (float) to period (integer)

Source code in cdippy/spectra.py
176
177
178
179
def recip(self, f):
    """Return the reciprocal of f rounded to the nearest INTEGER;
    used to convert a frequency (float) into a period (integer)."""
    inverse = 1 / f
    return round(inverse)

redistribute_sp(specInstClass)

translation of Corey's redistribute_sp code: c Subroutine that redistributes a spectrum into a new spectral layout.

:var specInstClass: the class to redistribute to can be instance or name of Class

Source code in cdippy/spectra.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def redistribute_sp(self, specInstClass):
    """
    translation of Corey's redistribute_sp code:
    c  Subroutine that redistributes a spectrum into a new spectral layout.

    :var specInstClass: the class to redistribute to can be instance or name of Class
    """
    # c--   Initialize the new spectral dist (redist_sp)
    try:
        cls = getattr(MODULE, specInstClass)
    except Exception:
        # Unknown target class name: implicitly returns None.
        return
    redist_sp = cls()
    reBands = len(redist_sp.freq)
    redist_sp.wTime = self.wTime
    # dMean of -1 marks "no direction" for a band.
    redist_sp.dMean = np.ma.array(([-1] * reBands), dtype=np.float32)
    redist_sp.ener_dens = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.a1 = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.b1 = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.a2 = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.b2 = np.ma.zeros(reBands, dtype=np.float32)
    if hasattr(self, "check") and self.check is not None:
        # NOTE(review): unmasks the source check factors in place — a
        # side effect on self; confirm this is intended.
        self.check.mask = False
        redist_sp.check = np.ma.zeros(reBands, dtype=np.float32)

    redist_botsTops = redist_sp.freq_cutoffs()
    orig_botsTops = self.freq_cutoffs()

    # c--   Do the business - loop over the new bins, adding in each of the original
    # c--   spectral bands to the appropriate bin. Partition bands where necessary.
    for i in range(reBands):
        cos_sum = 0
        sin_sum = 0
        miss_dir = False
        rBot, rTop = redist_botsTops[i][0], redist_botsTops[i][1]
        # print('%s: (%.3f, %.3f)' % (i, rBot, rTop))
        for j, ob in enumerate(self.freq):
            # minor re-write of bot/top
            # c--   If the full band falls into the current bin, add the entire contents
            # c--   If the bottom of the band falls in the bin, add in the appropriate portion
            # c--   If the top of the band falls in the bin, add in the appropriate portion
            # c--   If the middle of the band falls in the bin, add in the appropriate portion
            oBot, oTop = orig_botsTops[j][0], orig_botsTops[j][1]
            # (bot, top) is the overlap of original band j with new bin i.
            bot = rBot if rBot >= oBot else oBot
            top = rTop if rTop <= oTop else oTop
            if bot < top:
                # Moved band_calcs here:
                # c  Helper for REDISTRIBUTE_SP; adds components of original spectral layout
                # c  into the redistributed layout, weighting by energy
                curr_energy = self.ener_dens[j] * (top - bot)
                # [redist_sp, miss_dir, sin_sum, cos_sum] = self.band_calcs(redist_sp, curr_energy, sin_sum, cos_sum, miss_dir, i, j)
                if curr_energy != 0:
                    redist_sp.ener_dens[i] += curr_energy
                    # print('\tredist(%.3f, %.3f) new(%.3f, %.3f), %f, %f, %f' %
                    # (oBot, oTop, bot, top, self.ener_dens[j],
                    # curr_energy, redist_sp.ener_dens[i]))
                    if self.dMean[j] == -1:
                        # Any contributing band lacking a direction voids
                        # the direction for this whole bin.
                        miss_dir = True
                    else:
                        redist_sp.a1[i] += curr_energy * self.a1[j]
                        redist_sp.b1[i] += curr_energy * self.b1[j]
                        redist_sp.a2[i] += curr_energy * self.a2[j]
                        redist_sp.b2[i] += curr_energy * self.b2[j]
                        if hasattr(self, "check") and self.check is not None:
                            redist_sp.check[i] += curr_energy * self.check[j]
                        sin_sum += curr_energy * math.sin(
                            math.radians(self.dMean[j])
                        )
                        cos_sum += curr_energy * math.cos(
                            math.radians(self.dMean[j])
                        )

        # c--   Calculate direction and calc ener_dens once bin is complete
        if redist_sp.ener_dens[i] > 0:
            redist_sp.ener_dens[i] /= redist_sp.bandwidth[i]
            if not miss_dir:
                # NOTE(review): divides by the already bandwidth-scaled
                # ener_dens (line above), not the raw energy sum — confirm
                # against the Fortran original.
                sin_avg = sin_sum / redist_sp.ener_dens[i]
                cos_avg = cos_sum / redist_sp.ener_dens[i]
                redist_sp.dMean[i] = math.degrees(math.atan2(sin_avg, cos_avg))
                if redist_sp.dMean[i] < 0:
                    redist_sp.dMean[i] += 360
                redist_sp.a1[i] /= redist_sp.bandwidth[i]
                redist_sp.b1[i] /= redist_sp.bandwidth[i]
                redist_sp.a2[i] /= redist_sp.bandwidth[i]
                redist_sp.b2[i] /= redist_sp.bandwidth[i]
                if hasattr(self, "check") and self.check is not None:
                    redist_sp.check[i] /= redist_sp.bandwidth[i]

        # c--   Normalize once energy redistributed
        # c  Subroutine that normalizes the coefficients in a sp_data_block. Direction
        # c  is set to -1 for any band in which the coeffs can't be normalized
        # c  by energy.
        if redist_sp.dMean[i] != -1:
            redist_sp.a1[i] /= redist_sp.ener_dens[i]
            redist_sp.b1[i] /= redist_sp.ener_dens[i]
            redist_sp.a2[i] /= redist_sp.ener_dens[i]
            redist_sp.b2[i] /= redist_sp.ener_dens[i]
            if hasattr(self, "check") and self.check is not None:
                redist_sp.check[i] /= redist_sp.ener_dens[i]
                # Clamp check factor to its documented ceiling.
                if redist_sp.check[i] > 2.55:
                    redist_sp.check[i] = 2.55
            max_coeff = max(
                redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
            )
            min_coeff = min(
                redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
            )
            # Coefficients outside [-1, 1] cannot be normalized: drop direction.
            if max_coeff > 1 or min_coeff < -1:
                redist_sp.dMean[i] = -1

    return redist_sp

set_FreqBands(num, sz)

Makes array of frequencies. :var num: band width (frequency step) :var int sz: size, number of bands

Source code in cdippy/spectra.py
156
157
158
159
160
161
162
163
def set_FreqBands(self, num, sz):
    """Build the frequency layout: ``freq`` holds the band-center
    frequencies num, 2*num, ..., sz*num and ``bandwidth`` holds a
    constant width of num for each of the sz bands.

    :var num: band width / frequency step
    :var int sz: size, number of bands
    """
    self.freq = np.ma.array([k * num for k in range(1, sz + 1)])
    self.bandwidth = np.ma.array([num] * sz, dtype=np.float32)

set_specAtts(query, i)

Set spectra attributes from cdippy.stndata query

:var mArr query: multi-dimensional array returned from cdippy.stndata :var int i: index

Source code in cdippy/spectra.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def set_specAtts(self, query, i):
    """Copy record ``i`` of a cdippy.stndata query onto this spectrum.

    :var mArr query: multi-dimensional array returned from cdippy.stndata
    :var int i: index of the record (timestamp) to copy
    """
    # Map instance attribute name -> query variable name.
    for attr, key in (
        ("wTime", "waveTime"),
        ("dMean", "waveMeanDirection"),
        ("ener_dens", "waveEnergyDensity"),
        ("a1", "waveA1Value"),
        ("b1", "waveB1Value"),
        ("a2", "waveA2Value"),
        ("b2", "waveB2Value"),
    ):
        setattr(self, attr, query[key][i])
    # waveCheckFactor is optional in the query result.
    if "waveCheckFactor" in query.keys():
        self.check = query["waveCheckFactor"][i]
    else:
        self.check = None

total_Hs()

square root of Total Energy x 4

Source code in cdippy/spectra.py
218
219
220
221
def total_Hs(self):
    """Significant wave height of the whole spectrum:
    4 * square root of the Total Energy."""
    total_energy = np.sum(self.get_Energy())
    return self.calc_Hs(total_energy)

cdippy.stndata

StnData

Bases: CDIPnc

Returns data and metadata for the specified station.

This class merges data from multiple CDIP netCDF files to produce a single dictionary with keys of the requested variables. Each key corresponds to a numpy masked array.

METHODS

get_series(start, end, vrs) Returns data for a station given start date, end date and a list of variables. get_parameters(start, end) Calls get_series with vrs set to parameter variables. get_spectra(start, end) Calls get_series with vrs set to spectrum variables. get_xyz Calls get_series with vrs set to xyz variables. get_stn_meta Returns all station meta variables. get_nc_files Returns a dictionary of all this station's netCDF files. get_target_times Returns a 2-tuple of timestamps, an interval corresponding to n records to the right or left of target_timestamp.

Source code in cdippy/stndata.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
class StnData(CDIPnc):
    """Returns data and metadata for the specified station.

    This class merges data from multiple CDIP netCDF files to
    produce a single dictionary with keys of the requested variables.
    Each key corresponds to a numpy masked array.

    METHODS
    -------
    get_series(start, end, vrs)
        Returns data for a station given start date, end date and a
        list of variables.
    get_parameters(start, end)
        Calls get_series with vrs set to parameter variables.
    get_spectra(start, end)
        Calls get_series with vrs set to spectrum variables.
    get_xyz
        Calls get_series with vrs set to xyz variables.
    get_stn_meta
        Returns all station meta variables.
    get_nc_files
        Returns a dictionary of all this station's netCDF files.
    get_target_times
        Returns a 2-tuple of timestamps, an interval corresponding
        to  n records to the right or left of target_timestamp.
    """

    nc_file_types = [
        "historic",
        "archive",
        "predeploy",
        "moored",
        "offsite",
        "recovered",
    ]

    # Commonly requested sets of variables
    parameter_vars = ["waveHs", "waveTp", "waveDp", "waveTa"]
    xyz_vars = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]
    gps_vars = ["gpsLatitude", "gpsLongitude", "gpsStatusFlags"]
    spectrum_vars = [
        "waveEnergyDensity",
        "waveMeanDirection",
        "waveA1Value",
        "waveB1Value",
        "waveA2Value",
        "waveB2Value",
        "waveCheckFactor",
    ]
    meta_vars = [
        "metaStationName",
        "metaDeployLatitude",
        "metaDeployLongitude",
        "metaWaterDepth",
        "metaDeclination",
    ]
    meta_attributes = [
        "wmo_id",
        "geospatial_lat_min",
        "geospatial_lat_max",
        "geospatial_lat_units",
        "geospatial_lat_resolution",
        "geospatial_lon_min",
        "geospatial_lon_max",
        "geospatial_lon_units",
        "geospatial_lon_resolution",
        "geospatial_vertical_min",
        "geospatial_vertical_max",
        "geospatial_vertical_units",
        "geospatial_vertical_resolution",
        "time_coverage_start",
        "time_coverage_end",
        "date_created",
        "date_modified",
    ]

    # Class-level defaults; instances overwrite these during requests.
    pub_set = None
    vrs = None
    meta = None

    def __init__(
        self, stn: str, data_dir: str = None, org: str = None, deploy_num: int = None
    ):
        """
        PARAMETERS
        ----------
        stn : str
           Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        org: str
            (Organization) Values are: cdip|ww3|external
        deploy_num : int [optional]
            Supply this to access specific station deployment data.
            Must be >= 1.
        """
        self.nc = None
        self.stn = stn
        self.data_dir = data_dir
        self.org = org

        # Accept numbers for cdip stations
        if type(stn) is not str:
            stn = str(stn).zfill(3) + "p1"

        # Initialize nc file used for meta information
        self.deploy_num = deploy_num
        if deploy_num:
            # Check all active datasets in this order p3 -> p2 -> p1 -> p0
            p_lookup = dict([[v, k] for k, v in self.active_datasets.items()])
            __found_active_meta = False
            for p in reversed(sorted(p_lookup)):
                self.meta = Active(
                    self.stn, self.deploy_num, p_lookup[p], self.data_dir, self.org
                )
                if self.meta.nc:
                    __found_active_meta = True
                    break
            if not __found_active_meta:
                self.meta = Archive(self.stn, self.deploy_num, self.data_dir, self.org)
        else:
            self.historic = Historic(self.stn, self.data_dir, self.org)
            self.realtime = Realtime(self.stn, self.data_dir, self.org)
            if self.historic and self.historic.nc:
                self.meta = self.historic
            else:
                if self.realtime and self.realtime.nc:
                    self.meta = self.realtime
        if self.meta is None:
            return None

    def get_stn_meta(self) -> dict:
        """Returns a dict of station meta data."""
        result = {}
        if self.meta is None:
            return result
        self.meta.set_request_info(vrs=self.meta_vars)
        result = self.meta.get_request()
        for attr_name in self.meta_attributes:
            if hasattr(self.meta.nc, attr_name):
                result[attr_name] = getattr(self.meta.nc, attr_name)
        return result

    def get_parameters(
        self,
        start: datetime = None,
        end: datetime = None,
        pub_set: str = "public",
        apply_mask=True,
        target_records=0,
    ) -> dict:
        """Calls get_series to return wave parameters."""
        return self.get_series(
            start, end, self.parameter_vars, pub_set, apply_mask, target_records
        )

    def get_xyz(
        self, start: datetime = None, end: datetime = None, pub_set: str = "public"
    ) -> dict:
        """Calls get_series to return displacement data."""
        return self.get_series(start, end, self.xyz_vars, pub_set)

    def get_spectra(
        self,
        start: datetime = None,
        end: datetime = None,
        pub_set: str = "public",
        apply_mask: bool = True,
        target_records: int = 0,
        force_64bands: bool = False,
    ) -> dict:
        """Calls get_series to return spectral data."""
        return self.get_series(
            start,
            end,
            self.spectrum_vars,
            pub_set,
            apply_mask,
            target_records,
            force_64bands,
        )

    def get_series(
        self,
        start: datetime = None,
        end: datetime = None,
        vrs: list = None,
        pub_set: str = None,
        apply_mask: bool = None,
        target_records: int = 0,
        force_64bands: bool = False,
    ) -> dict:
        """
        Returns a dict of data between start and end dates with specified quality.

        PARAMETERS
        ----------
        start : str or datetime [optional] : default Jan 1, 1975
            Start time of data request (UTC). If provided as a string must
            be in the format Y-m-d H:M:S where Y is 4 chars and all others
            are 2 chars. Ex. '2020-03-30 19:32:56'.
        end : str or datetime [optional] : default now
            End time of data request (UTC). If not supplied defaults to now.
        vrs : list [optional] : default ['waveHs']
            A list of the names of variables to retrieve. They all must start
            with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
        pub_set: str [optional] values = public|nonpub|all
            Filters data based on data quality flags.
        apply_mask: bool [optional] default True
            Removes values from the masked array that have a mask value of True.
            Ex. If nonpub data is requested and apply_mask is False, the returned
            array will contain both public and nonpublic data (although public
            data records will have the mask value set to True). If apply_mask
            is set to True, only nonpub records will be returned.
        target_records: int [optional]
            If start is specified and end is None, this will specify the number
            of additional records to return closest to start.
        force_64bands: bool [optional]
            For the case in which all spectra returned are mk4 100 band format,
            force the conversion to 64bands. Mixed formats are always returned in mk3
            64 band format.
        """
        if vrs is None:
            vrs = self.parameter_vars
        prefix = self.get_var_prefix(vrs[0])

        if start is not None and end is None:  # Target time
            if isinstance(start, str):
                start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
            ts_I = self.get_target_timespan(
                cdip_utils.datetime_to_timestamp(start), target_records, prefix + "Time"
            )
            if ts_I[0] is not None:
                start = cdip_utils.timestamp_to_datetime(ts_I[0])
                end = cdip_utils.timestamp_to_datetime(ts_I[1])
            else:
                return None
        elif start is None:  # Use default 3 days back
            start = datetime.utcnow() - timedelta(days=3)
            end = datetime.utcnow()

        if pub_set is None:
            pub_set = self.pub_set

        if apply_mask is None:
            apply_mask = self.apply_mask

        self.force_64bands = force_64bands

        self.set_request_info(start, end, vrs, pub_set, apply_mask)

        # Dispatch on variable prefix and whether a specific deployment
        # was requested: xyz data and active deployments use different
        # file sets than the default realtime+historic merge.
        if prefix == "xyz" and self.deploy_num is None:
            return self.__merge_xyz_request()
        elif prefix == "xyz" and self.deploy_num is not None:
            return self.__merge_active_request("xyz")
        elif self.deploy_num is None:
            return self.__merge_request()
        else:
            return self.__merge_active_request("rt")

    def __aggregate_dicts(self, dict1: dict, dict2: dict) -> dict:
        """
        Returns a dict of data combined from two dictionaries. Dict1 has oldest data.
        All the other __merge methods end up using this method.

        This method also redistributes 100 band spectra to 64 band format if 1) both
        formats are present in dict1 and dict2 or 2) the force_64bands option is True.
        """
        # Union the keys to make sure we check each one
        ukeys = set(dict1.keys()) | set(dict2.keys())
        # Determine if there are any spectra vars to redistribute
        svars = set(self.spectrum_vars) & ukeys

        if len(svars) != 0:
            key = next(iter(svars))  # retrieves an element from the set
            shape1 = dict1[key].shape[1] if key in dict1.keys() else 0
            shape2 = dict2[key].shape[1] if key in dict2.keys() else 0
            shapes = [shape1, shape2]
            if 100 in shapes and (self.force_64bands or 64 in shapes):
                dicts = [dict1, dict2]
                for i, shape in enumerate(shapes):
                    if shape == 100:
                        spectra_obj = Spectra()
                        spectra_obj.set_spectrumArr_fromQuery(dicts[i])
                        spectra_obj.redist_specArr("Spectrum_64band")
                        redistributed_dict = spectra_obj.specArr_ToDict()
                        for v in self.spectrum_vars:
                            if v in dicts[i].keys():
                                dicts[i][v] = redistributed_dict[v]
        # Concatenate the variables
        result = {}
        for key in ukeys:
            if key in dict2 and key in dict1:
                result[key] = ma.concatenate([dict1[key], dict2[key]])
            elif key in dict2:
                result[key] = dict2[key]
            else:
                result[key] = dict1[key]
        return result

    def __merge_archive_helper(self, cdip_nc: CDIPnc, result):
        """Merges data from one archive nc file into result if its coverage
        overlaps the request timespan. Returns (result, file_start_stamp)."""
        file_start_stamp = cdip_utils.datetime_to_timestamp(
            cdip_nc.get_coverage_start()
        )
        file_end_stamp = cdip_utils.datetime_to_timestamp(cdip_nc.get_coverage_end())
        file_timespan = cdip_utils.Timespan(file_start_stamp, file_end_stamp)
        request_timespan = cdip_utils.Timespan(self.start_stamp, self.end_stamp)
        if request_timespan.overlap(file_timespan):
            cdip_nc.start_stamp = self.start_stamp
            cdip_nc.end_stamp = self.end_stamp
            cdip_nc.pub_set = self.pub_set
            cdip_nc.apply_mask = self.apply_mask
            cdip_nc.vrs = self.vrs
            tmp_result = cdip_nc.get_request()

            result = self.__aggregate_dicts(result, tmp_result)
        return result, file_start_stamp

    def __merge_xyz_helper(
        self, cdip_nc: CDIPnc, request_timespan: cdip_utils.Timespan, result: dict
    ):
        """Merges xyz data from one nc file into result if the file's data
        timespan overlaps the request. Returns (result, file_start_stamp);
        when the file has no usable xyz data the request start stamp is
        returned so the caller continues searching older files."""
        # Try the next file if it is without xyz data
        z = cdip_nc.get_var("xyzZDisplacement")
        if z is None:
            return result, self.start_stamp
        # Try the next file if start_stamp cannot be calculated
        start_stamp = cdip_nc.get_xyz_timestamp(0)
        end_stamp = cdip_nc.get_xyz_timestamp(len(z) - 1)
        if start_stamp is None:
            return result, self.start_stamp
        file_timespan = cdip_utils.Timespan(start_stamp, end_stamp)
        # Add data if request timespan overlaps data timespan
        if request_timespan.overlap(file_timespan):
            cdip_nc.start_stamp = self.start_stamp
            cdip_nc.end_stamp = self.end_stamp
            cdip_nc.pub_set = self.pub_set
            cdip_nc.apply_mask = self.apply_mask
            cdip_nc.vrs = self.vrs
            tmp_result = cdip_nc.get_request()
            result = self.__aggregate_dicts(result, tmp_result)
        return result, start_stamp

    def remove_duplicates(self, data_dict: dict) -> dict:
        """Duplicate records may exist after merge_ routines. This removes them."""
        result = {}
        keys = list(data_dict.keys())
        if len(keys) > 0:
            key = keys[0]
            prefix = self.get_var_prefix(key)
            time_dimension_name = prefix + "Time"
            # np.unique returns sorted unique times and the index of the
            # first occurrence of each; those indices are applied to every
            # other variable so records stay aligned with their time stamps.
            time_values, indices_of_unique_values = np.unique(
                data_dict[time_dimension_name], return_index=True
            )
            result[time_dimension_name] = time_values
            for key in keys:
                if key != time_dimension_name:
                    result[key] = data_dict[key][indices_of_unique_values]
            return result
        else:
            return data_dict

    def __merge_xyz_request(self):
        """Merge xyz data from realtime and archive nc files."""
        if self.vrs and self.vrs[0] == "xyzData":
            self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]
        request_timespan = cdip_utils.Timespan(self.start_stamp, self.end_stamp)
        arch_file_used = False
        rt_file_used = False
        result = {}
        # Default so the archive search below still runs (and no
        # UnboundLocalError is raised) when there is no realtime xyz file.
        start_stamp = self.start_stamp

        # First get realtime data if it exists
        # Pass data_dir/org for consistency with the other nc constructors.
        rt = RealtimeXY(self.stn, self.data_dir, self.org)
        if rt.nc is not None:
            rt_file_used = True
            result, start_stamp = self.__merge_xyz_helper(rt, request_timespan, result)

        # If the request start time is more recent than the realtime
        # start time, no need to look in the archives
        if self.start_stamp > start_stamp:
            return result

        # Second, look in archive files for data
        for dep in range(1, self.max_deployments):
            ar = Archive(self.stn, dep, self.data_dir, self.org)
            if ar.nc is None:
                break
            arch_file_used = True
            result, start_stamp = self.__merge_xyz_helper(ar, request_timespan, result)
            # Break if file start stamp is greater than request end stamp
            if start_stamp > self.end_stamp:
                break

        if rt_file_used and arch_file_used:
            result = self.remove_duplicates(result)
        return result

    def __merge_active_request(self, nc_class_type: str = "rt"):
        """
        Returns data for a given request across active datasets.

        When deploy_num is supplied all files (active and archive)
        are checked for data.
        """
        sorted_datasets = sorted(
            self.meta.active_datasets.items(), key=operator.itemgetter(1)
        )

        result = {}
        num_files_used = 0
        for ds in sorted_datasets:
            if nc_class_type == "xyz":
                a = ActiveXY(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)
            else:
                a = Active(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)

            # A moored deployment may have been finalized into an archive file.
            if ds[0] == "moored" and a.nc is None:
                a = Archive(
                    self.stn[0:3] + "p1", self.deploy_num, self.data_dir, self.org
                )

            if a.nc is not None:
                a.vrs = self.vrs
                a.start_stamp = self.start_stamp
                a.end_stamp = self.end_stamp
                a.pub_set = self.pub_set
                a.apply_mask = self.apply_mask
                tmp_result = a.get_request()
                result = self.__aggregate_dicts(result, tmp_result)
                num_files_used += 1

        if num_files_used > 1:
            result = self.remove_duplicates(result)
        return result

    def __merge_request(self):
        """Returns data for given request across realtime and historic files"""

        num_files_used = 0
        rt = {}
        r = self.realtime
        # Note that we are assuming that waveTime will work for every time dim.
        if r.nc is not None and r.get_var("waveTime")[0] <= self.end_stamp:
            num_files_used += 1
            r.vrs = self.vrs
            r.start_stamp = self.start_stamp
            r.end_stamp = self.end_stamp
            r.pub_set = self.pub_set
            r.apply_mask = self.apply_mask
            rt = r.get_request()

        ht = {}
        h = self.historic
        # Historic file contains public data
        if (
            h.nc is not None
            and h.get_var("waveTime")[-1] >= self.start_stamp
            and self.pub_set == "public"
        ):
            num_files_used += 1
            h.vrs = self.vrs
            h.start_stamp = self.start_stamp
            h.end_stamp = self.end_stamp
            h.pub_set = self.pub_set
            h.apply_mask = self.apply_mask
            ht = h.get_request()

        result = self.__aggregate_dicts(ht, rt)

        # Check Archive files if requesting non-pub data
        if self.pub_set != "public":
            for dep in range(1, self.max_deployments):
                ar = Archive(self.stn, dep, self.data_dir, self.org)
                if ar.nc is None:
                    break
                num_files_used += 1
                result, start_stamp = self.__merge_archive_helper(ar, result)
                # Break if file start stamp is greater than request end stamp
                if start_stamp > self.end_stamp:
                    break

        if num_files_used > 1:
            result = self.remove_duplicates(result)
        return result

    def get_nc_files(self, types: list = nc_file_types) -> dict:
        """Returns dict of netCDF4 objects of a station's netcdf files"""
        result = {}
        for ftype in types:
            if ftype == "historic":
                ht = Historic(self.stn, self.data_dir, self.org)
                if ht.nc:
                    result[ht.filename] = ht.nc
            if ftype == "archive":
                for dep in range(1, self.max_deployments):
                    ar = Archive(self.stn, dep, self.data_dir, self.org)
                    if ar.nc is None:
                        break
                    result[ar.filename] = ar
            if ftype in self.meta.active_datasets:
                for dep in range(1, self.max_deployments):
                    ac = Active(self.stn, dep, ftype, self.data_dir, self.org)
                    if ac.nc is not None:
                        result[ac.filename] = ac
                    ac = ActiveXY(self.stn, dep, ftype, self.data_dir, self.org)
                    if ac.nc is not None:
                        result[ac.filename] = ac
        return result

    def get_target_timespan(
        self, target_timestamp: int, num_target_records: int, time_var: str
    ) -> tuple:
        """Returns a timespan containing the n closest records to the target_timestamp.

        PARAMETERS
        ----------
        target_timestamp : int
            A unix timestamp which is the target time about which the closest
            n records will be returned.
        n : int
            The number of records to return that are closest to the target
            timestamp.
        time_var : str
            The name of the time dimension variable to use. E.g. waveTime.

        RETURNS
        -------
        A 2-tuple of timestamps corresponding to i and i+n (where n may
        be negative) which will be the timestamps for the n records
        closest to the target_timestamp.
        """
        r_ok = False
        if self.realtime.nc is not None:
            r_ok = True
        h_ok = False
        if self.historic.nc is not None:
            h_ok = True

        # Check realtime to find closest index

        r_closest_idx = None
        if r_ok:
            r_stamps = self.realtime.get_var(time_var)[:]
            r_last_idx = len(r_stamps) - 1
            i_b = bisect_left(r_stamps, target_timestamp)
            # i_b will be possibly one more than the last index
            i_b = min(i_b, r_last_idx)
            # Target timestamp is exactly equal to a data time
            if i_b == r_last_idx or r_stamps[i_b] == target_timestamp:
                r_closest_idx = i_b
            elif i_b > 0:
                r_closest_idx = cdip_utils.get_closest_index(
                    i_b - 1, i_b, r_stamps, target_timestamp
                )

        # If closest index not found, check historic

        h_closest_idx = None
        h_last_idx = None  # Lets us know if h_stamps has been loaded
        if h_ok and not r_closest_idx:
            h_stamps = self.historic.get_var(time_var)[:]
            h_last_idx = len(h_stamps) - 1
            i_b = bisect_left(h_stamps, target_timestamp)
            i_b = min(i_b, h_last_idx)
            # Target timestamp is exactly equal to a data time
            if (i_b <= h_last_idx and h_stamps[i_b] == target_timestamp) or i_b == 0:
                h_closest_idx = i_b
            elif i_b >= h_last_idx:  # Target is between the two files
                if r_ok:
                    if abs(h_stamps[h_last_idx] - target_timestamp) < abs(
                        r_stamps[0] - target_timestamp
                    ):
                        h_closest_idx = i_b
                    else:
                        r_closest_idx = 0
                else:  # No realtime file
                    h_closest_idx = i_b
            else:  # Within middle of historic stamps
                h_closest_idx = cdip_utils.get_closest_index(
                    i_b - 1, i_b, h_stamps, target_timestamp
                )

        # Now we have the closest index, find the intervals

        if r_closest_idx is not None:
            r_interval = cdip_utils.get_interval(
                r_stamps, r_closest_idx, num_target_records
            )
            # If bound exceeded toward H and H exists, calculate h_interval
            if r_interval[2] < 0 and h_ok:
                if not h_last_idx:
                    h_stamps = self.historic.get_var(time_var)[:]
                    h_last_idx = len(h_stamps) - 1
                h_interval = cdip_utils.get_interval(
                    h_stamps, h_last_idx, num_target_records + r_closest_idx + 1
                )
                return cdip_utils.combine_intervals(h_interval, r_interval)
            else:
                return r_interval
        elif h_closest_idx is not None:
            h_interval = cdip_utils.get_interval(
                h_stamps, h_closest_idx, num_target_records
            )
            # If bound exceeded toward R and R exists, calculate r_interval
            if h_interval[2] > 0 and r_ok:
                r_interval = cdip_utils.get_interval(
                    r_stamps, 0, num_target_records + h_closest_idx - h_last_idx - 1
                )
                return cdip_utils.combine_intervals(h_interval, r_interval)
            else:
                return h_interval

        # If we get to here there's a problem
        return (None, None, None)

__aggregate_dicts(dict1: dict, dict2: dict) -> dict

Returns a dict of data combined from two dictionaries. Dict1 has oldest data. All the other __merge methods end up using this method.

This method also redistributes 100 band spectra to 64 band format if 1) both formats are present in dict1 and dict2 or 2) the force_64bands option is True.

Source code in cdippy/stndata.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
def __aggregate_dicts(self, dict1: dict, dict2: dict) -> dict:
    """
    Returns a dict of data combined from two dictionaries. Dict1 has oldest data.
    All the other __merge methods end up using this method.

    This method also redistributes 100 band spectra to 64 band format if 1) both
    formats are present in dict1 and dict2 or 2) the force_64bands option is True.
    """
    # Union the keys to make sure we check each one
    ukeys = set(dict1.keys()) | set(dict2.keys())
    # Determine if there are any spectra vars to redistribute
    svars = set(self.spectrum_vars) & ukeys

    if len(svars) != 0:
        key = next(iter(svars))  # retrieves an element from the set
        shape1 = dict1[key].shape[1] if key in dict1.keys() else 0
        shape2 = dict2[key].shape[1] if key in dict2.keys() else 0
        shapes = [shape1, shape2]
        if 100 in shapes and (self.force_64bands or 64 in shapes):
            dicts = [dict1, dict2]
            for i, shape in enumerate(shapes):
                if shape == 100:
                    spectra_obj = Spectra()
                    spectra_obj.set_spectrumArr_fromQuery(dicts[i])
                    spectra_obj.redist_specArr("Spectrum_64band")
                    redistributed_dict = spectra_obj.specArr_ToDict()
                    for v in self.spectrum_vars:
                        if v in dicts[i].keys():
                            dicts[i][v] = redistributed_dict[v]
    # Concatenate the variables
    result = {}
    for key in ukeys:
        if key in dict2 and key in dict1:
            result[key] = ma.concatenate([dict1[key], dict2[key]])
        elif key in dict2:
            result[key] = dict2[key]
        else:
            result[key] = dict1[key]
    return result

__init__(stn: str, data_dir: str = None, org: str = None, deploy_num: int = None)

PARAMETERS

stn : str Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2 data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server. org: str (Organization) Values are: cdip|ww3|external deploy_num : int [optional] Supply this to access specific station deployment data. Must be >= 1.

Source code in cdippy/stndata.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def __init__(
    self, stn: str, data_dir: str = None, org: str = None, deploy_num: int = None
):
    """
    PARAMETERS
    ----------
    stn : str
       Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
    data_dir : str [optional]
        Either a full path to a directory containing a local directory hierarchy
        of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
    org: str
        (Organization) Values are: cdip|ww3|external
    deploy_num : int [optional]
        Supply this to access specific station deployment data.
        Must be >= 1.
    """
    self.nc = None
    self.stn = stn
    self.data_dir = data_dir
    self.org = org

    # Accept numbers for cdip stations
    if type(stn) is not str:
        stn = str(stn).zfill(3) + "p1"

    # Initialize nc file used for meta information
    self.deploy_num = deploy_num
    if deploy_num:
        # Check all active datasets in this order p3 -> p2 -> p1 -> p0
        p_lookup = dict([[v, k] for k, v in self.active_datasets.items()])
        __found_active_meta = False
        for p in reversed(sorted(p_lookup)):
            self.meta = Active(
                self.stn, self.deploy_num, p_lookup[p], self.data_dir, self.org
            )
            if self.meta.nc:
                __found_active_meta = True
                break
        if not __found_active_meta:
            self.meta = Archive(self.stn, self.deploy_num, self.data_dir, self.org)
    else:
        self.historic = Historic(self.stn, self.data_dir, self.org)
        self.realtime = Realtime(self.stn, self.data_dir, self.org)
        if self.historic and self.historic.nc:
            self.meta = self.historic
        else:
            if self.realtime and self.realtime.nc:
                self.meta = self.realtime
    if self.meta is None:
        return None

__merge_active_request(nc_class_type: str = 'rt')

Returns data for a given request across active datasets.

When deploy_num is supplied all files (active and archive) are checked for data.

Source code in cdippy/stndata.py
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
def __merge_active_request(self, nc_class_type: str = "rt"):
    """
    Returns data for a given request across active datasets.

    When deploy_num is supplied all files (active and archive)
    are checked for data.
    """
    sorted_datasets = sorted(
        self.meta.active_datasets.items(), key=operator.itemgetter(1)
    )

    result = {}
    num_files_used = 0
    for ds in sorted_datasets:
        if nc_class_type == "xyz":
            a = ActiveXY(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)
        else:
            a = Active(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)

        if ds[0] == "moored" and a.nc is None:
            a = Archive(
                self.stn[0:3] + "p1", self.deploy_num, self.data_dir, self.org
            )

        if a.nc is not None:
            a.vrs = self.vrs
            a.start_stamp = self.start_stamp
            a.end_stamp = self.end_stamp
            a.pub_set = self.pub_set
            a.apply_mask = self.apply_mask
            tmp_result = a.get_request()
            result = self.__aggregate_dicts(result, tmp_result)
            num_files_used += 1

    if num_files_used > 1:
        result = self.remove_duplicates(result)
    return result

__merge_request()

Returns data for given request across realtime and historic files

Source code in cdippy/stndata.py
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
def __merge_request(self):
    """Returns data for given request across realtime and historic files.

    Reads the request parameters from self (vrs, start_stamp, end_stamp,
    pub_set, apply_mask).  The historic file holds only public data, so
    archive deployment files are additionally scanned when a non-public
    pub_set is requested.  Duplicates are removed when more than one
    file contributed records.
    """

    num_files_used = 0
    rt = {}
    r = self.realtime
    # Note that we are assuming that waveTime will work for every time dim.
    # Query realtime only when its first record is not after the request end.
    if r.nc is not None and r.get_var("waveTime")[0] <= self.end_stamp:
        num_files_used += 1
        # Copy the request parameters onto the dataset object before fetching.
        r.vrs = self.vrs
        r.start_stamp = self.start_stamp
        r.end_stamp = self.end_stamp
        r.pub_set = self.pub_set
        r.apply_mask = self.apply_mask
        rt = r.get_request()

    ht = {}
    h = self.historic
    # Historic file contains public data
    # so it is only consulted for "public" requests that overlap it.
    if (
        h.nc is not None
        and h.get_var("waveTime")[-1] >= self.start_stamp
        and self.pub_set == "public"
    ):
        num_files_used += 1
        h.vrs = self.vrs
        h.start_stamp = self.start_stamp
        h.end_stamp = self.end_stamp
        h.pub_set = self.pub_set
        h.apply_mask = self.apply_mask
        ht = h.get_request()

    result = self.__aggregate_dicts(ht, rt)

    # Check Archive files if requesting non-pub data
    if self.pub_set != "public":
        # Deployments are numbered consecutively from 1; stop at the first
        # missing file.
        for dep in range(1, self.max_deployments):
            ar = Archive(self.stn, dep, self.data_dir, self.org)
            if ar.nc is None:
                break
            num_files_used += 1
            result, start_stamp = self.__merge_archive_helper(ar, result)
            # Break if file start stamp is greater than request end stamp
            if start_stamp > self.end_stamp:
                break

    if num_files_used > 1:
        # Overlapping files can produce duplicate timestamps.
        result = self.remove_duplicates(result)
    return result

__merge_xyz_request()

Merge xyz data from realtime and archive nc files.

Source code in cdippy/stndata.py
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def __merge_xyz_request(self):
    """Merge xyz data from realtime and archive nc files.

    Uses self.start_stamp/self.end_stamp as the requested timespan and
    returns a dict of displacement variables.  Duplicates are removed
    when both a realtime and an archive file contributed data.
    """
    # "xyzData" is shorthand for the three displacement components.
    if self.vrs and self.vrs[0] == "xyzData":
        self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]
    request_timespan = cdip_utils.Timespan(self.start_stamp, self.end_stamp)
    arch_file_used = False
    rt_file_used = False
    result = {}

    # First get realtime data if it exists
    rt = RealtimeXY(self.stn)
    if rt.nc is not None:
        rt_file_used = True
        result, start_stamp = self.__merge_xyz_helper(rt, request_timespan, result)

        # If the request start time is more recent than the realtime
        # start time, no need to look in the archives.
        # BUGFIX: this check must live inside the realtime branch so that
        # start_stamp is always bound; previously, with no realtime file,
        # the unconditional comparison raised UnboundLocalError.
        if self.start_stamp > start_stamp:
            return result

    # Second, look in archive files for data
    for dep in range(1, self.max_deployments):
        ar = Archive(self.stn, dep, self.data_dir, self.org)
        if ar.nc is None:
            break
        arch_file_used = True
        result, start_stamp = self.__merge_xyz_helper(ar, request_timespan, result)
        # Break if file start stamp is greater than request end stamp
        if start_stamp > self.end_stamp:
            break

    if rt_file_used and arch_file_used:
        result = self.remove_duplicates(result)
    return result

get_nc_files(types: list = nc_file_types) -> dict

Returns dict of netCDF4 objects of a station's netcdf files

Source code in cdippy/stndata.py
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
def get_nc_files(self, types: list = nc_file_types) -> dict:
    """Returns dict of netCDF4 objects of a station's netcdf files.

    NOTE(review): for "historic" the stored value is the underlying
    netCDF4 dataset (ht.nc), while archive/active entries store the
    CDIPnc wrapper object itself -- confirm this asymmetry is intended.
    """
    found = {}
    for file_type in types:
        if file_type == "historic":
            historic = Historic(self.stn, self.data_dir, self.org)
            if historic.nc:
                found[historic.filename] = historic.nc
        if file_type == "archive":
            # Deployment numbers are consecutive; stop at the first gap.
            for deployment in range(1, self.max_deployments):
                archive = Archive(self.stn, deployment, self.data_dir, self.org)
                if archive.nc is None:
                    break
                found[archive.filename] = archive
        if file_type in self.meta.active_datasets:
            for deployment in range(1, self.max_deployments):
                # Check both the parameter and the displacement variants.
                for active_cls in (Active, ActiveXY):
                    candidate = active_cls(
                        self.stn, deployment, file_type, self.data_dir, self.org
                    )
                    if candidate.nc is not None:
                        found[candidate.filename] = candidate
    return found

get_parameters(start: datetime = None, end: datetime = None, pub_set: str = 'public', apply_mask=True, target_records=0) -> dict

Calls get_series to return wave parameters.

Source code in cdippy/stndata.py
165
166
167
168
169
170
171
172
173
174
175
176
def get_parameters(
    self,
    start: datetime = None,
    end: datetime = None,
    pub_set: str = "public",
    apply_mask=True,
    target_records=0,
) -> dict:
    """Convenience wrapper: fetch the standard wave parameters via get_series."""
    return self.get_series(
        start,
        end,
        vrs=self.parameter_vars,
        pub_set=pub_set,
        apply_mask=apply_mask,
        target_records=target_records,
    )

get_series(start: datetime = None, end: datetime = None, vrs: list = None, pub_set: str = None, apply_mask: bool = None, target_records: int = 0, force_64bands: bool = False) -> dict

Returns a dict of data between start and end dates with specified quality.

PARAMETERS

start : str or datetime [optional] : default Jan 1, 1975 Start time of data request (UTC). If provided as a string must be in the format Y-m-d H:M:S where Y is 4 chars and all others are 2 chars. Ex. '2020-03-30 19:32:56'. end : str or datetime [optional] : default now End time of data request (UTC). If not supplied defaults to now. vrs : list [optional] : default ['waveHs'] A list of the names of variables to retrieve. They all must start with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp'] pub_set: str [optional] values = public|nonpub|all Filters data based on data quality flags. apply_mask: bool [optional] default True Removes values from the masked array that have a mask value of True. Ex. If nonpub data is requested and apply_mask is False, the returned array will contain both public and nonpublic data (although public data records will have the mask value set to True). If apply_mask is set to True, only nonpub records will be returned. target_records: int [optional] If start is specified and end is None, this will specify the number of additional records to return closest to start. force_64bands: bool [optional] For the case in which all spectra returned are mk4 100 band format, force the conversion to 64bands. Mixed formats are always returned in mk3 64 band format.

Source code in cdippy/stndata.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def get_series(
    self,
    start: datetime = None,
    end: datetime = None,
    vrs: list = None,
    pub_set: str = None,
    apply_mask: bool = None,
    target_records: int = 0,
    force_64bands: bool = False,
) -> dict:
    """
    Returns a dict of data between start and end dates with specified quality.

    PARAMETERS
    ----------
    start : str or datetime [optional] : default Jan 1, 1975
        Start time of data request (UTC). If provided as a string must
        be in the format Y-m-d H:M:S where Y is 4 chars and all others
        are 2 chars. Ex. '2020-03-30 19:32:56'.
    end : str or datetime [optional] : default now
        End time of data request (UTC). If not supplied defaults to now.
    vrs : list [optional] : default ['waveHs']
        A list of the names of variables to retrieve. They all must start
        with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
    pub_set: str [optional] values = public|nonpub|all
        Filters data based on data quality flags.
    apply_mask: bool [optional] default True
        Removes values from the masked array that have a mask value of True.
        Ex. If nonpub data is requested and apply_mask is False, the returned
        array will contain both public and nonpublic data (although public
        data records will have the mask value set to True). If apply_mask
        is set to True, only nonpub records will be returned.
    target_records: int [optional]
        If start is specified and end is None, this will specify the number
        of additional records to return closest to start.
    force_64bands: bool [optional]
        For the case in which all spectra returned are mk4 100 band format,
        force the conversion to 64bands. Mixed formats are always returned in mk3
        64 band format.
    """
    if vrs is None:
        vrs = self.parameter_vars
    prefix = self.get_var_prefix(vrs[0])

    if start is not None and end is None:  # Target time
        if isinstance(start, str):
            start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
        ts_I = self.get_target_timespan(
            cdip_utils.datetime_to_timestamp(start), target_records, prefix + "Time"
        )
        if ts_I[0] is not None:
            start = cdip_utils.timestamp_to_datetime(ts_I[0])
            end = cdip_utils.timestamp_to_datetime(ts_I[1])
        else:
            # No records exist near the target time.
            return None
    elif start is None:  # Use default 3 days back
        # NOTE(review): a caller-supplied `end` is overwritten here whenever
        # start is None -- confirm this is intentional.
        start = datetime.utcnow() - timedelta(days=3)
        end = datetime.utcnow()

    if pub_set is None:
        pub_set = self.pub_set

    if apply_mask is None:
        apply_mask = self.apply_mask

    self.force_64bands = force_64bands

    self.set_request_info(start, end, vrs, pub_set, apply_mask)
    # BUGFIX: removed a leftover debug `print(self.deploy_num)`.

    # Dispatch on the variable family and whether a specific deployment
    # was requested.
    if prefix == "xyz" and self.deploy_num is None:
        return self.__merge_xyz_request()
    elif prefix == "xyz" and self.deploy_num is not None:
        return self.__merge_active_request("xyz")
    elif self.deploy_num is None:
        return self.__merge_request()
    else:
        return self.__merge_active_request("rt")

get_spectra(start: datetime = None, end: datetime = None, pub_set: str = 'public', apply_mask: bool = True, target_records: int = 0, force_64bands: bool = False) -> dict

Calls get_series to return spectral data.

Source code in cdippy/stndata.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def get_spectra(
    self,
    start: datetime = None,
    end: datetime = None,
    pub_set: str = "public",
    apply_mask: bool = True,
    target_records: int = 0,
    force_64bands: bool = False,
) -> dict:
    """Fetch spectral variables by delegating to get_series."""
    return self.get_series(
        start,
        end,
        vrs=self.spectrum_vars,
        pub_set=pub_set,
        apply_mask=apply_mask,
        target_records=target_records,
        force_64bands=force_64bands,
    )

get_stn_meta() -> dict

Returns a dict of station meta data.

Source code in cdippy/stndata.py
153
154
155
156
157
158
159
160
161
162
163
def get_stn_meta(self) -> dict:
    """Return a dict of station meta data plus selected global attributes."""
    if self.meta is None:
        return {}
    self.meta.set_request_info(vrs=self.meta_vars)
    stn_info = self.meta.get_request()
    # Sentinel distinguishes genuinely absent attributes from None values.
    _missing = object()
    for attr_name in self.meta_attributes:
        value = getattr(self.meta.nc, attr_name, _missing)
        if value is not _missing:
            stn_info[attr_name] = value
    return stn_info

get_target_timespan(target_timestamp: int, num_target_records: int, time_var: str) -> tuple

Returns a timespan containing the n closest records to the target_timestamp.

PARAMETERS

target_timestamp : int A unix timestamp which is the target time about which the closest n records will be returned. num_target_records : int The number of records to return that are closest to the target timestamp. time_var : str The name of the time dimension variable to use. E.g. waveTime.

RETURNS

A 2-tuple of timestamps corresponding to i and i+n (where n may be negative) which will be the timestamps for the n records closest to the target_timestamp.

Source code in cdippy/stndata.py
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
def get_target_timespan(
    self, target_timestamp: int, num_target_records: int, time_var: str
) -> tuple:
    """Returns a timespan containing the n closest records to the target_timestamp.

    PARAMETERS
    ----------
    target_timestamp : int
        A unix timestamp which is the target time about which the closest
        n records will be returned.
    num_target_records : int
        The number of records to return that are closest to the target
        timestamp.
    time_var : str
        The name of the time dimension variable to use. E.g. waveTime.

    RETURNS
    -------
    A tuple of timestamps corresponding to i and i+n (where n may
    be negative) which will be the timestamps for the n records
    closest to the target_timestamp.  Returns (None, None, None) when
    neither the realtime nor the historic file can satisfy the request.
    """
    r_ok = False
    if self.realtime.nc is not None:
        r_ok = True
    h_ok = False
    if self.historic.nc is not None:
        h_ok = True

    # Check realtime to find closest index

    r_closest_idx = None
    if r_ok:
        r_stamps = self.realtime.get_var(time_var)[:]
        r_last_idx = len(r_stamps) - 1
        i_b = bisect_left(r_stamps, target_timestamp)
        # i_b will be possibly one more than the last index
        i_b = min(i_b, r_last_idx)
        # Target timestamp is exactly equal to a data time
        if i_b == r_last_idx or r_stamps[i_b] == target_timestamp:
            r_closest_idx = i_b
        elif i_b > 0:
            r_closest_idx = cdip_utils.get_closest_index(
                i_b - 1, i_b, r_stamps, target_timestamp
            )

    # If closest index not found, check historic

    h_closest_idx = None
    h_last_idx = None  # Lets us know if h_stamps has been loaded
    # BUGFIX: test `is None` rather than truthiness -- index 0 is a valid
    # (but falsy) closest index and must not trigger the historic search.
    if h_ok and r_closest_idx is None:
        h_stamps = self.historic.get_var(time_var)[:]
        h_last_idx = len(h_stamps) - 1
        i_b = bisect_left(h_stamps, target_timestamp)
        i_b = min(i_b, h_last_idx)
        # Target timestamp is exactly equal to a data time
        if (i_b <= h_last_idx and h_stamps[i_b] == target_timestamp) or i_b == 0:
            h_closest_idx = i_b
        elif i_b >= h_last_idx:  # Target is between the two files
            if r_ok:
                if abs(h_stamps[h_last_idx] - target_timestamp) < abs(
                    r_stamps[0] - target_timestamp
                ):
                    h_closest_idx = i_b
                else:
                    r_closest_idx = 0
            else:  # No realtime file
                h_closest_idx = i_b
        else:  # Within middle of historic stamps
            h_closest_idx = cdip_utils.get_closest_index(
                i_b - 1, i_b, h_stamps, target_timestamp
            )

    # Now we have the closest index, find the intervals

    if r_closest_idx is not None:
        r_interval = cdip_utils.get_interval(
            r_stamps, r_closest_idx, num_target_records
        )
        # If bound exceeded toward H and H exists, calculate h_interval
        if r_interval[2] < 0 and h_ok:
            # BUGFIX: compare against None -- h_last_idx == 0 is a valid
            # loaded state (single-record historic file).
            if h_last_idx is None:
                h_stamps = self.historic.get_var(time_var)[:]
                h_last_idx = len(h_stamps) - 1
            h_interval = cdip_utils.get_interval(
                h_stamps, h_last_idx, num_target_records + r_closest_idx + 1
            )
            return cdip_utils.combine_intervals(h_interval, r_interval)
        else:
            return r_interval
    elif h_closest_idx is not None:
        h_interval = cdip_utils.get_interval(
            h_stamps, h_closest_idx, num_target_records
        )
        # If bound exceeded toward R and R exists, calculate r_interval
        if h_interval[2] > 0 and r_ok:
            r_interval = cdip_utils.get_interval(
                r_stamps, 0, num_target_records + h_closest_idx - h_last_idx - 1
            )
            return cdip_utils.combine_intervals(h_interval, r_interval)
        else:
            return h_interval

    # If we get to here there's a problem
    return (None, None, None)

get_xyz(start: datetime = None, end: datetime = None, pub_set: str = 'public') -> dict

Calls get_series to return displacement data.

Source code in cdippy/stndata.py
178
179
180
181
182
def get_xyz(
    self, start: datetime = None, end: datetime = None, pub_set: str = "public"
) -> dict:
    """Fetch buoy displacement (xyz) data via get_series."""
    return self.get_series(start, end, vrs=self.xyz_vars, pub_set=pub_set)

remove_duplicates(data_dict: dict) -> dict

Duplicate records may exist after merge_ routines. This removes them.

Source code in cdippy/stndata.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
def remove_duplicates(self, data_dict: dict) -> dict:
    """Duplicate records may exist after merge_ routines. This removes them."""
    all_keys = list(data_dict.keys())
    if not all_keys:
        return data_dict
    # The time dimension name is derived from the first variable's prefix.
    time_key = self.get_var_prefix(all_keys[0]) + "Time"
    unique_times, unique_idx = np.unique(
        data_dict[time_key], return_index=True
    )
    deduped = {time_key: unique_times}
    # Keep, for every other variable, the rows at the first occurrence of
    # each unique timestamp.
    for key in all_keys:
        if key != time_key:
            deduped[key] = data_dict[key][unique_idx]
    return deduped