CDIPpy API reference

cdippy.cdipnc

Active

Bases: CDIPnc

Loads an "active" (predeploy, moored, offsite, recovered) rt nc file for the given station and deployment.

E.g. a = Active('100', 6, 'predeploy') # The predeploy data for stn 100 dep 6.

Source code in cdippy/cdipnc.py
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
class Active(CDIPnc):
    """Loads an "active" (predeploy, moored, offsite, recovered) rt nc file
    for the given station and deployment.

    E.g. a = Active('100', 6, 'predeploy')  # The predeploy data for stn 100 dep 6.
    """

    def __init__(
        self,
        stn: str,
        deployment: int,
        active_state_key: str,
        data_dir: str = None,
        org: str = None,
    ):
        """
        PARAMETERS
        ----------
        stn : str
           Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
        deployment : int
            Specific station deployment number. Must be >= 1.
        active_state_key : str
            Values: predeploy|moored|offsite|recovered
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        org : str [optional]
            Organization; passed through to set_dataset_info.
        """
        CDIPnc.__init__(self, data_dir)
        self.set_dataset_info(stn, org, active_state_key, deployment)
        # Active files contain unevaluated/nonpub records, so default to "all"
        # rather than the base-class default of "public".
        self.pub_set_default = "all"

__init__(stn: str, deployment: int, active_state_key: str, data_dir: str = None, org: str = None)

PARAMETERS

stn : str Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2 active_state_key : str Values: predeploy|moored|offsite|recovered deployment : int [optional] Supply this to access specific station deployment data. Must be >= 1. data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server.

Source code in cdippy/cdipnc.py
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
def __init__(
    self,
    stn: str,
    deployment: int,
    active_state_key: str,
    data_dir: str = None,
    org: str = None,
):
    """
    PARAMETERS
    ----------
    stn : str
       Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
    deployment : int
        Specific station deployment number. Must be >= 1.
    active_state_key : str
        Values: predeploy|moored|offsite|recovered
    data_dir : str [optional]
        Either a full path to a directory containing a local directory hierarchy
        of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
    org : str [optional]
        Organization; passed through to set_dataset_info.
    """
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, active_state_key, deployment)
    # Active files contain unevaluated/nonpub records, so default to "all"
    # rather than the base-class default of "public".
    self.pub_set_default = "all"

ActiveXY

Bases: Archive

Loads an "active" (predeploy, moored, offsite, recovered) xy nc file for the given station and deployment.

Source code in cdippy/cdipnc.py
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
class ActiveXY(Archive):
    """Loads an "active" (predeploy, moored, offsite, recovered) xy nc file
    for the given station and deployment.
    """

    def __init__(self, stn, deployment, dataset, data_dir=None, org=None):
        """
        PARAMETERS
        ----------
        dataset : str
            Active dataset name.
            Values are: predeploy|moored|offsite|recovered.

        For other parameters see CDIPnc.set_dataset_info.
        """
        # Call CDIPnc.__init__ directly rather than Archive.__init__ so the
        # dataset name is not forced to "archive" and the caller-supplied
        # deployment is used as-is.
        CDIPnc.__init__(self, data_dir)
        self.set_dataset_info(stn, org, dataset + "xy", deployment)
        # xy records are not quality-evaluated, so default to "all".
        self.pub_set_default = "all"

__init__(stn, deployment, dataset, data_dir=None, org=None)

PARAMETERS ---------- dataset : str Active dataset name. Values are: predeploy|moored|offsite|recovered. For other parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
def __init__(self, stn, deployment, dataset, data_dir=None, org=None):
    """
    PARAMETERS
    ----------
    dataset : str
        Active dataset name.
        Values are: predeploy|moored|offsite|recovered.

    For other parameters see CDIPnc.set_dataset_info.
    """
    # Call CDIPnc.__init__ directly rather than Archive.__init__ so the
    # dataset name is not forced to "archive" and the caller-supplied
    # deployment is used as-is.
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, dataset + "xy", deployment)
    # xy records are not quality-evaluated, so default to "all".
    self.pub_set_default = "all"

Archive

Bases: CDIPnc

Loads an archive (deployment) file for a given station and deployment.

Source code in cdippy/cdipnc.py
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
class Archive(CDIPnc):
    """Loads an archive (deployment) file for a given station and deployment."""

    def __init__(self, stn, deployment=None, data_dir=None, org=None):
        """For parameters see CDIPnc.set_dataset_info."""
        CDIPnc.__init__(self, data_dir)
        # A falsy deployment (None or 0) defaults to the first deployment.
        if not deployment:
            deployment = 1
        self.set_dataset_info(stn, org, "archive", deployment)

    def __get_idx_from_timestamp(self, timestamp: int) -> int:
        """Returns the xyz sample index nearest to the given timestamp.

        Inverse of get_xyz_timestamp: idx = round(rate * (t - t0 + delay)).
        """
        t0 = self.get_var("xyzStartTime")[0]
        r = self.get_var("xyzSampleRate")[0]
        # Mark I will have filter delay set to fill value
        d = self.get_var("xyzFilterDelay")
        d = 0 if d[0] is np.ma.masked else d[0]
        return int(round(r * (timestamp - t0 + d), 0))

    def __make_xyzTime(self, start_idx: int, end_idx: int) -> np.ma.MaskedArray:
        """Returns timestamps for xyz indices in [start_idx, end_idx).

        Each index i maps to t0 - delay + i/rate.
        """
        t0 = np.ma.asarray(self.get_var("xyzStartTime")[0])
        r = np.ma.asarray(self.get_var("xyzSampleRate")[0])
        # Mark I will have filter delay set to fill value
        d = self.get_var("xyzFilterDelay")
        d = 0 if d[0] is np.ma.masked else d[0]
        d = np.ma.asarray(d)
        i = np.ma.asarray(range(start_idx, end_idx))
        return t0 - d + i / r

    def get_xyz_timestamp(self, xyzIndex: int) -> float:
        """Returns the timestamp corresponding to the given xyz array index."""
        t0 = self.get_var("xyzStartTime")[0]
        r = self.get_var("xyzSampleRate")[0]
        # Mark I will have filter delay set to fill value
        d = self.get_var("xyzFilterDelay")
        d = 0 if d[0] is np.ma.masked else d[0]
        # NOTE(review): a t0 of exactly 0 (epoch) would be treated as missing
        # by this truthiness test — presumably never occurs in practice.
        if t0 and r and d >= 0:
            return t0 - d + xyzIndex / r
        else:
            return None

    def get_request(self):
        """Overrides the base class method to handle xyz data requests."""

        # If not an xyz request, use base class version
        if self.get_var_prefix(self.vrs[0]) != "xyz":
            return super(Archive, self).get_request()

        # xyzData is shorthand for all these vars
        if self.vrs[0] == "xyzData":
            self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]

        # Handle the xyz request
        start_idx = self.__get_idx_from_timestamp(self.start_stamp)
        end_idx = self.__get_idx_from_timestamp(self.end_stamp)
        z = self.get_var("xyzZDisplacement")
        # Find out if the request timespan overlaps the data
        ts1 = cdip_utils.Timespan(start_idx, end_idx)
        ts2 = cdip_utils.Timespan(0, len(z) - 1)
        if not ts1.overlap(ts2):
            return {}
        # Make sure the indices will work with the arrays
        start_idx = max(0, start_idx)
        end_idx = min(len(z) - 1, end_idx)
        # Just calculate xyz times for the good indices
        # NOTE(review): python slices exclude end_idx, and end_idx is clamped
        # to len(z) - 1, so the final sample is never returned when the
        # request spans past the end of the array — confirm intended.
        xyzTime = self.__make_xyzTime(start_idx, end_idx)
        result = {"xyzTime": xyzTime}
        for vname in self.vrs:
            result[vname] = self.get_var(vname)[start_idx:end_idx]
        return result

__init__(stn, deployment=None, data_dir=None, org=None)

For parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
947
948
949
950
951
952
def __init__(self, stn, deployment=None, data_dir=None, org=None):
    """For parameters see CDIPnc.set_dataset_info."""
    CDIPnc.__init__(self, data_dir)
    # A falsy deployment (None or 0) falls back to the first deployment.
    deployment = deployment if deployment else 1
    self.set_dataset_info(stn, org, "archive", deployment)

get_request()

Overrides the base class method to handle xyz data requests.

Source code in cdippy/cdipnc.py
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
def get_request(self):
    """Overrides the base class method to handle xyz data requests.

    Returns a dict of masked arrays keyed by variable name, plus a
    computed 'xyzTime' key; returns {} when the requested timespan
    does not overlap the data.
    """

    # If not an xyz request, use base class version
    if self.get_var_prefix(self.vrs[0]) != "xyz":
        return super(Archive, self).get_request()

    # xyzData is shorthand for all these vars
    if self.vrs[0] == "xyzData":
        self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]

    # Handle the xyz request
    start_idx = self.__get_idx_from_timestamp(self.start_stamp)
    end_idx = self.__get_idx_from_timestamp(self.end_stamp)
    z = self.get_var("xyzZDisplacement")
    # Find out if the request timespan overlaps the data
    ts1 = cdip_utils.Timespan(start_idx, end_idx)
    ts2 = cdip_utils.Timespan(0, len(z) - 1)
    if not ts1.overlap(ts2):
        return {}
    # Make sure the indices will work with the arrays
    start_idx = max(0, start_idx)
    end_idx = min(len(z) - 1, end_idx)
    # Just calculate xyz times for the good indices
    # NOTE(review): python slices exclude end_idx, and end_idx is clamped
    # to len(z) - 1, so the final sample is never returned when the
    # request spans past the end of the array — confirm intended.
    xyzTime = self.__make_xyzTime(start_idx, end_idx)
    result = {"xyzTime": xyzTime}
    for vname in self.vrs:
        result[vname] = self.get_var(vname)[start_idx:end_idx]
    return result

get_xyz_timestamp(xyzIndex: int) -> int

Returns the timestamp corresponding to the given xyz array index.

Source code in cdippy/cdipnc.py
972
973
974
975
976
977
978
979
980
981
982
def get_xyz_timestamp(self, xyzIndex: int) -> float:
    """Returns the timestamp corresponding to the given xyz array index.

    Computed as t0 - delay + index/rate; returns None when the
    required metadata is missing.
    """
    t0 = self.get_var("xyzStartTime")[0]
    r = self.get_var("xyzSampleRate")[0]
    # Mark I will have filter delay set to fill value
    d = self.get_var("xyzFilterDelay")
    d = 0 if d[0] is np.ma.masked else d[0]
    # NOTE(review): a t0 of exactly 0 (epoch) would be treated as missing
    # by this truthiness test — presumably never occurs in practice.
    if t0 and r and d >= 0:
        return t0 - d + xyzIndex / r
    else:
        return None

CDIPnc

A base class used by the class StnData for retrieving data from CDIP netCDF (nc) files located either locally or remotely.

Files accessed remotely are served by CDIP's THREDDS server. Files accessed locally need to be located within a specific directory hierarchy.

For each CDIP nc file "type" such as historic.nc or archive.nc, there is a corresponding sub-class, e.g. Historic or Archive. Although the constructors of these classes can be used to access data, StnData is recommended because it seamlessly combines records across multiple files.

Source code in cdippy/cdipnc.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
class CDIPnc:
    """A base class used by the class StnData for retrieving data from
    CDIP netCDF (nc) files located either locally or remotely.

    Files accessed remotely are served by CDIP's THREDDS server.
    Files accessed locally need to be located within a specific
    directory hierarchy.

    For each CDIP nc file "type" such as historic.nc or archive.nc,
    there is a corresponding sub-class, e.g. Historic or Archive.
    Although the constructors of these classes can be used to access
    data, StnData is recommended because it seamlessly combines
    records across multiple files.
    """

    THREDDS_url = "https://thredds.cdip.ucsd.edu"
    dods = "thredds/dodsC"
    url = None

    # - Load_stn_nc_files only checks for this number of deployments
    max_deployments = 99

    # - Top level data dir for nc files. Files must be within subdirectories:
    # - i.e. <data_dir>/REALTIME, <data_dir>/ARCHIVE/201p1
    data_dir = None

    # DATA QUALITY FLAGS AND PUBLIC/NONPUB
    #
    # waveFlagPrimary (WFP): 1-good, 2-not_evaluated, 3-questionable, 4-bad, 9-missing
    # waveFlagSecondary (WFS): 0-unspecified, 1-sensor issues, 2... are specific messages e.g. Hs out of bounds
    #
    # Data for public release is distinguished by WFP=1 and found in all nc files.
    # Data not for public release is distinguished by WFP=4 and found in all files except historic.nc
    #
    # There are cases where WFP=1 and WFS!=0 - e.g. if frequency bands have been reformatted.
    # Records with WFP=4 are not necessarily bad data.
    # All xy records are flagged WFP=2 - not_evaluated.
    #
    # NC files: latest, pre-deploy, moored, offsite, recovered, historic, archive
    #
    pub_set_default = "public"
    # Dashed tags such as public-good are for backwards compatibility
    pub_set_map = {
        "public": "public",
        "nonpub": "nonpub",
        "all": "all",
        "public-good": "public",
        "nonpub-all": "nonpub",
        "both-all": "all",
    }

    # Applies the mask before data is returned
    apply_mask = True

    # Active datasets - deployments that span NOW
    active_datasets = {
        "predeploy": "p0",
        "moored": "p1",
        "offsite": "p2",
        "recovered": "p3",
    }

    # Spectral layout. For each dataset we need to determine if it is mk3 (64 bands)
    # or mk4 (100 bands) spectral layout. Prior to aggregation, if 1 dataset is mk3,
    # all spectral layouts must be converted to mk3 during aggregation.
    spectral_layout = None

    # REQUESTING DATA PROCEDURE
    #
    # HOW TO USE
    # 1. call set_request_info
    # 2. call get_request
    #
    # HOW IT WORKS
    # 1. For a given set of variables of the same type (e.g. 'wave'),
    #   a. determine the dimension var name and if it is a time dimension
    #   b. determine the ancillary variable name (e.g. 'waveFlagPrimary'), if it exists
    # 2. If the dimension is a time dimension, find the start and end indices based on the query
    #    (Use start and end indices to subset all variables henceforth)
    # 3. Create an ancillary variable mask based on the pub set (and start, end indices if applicable)
    # 4. For each variable,
    #    a. use start, end indices to create a masked array
    #    b. union the variable's mask with the ancillary mask
    #    c. set the new masked array variable's mask to the union mask
    # 5. Apply the mask if self.apply_mask set True.

    def __init__(self, data_dir: str = None, deployment: int = None):
        """PARAMETERS
        ----------
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        deployment : int [optional]
            Supply this to access specific station deployment data.
            Must be >= 1.
        """

        self.nc = None
        self.data_dir = data_dir
        self.deployment = deployment

    def set_request_info(
        self,
        start: datetime = None,
        end: datetime = None,
        vrs: list = ["waveHs"],
        pub_set: str = "public",
        apply_mask: bool = True,
    ) -> None:
        """Initializes data request information for get_request.

        PARAMETERS
        ----------
        start : str or datetime [optional] : default Jan 1, 1975
            Start time of data request (UTC). If provided as a string must
            be in the format Y-m-d H:M:S where Y is 4 chars and all others
            are 2 chars. Ex. '2020-03-30 19:32:56'.
        end : str or datetime [optional] : default now
            End time of data request (UTC). If not supplied defaults to now.
        vrs : list [optional] : default ['waveHs']
            A list of the names of variables to retrieve. They all must start
            with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
        pub_set: str [optional] values = public|nonpub|all
            Filters data based on data quality flags.
        apply_mask: bool [optional] default True
            Removes values from the masked array that have a mask value of True.
            Ex. If nonpub data is requested and apply_mask is False, the returned
            array will contain both public and nonpublic data (although public
            data records will have the mask value set to True). If apply_mask
            is set to True, only nonpub records will be returned.
        """
        if start is None:
            start = datetime(1975, 1, 1).replace(tzinfo=timezone.utc)
        if end is None:
            end = datetime.now(timezone.utc)
        self.set_timespan(start, end)
        self.pub_set = self.get_pub_set(pub_set)  # Standardize the set name
        if apply_mask is not None:
            self.apply_mask = apply_mask
        self.vrs = vrs

    def set_timespan(self, start, end):
        """Sets request timespan"""
        if isinstance(start, str):
            self.start_dt = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(
                tzinfo=timezone.utc
            )
        else:
            self.start_dt = start
        if isinstance(end, str):
            self.end_dt = datetime.strptime(end, "%Y-%m-%d %H:%M:%S").replace(
                tzinfo=timezone.utc
            )
        else:
            self.end_dt = end
        self.start_stamp = cdip_utils.datetime_to_timestamp(self.start_dt)

        self.end_stamp = cdip_utils.datetime_to_timestamp(self.end_dt)

    def get_request(self) -> dict:
        """Returns the data specified using set_request_info.

        RETURNS
        -------
        A dictionary containing keys of the requested variables each
        of which is a numpy masked array of data values. In addition,
        the time values are returned as well. For example, if waveHs
        was requested, the dictionary will look like this:
        {'waveHs': <np.masked_array>, 'waveTime': <np.masked_array>}
        """
        mask_results = {}
        save = {}
        result = {}

        # - Check if requested variable 0 exists
        first_var = self.get_var(self.vrs[0])
        if first_var is None:
            return result

        # Use first var to determine the dimension, grab it and find indices
        time_dim = None
        for dim_name in first_var.dimensions:
            nc_var = self.get_var(dim_name)
            if nc_var is None:  # To handle non-existing "count" variables
                continue
            if nc_var.units[0:7] == "seconds":
                time_dim = dim_name
                # dim_data = np.ma.asarray(self.nc.variables[dim_name][:])
                dim_data = self.__make_masked_array(nc_var, 0, nc_var.size)
                # - find time dimension start and end indices
                s_idx, e_idx = self.__get_indices(
                    dim_data[:], self.start_stamp, self.end_stamp
                )
                if s_idx == e_idx:
                    return result
                mask_results[time_dim] = dim_data[s_idx:e_idx]
            else:  # E.g. waveFrequency (Do I want to add to result?
                save[dim_name] = self.nc.variables[dim_name]

        # Grab the time subset of each variable
        for v_name in self.vrs:
            v = self.get_var(v_name)
            if v is None:
                continue
            if v_name == "metaStationName":
                # Use existing byte_arr_to_string method for station name
                result[v_name] = self.byte_arr_to_string(self.nc.variables[v_name][:])
            elif len(v.dimensions) == 1 and v.dimensions[0] == "maxStrlen64":
                arr = self.nc.variables[v_name][:]
                result[v_name] = self.byte_arr_to_string(arr).strip("\x00")
            elif time_dim:
                mask_results[v_name] = self.__make_masked_array(v, s_idx, e_idx)
            else:
                # !!! This could be a problem for 2-d arrays. Specifying end
                # index too large may reshape array?
                #
                # Also, there seems to be a bug for single values such as
                # metaWaterDepth in realtime files. Those variables have
                # no shape (shape is an empty tupble) and len(v) bombs even
                # though v[:] returns an array with one value.
                try:
                    v_len = len(v)
                except Exception:
                    v_len = 1
                result[v_name] = self.__make_masked_array(v, 0, v_len)

        # Use first var to determine the ancillary variable, e.g. waveFlagPrimary
        # If there is an ancillary variable, use pub/nonpub to create a mask
        if hasattr(first_var, "ancillary_variables"):
            anc_names = first_var.ancillary_variables.split(" ")
            anc_name = anc_names[0]
            # Create the variable mask using pub/nonpub choice
            if not time_dim:
                s_idx = None
            anc_mask = self.make_pub_mask(anc_name, s_idx, e_idx)
        else:
            anc_mask = None

        # Still a problem. 2-d vars.
        # Seems to work if the variable has no mask set. But
        # if mask set, returns 1-d var.
        for v_name in mask_results:
            if self.apply_mask and anc_mask is not None:
                v = mask_results[v_name]
                mask_results[v_name] = v[~anc_mask]
            result[v_name] = mask_results[v_name]

        return result

    def __make_masked_array(
        self, nc_var: str, s_idx: int, e_idx: int
    ) -> np.ma.masked_array:
        """Returns a numpy masked array for a given nc variable and indices.

        e_idx is appropriate for python arrays. I.e. one more than last index.
        """
        if len(nc_var.shape) <= 1:
            try:
                data = np.ma.asarray(nc_var[s_idx:e_idx])
            except Exception:
                try:
                    data = np.ma.asarray(nc_var[s_idx:e_idx])
                except Exception:
                    return None
            return data
        elif len(nc_var.shape) == 2:
            try:
                arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
            except Exception:
                try:
                    arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
                except Exception:
                    return None
            return arr

    def make_pub_mask(self, anc_name: str, s_idx: int, e_idx: int) -> np.ndarray:
        """Returns an np.ndarray of bools given pub_set and ancillary var"""

        # No s_idx, use whole array. Otherwise time subset the anc var.
        nc_primary = self.get_var(anc_name)
        if s_idx is None:
            s_idx = 0
            e_idx = len(nc_primary)
        primary_flag_values = nc_primary[s_idx:e_idx]

        if anc_name == "waveFrequencyFlagPrimary":
            return None  # Not sure about this one
        elif anc_name == "gpsStatusFlags":
            return np.ma.make_mask(primary_flag_values < 0, shrink=False)
        elif (
            anc_name == "waveFlagPrimary"
            or anc_name == "sstFlagPrimary"
            or anc_name == "acmFlagPrimary"
            or anc_name == "cat4FlagPrimary"
        ):
            public_mask = primary_flag_values != 1
        elif anc_name == "xyzFlagPrimary":
            public_mask = primary_flag_values != 2
        else:
            return None

        if self.pub_set == "public":
            return np.ma.make_mask(public_mask, shrink=False)
        elif self.pub_set == "nonpub":
            return np.ma.make_mask(~public_mask, shrink=False)
        elif self.pub_set == "all":
            return np.ma.make_mask(primary_flag_values < 0, shrink=False)

    def get_pub_set(self, name: str) -> str:
        """Returns either 'public', 'nonpub' or 'all'.

        Maintains backwards compatibility with prior pub_set names.
        """
        if name is None or name not in self.pub_set_map.keys():
            return self.pub_set_default
        else:
            return self.pub_set_map[name]

    def get_var_prefix(self, var_name: str) -> str:
        """Returns 'wave' part of the string 'waveHs'."""
        s = ""
        for c in var_name:
            if c.isupper():
                break
            s += c
        return s

    def get_flag_meanings(self, flag_name: str) -> list:
        """Returns flag category values and meanings given a flag_name."""
        return self.get_var(flag_name).flag_meanings.split(" ")

    def get_flag_values(self, flag_name: str) -> list:
        """Returns flag category values and meanings given a flag_name."""
        v = self.get_var(flag_name)
        if flag_name[0:3] == "gps":
            return v.flag_masks
        else:
            return v.flag_values

    def get_date_modified(self) -> datetime:
        """Returns the time the nc file was last modified."""
        return datetime.strptime(self.nc.date_modified, "%Y-%m-%dT%H:%M:%SZ")

    def get_coverage_start(self) -> datetime:
        """Returns the start time of the nc file data coverage."""
        return datetime.strptime(self.nc.time_coverage_start, "%Y-%m-%dT%H:%M:%SZ")

    def get_coverage_end(self) -> datetime:
        """Returns the end time of the nc file data coverage."""
        return datetime.strptime(self.nc.time_coverage_end, "%Y-%m-%dT%H:%M:%SZ")

    def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple:
        """Returns start and end indices to include any times that are equal to start_stamp or end_stamp."""
        s_idx = bisect_left(times, start_stamp)  # Will include time if equal
        # Will give e_idx appropriate for python arrays
        e_idx = bisect_right(times, end_stamp, s_idx)
        return s_idx, e_idx

    def get_nc(self, url: str = None, retry: bool = False) -> netCDF4.Dataset:
        if not url:
            url = self.url
        try:
            return netCDF4.Dataset(url)
        except Exception as e:
            # Try again if unsuccessful (nc file not ready? THREDDS problem?)
            if retry:
                logger.warning(
                    msg=f"Retrying to open dataset at {url} due to an unexpected exception: {e}"
                )
                try:
                    return netCDF4.Dataset(url)
                except Exception:
                    pass
            logger.exception(
                msg=f"Failed to open dataset at {url} due to an unexpected exception: {e}"
            )
            return None

    def byte_arr_to_string(self, b_arr: np.ma.masked_array) -> str:
        if np.ma.is_masked(b_arr):
            b_arr = b_arr[~b_arr.mask]
        s = ""
        for c in b_arr[:].astype("U"):
            s += c
        return s

    def metaStationName(self) -> str:
        """Returns the metaStationName."""
        if self.nc is None:
            return None
        return self.byte_arr_to_string(self.nc.variables["metaStationName"][:])

    def get_var(self, var_name: str):
        """Checks if a variable exists then returns a pointer to it."""
        if self.nc is None or var_name not in self.nc.variables:
            return None
        return self.nc.variables[var_name]

    def get_dataset_urls(self) -> dict:
        """Returns a dict of two lists of urls (or paths) to all CDIP station datasets.

        The top level keys are 'realtime' and 'historic'. The urls are retrieved by
        either descending into the THREDDS catalog.xml or recursively walking through data_dir sub
        directories.

        For applications that need to use the data from multiple deployment files for
        a station, stndata:get_nc_files will load those files efficiently.
        """
        if self.data_dir is not None:
            result = {"realtime": [], "archive": []}
            # - Walk through data_dir sub dirs
            for dirpath, dirnames, filenames in os.walk(self.data_dir):
                if dirpath.find("REALTIME") >= 0:
                    for file in filenames:
                        if os.path.splitext(file)[1] == ".nc":
                            result["realtime"].append(os.path.join(dirpath, file))
                elif dirpath.find("ARCHIVE") >= 0:
                    for file in filenames:
                        if os.path.splitext(file)[1] == ".nc":
                            result["archive"].append(os.path.join(dirpath, file))
            return result

        catalog_url = "/".join([self.THREDDS_url, "thredds", "catalog.xml"])

        result = {}
        root = url_utils.load_et_root(catalog_url)
        catalogs = []
        url_utils.rfindta(root, catalogs, "catalogRef", "href")
        for catalog in catalogs:
            # - Archive data sets
            url = self.THREDDS_url + catalog
            cat = url_utils.load_et_root(url)
            if catalog.find("archive") >= 0:
                ar_urls = []
                url_utils.rfindta(cat, ar_urls, "catalogRef", "href")
                b_url = os.path.dirname(url)
                # - Station datasets
                ar_ds_urls = []
                for u in ar_urls:
                    url = b_url + "/" + u
                    ds = url_utils.load_et_root(url)
                    url_utils.rfindta(ds, ar_ds_urls, "dataset", "urlPath")
                full_urls = []
                for url in ar_ds_urls:
                    full_urls.append(
                        "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                    )
                result["archive"] = full_urls
            elif catalog.find("realtime") >= 0:
                rt_ds_urls = []
                url_utils.rfindta(cat, rt_ds_urls, "dataset", "urlPath")
                full_urls = []
                for url in rt_ds_urls:
                    full_urls.append(
                        "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                    )
                result["realtime"] = full_urls
        return result

    def set_dataset_info(
        self, stn: str, org: str, dataset_name: str, deployment: int = None
    ) -> None:
        """Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of
        this is that we are ultimately setting _url_, which can be an actual path to the
        nc file or a url to THREDDS DoDS service.

        PARAMETERS
        ----------
        stn : str
           Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip
        org: str
            (Organization) Values are: cdip|ww3|external
        dataset_name : str
            Values: realtime|historic|archive|realtimexy|archivexy|
                    predeploy|moored|offsite|recovered
        deployment : int [optional]
            Supply this to access specific station deployment data.
            Must be >= 1.

        Paths are:
            <top_dir>/EXTERNAL/WW3/<filename>  [filename=<stn>_<org_dir>_<dataset_name>.nc][CDIP stn like 192w3]
            <top_dir>/REALTIME/<filename> [filename=<stn><p1>_rt.nc]
            <top_dir>/REALTIME/<filename> [filename=<stn><p1>_xy.nc]
            <top_dir>/ARCHIVE/<stn>/<filename> [filename=<stn3><p1>_<deployment>.nc]
            <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_rt.nc]**
            <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_xy.nc]**

            **Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2)  and RECOVERED (p3)
              pX = p0|p1|p2|p3; deployment = dXX e.g. d01

        Urls are:
            http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<org_dir>/<filename>
               [org1=external|cdip,org_dir=WW3|OWI etc]
            http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<dataset_name>/<filename>

            Note:
               Since adding dataset_name, we no longer need the 5char stn id
               for org=cdip datasets. The p_val will be 'p1' for every dataset except
               active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3).
        """
        ext = ".nc"

        # Allowing data_dir to be either url or path
        __using_path = False
        if self.data_dir:
            if self.data_dir[0:4] == "http":
                self.THREDDS_url = self.data_dir
            else:
                __using_path = True

        if org is None:
            org = "cdip"
        if org == "cdip":
            org1 = "cdip"
        else:
            org1 = "external"
        # Org_dir follows 'external' and always uppercase (isn't used when org is cdip)
        org_dir = org.upper()

        # Handle the xy datasets, e.g. 'realtimexy' -> ftype 'xy', name 'realtime'
        if "xy" in dataset_name:
            ftype = "xy"
            dataset_name = dataset_name[0:-2]
        else:
            ftype = "rt"

        # Historic and archive both use archive as a dataset_dir
        # (lowercase for urls, uppercase for local paths)
        if dataset_name == "historic":
            dataset_dir = "archive"
        else:
            dataset_dir = dataset_name

        # Local paths use uppercase
        if __using_path:
            org1 = org1.upper()
            dataset_dir = dataset_dir.upper()
            if org == "cdip":
                url_pre = self.data_dir
            else:
                url_pre = "/".join([self.data_dir, org1])
        else:
            url_pre = "/".join([self.THREDDS_url, self.dods, org1])

        # Set p_val to 'p1' - it will get changed appropriately below
        stn = stn[0:3] + "p1"

        # Make filename and url
        if org == "cdip":
            # NOTE(review): if deployment is None this produces the string
            # 'dNone', which is truthy and would be used by the 'archive'
            # branch below as a real deployment — confirm callers always
            # supply a deployment for archive/active datasets.
            if type(deployment) is not str:
                deployment = "d" + str(deployment).zfill(2)
            if dataset_name in self.active_datasets.keys():
                # Active datasets swap 'p1' for p0/p2/p3 and name the file d<NN>_<rt|xy>
                stn = stn[0:3] + self.active_datasets[dataset_name]
                dataset_name = "_".join([deployment, ftype])
            elif dataset_name == "realtime":
                dataset_name = ftype
            elif dataset_name == "historic":
                dataset_dir = "/".join([dataset_dir, stn])
            elif dataset_name == "archive" and deployment:
                dataset_name = deployment
                dataset_dir = "/".join([dataset_dir, stn])
            self.filename = "_".join([stn, dataset_name + ext])
            self.url = "/".join([url_pre, dataset_dir, self.filename])
        else:
            if stn[3:4] == "p" and org == "ww3":  # Cdip stn id -> look up WMO id
                stn_tmp = ndbc.get_wmo_id(stn[0:3])
            else:
                stn_tmp = stn
            self.filename = "_".join([stn_tmp, org_dir, dataset_name + ext])
            self.url = "/".join([url_pre, org_dir, self.filename])

        self.stn = stn
        self.org = org
        self.nc = self.get_nc()

__get_indices(times: list, start_stamp: int, end_stamp: int) -> tuple

Returns start and end indices to include any times that are equal to start_stamp or end_stamp.

Source code in cdippy/cdipnc.py
370
371
372
373
374
375
def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple:
    """Return (start, end) slice indices into the sorted `times` list.

    Both endpoints are inclusive of exact matches: bisect_left admits a
    time equal to start_stamp and bisect_right admits a time equal to
    end_stamp, so times[start:end] covers [start_stamp, end_stamp].
    """
    start = bisect_left(times, start_stamp)
    # end is a python-style exclusive index; search begins at `start`.
    end = bisect_right(times, end_stamp, start)
    return start, end

__init__(data_dir: str = None, deployment: int = None)

PARAMETERS

data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server. deployment : int [optional] Supply this to access specific station deployment data. Must be >= 1.

Source code in cdippy/cdipnc.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def __init__(self, data_dir: str = None, deployment: int = None):
    """Initialize a CDIPnc accessor with no dataset loaded yet.

    PARAMETERS
    ----------
    data_dir : str [optional]
        Full path to a local directory hierarchy of nc files
        (e.g. '/project/WNC') or a url to a THREDDS server.
    deployment : int [optional]
        Specific station deployment to access. Must be >= 1.
    """
    # self.nc stays None until a dataset is resolved and opened.
    self.deployment = deployment
    self.data_dir = data_dir
    self.nc = None

__make_masked_array(nc_var: str, s_idx: int, e_idx: int) -> np.ma.masked_array

Returns a numpy masked array for a given nc variable and indices.

e_idx is appropriate for python arrays. I.e. one more than last index.

Source code in cdippy/cdipnc.py
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def __make_masked_array(
    self, nc_var: str, s_idx: int, e_idx: int
) -> np.ma.masked_array:
    """Returns a numpy masked array for a given nc variable and indices.

    e_idx is appropriate for python arrays. I.e. one more than last index.

    NOTE(review): each read is attempted twice with identical code —
    presumably a single retry to ride out transient OPeNDAP/network read
    errors; confirm. A second failure returns None rather than raising.
    Variables with more than 2 dimensions fall through and return None.
    """
    if len(nc_var.shape) <= 1:
        try:
            data = np.ma.asarray(nc_var[s_idx:e_idx])
        except Exception:
            # Retry the identical read once before giving up.
            try:
                data = np.ma.asarray(nc_var[s_idx:e_idx])
            except Exception:
                return None
        return data
    elif len(nc_var.shape) == 2:
        try:
            arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
        except Exception:
            # Retry the identical read once before giving up.
            try:
                arr = np.ma.asarray(nc_var[s_idx:e_idx, :])
            except Exception:
                return None
        return arr

get_coverage_end() -> datetime

Returns the end time of the nc file data coverage.

Source code in cdippy/cdipnc.py
366
367
368
def get_coverage_end(self) -> datetime:
    """Return the nc file's time_coverage_end attribute parsed as a datetime."""
    stamp = self.nc.time_coverage_end
    return datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")

get_coverage_start() -> datetime

Returns the start time of the nc file data coverage.

Source code in cdippy/cdipnc.py
362
363
364
def get_coverage_start(self) -> datetime:
    """Return the nc file's time_coverage_start attribute parsed as a datetime."""
    stamp = self.nc.time_coverage_start
    return datetime.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ")

get_dataset_urls() -> dict

Returns a dict of two lists of urls (or paths) to all CDIP station datasets.

The top level keys are 'realtime' and 'archive'. The urls are retrieved by either descending into the THREDDS catalog.xml or recursively walking through data_dir sub directories.

For applications that need to use the data from multiple deployment files for a station, stndata:get_nc_files will load those files efficiently.

Source code in cdippy/cdipnc.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
def get_dataset_urls(self) -> dict:
    """Returns a dict of two lists of urls (or paths) to all CDIP station datasets.

    The top level keys are 'realtime' and 'archive'. The urls are retrieved by
    either descending into the THREDDS catalog.xml or recursively walking through data_dir sub
    directories.

    For applications that need to use the data from multiple deployment files for
    a station, stndata:get_nc_files will load those files efficiently.
    """
    if self.data_dir is not None:
        result = {"realtime": [], "archive": []}
        # - Walk through data_dir sub dirs, bucketing .nc files by
        #   whether the directory path mentions REALTIME or ARCHIVE.
        for dirpath, dirnames, filenames in os.walk(self.data_dir):
            if dirpath.find("REALTIME") >= 0:
                for file in filenames:
                    if os.path.splitext(file)[1] == ".nc":
                        result["realtime"].append(os.path.join(dirpath, file))
            elif dirpath.find("ARCHIVE") >= 0:
                for file in filenames:
                    if os.path.splitext(file)[1] == ".nc":
                        result["archive"].append(os.path.join(dirpath, file))
        return result

    catalog_url = "/".join([self.THREDDS_url, "thredds", "catalog.xml"])

    result = {}
    root = url_utils.load_et_root(catalog_url)
    catalogs = []
    url_utils.rfindta(root, catalogs, "catalogRef", "href")
    for catalog in catalogs:
        # - Archive data sets
        url = self.THREDDS_url + catalog
        cat = url_utils.load_et_root(url)
        if catalog.find("archive") >= 0:
            ar_urls = []
            url_utils.rfindta(cat, ar_urls, "catalogRef", "href")
            b_url = os.path.dirname(url)
            # - Station datasets (one sub-catalog per station)
            ar_ds_urls = []
            for u in ar_urls:
                url = b_url + "/" + u
                ds = url_utils.load_et_root(url)
                url_utils.rfindta(ds, ar_ds_urls, "dataset", "urlPath")
            full_urls = []
            for url in ar_ds_urls:
                # NOTE(review): url[5:] presumably strips a leading 'cdip/'
                # segment from the THREDDS urlPath — confirm against the
                # server's catalog layout.
                full_urls.append(
                    "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                )
            result["archive"] = full_urls
        elif catalog.find("realtime") >= 0:
            rt_ds_urls = []
            url_utils.rfindta(cat, rt_ds_urls, "dataset", "urlPath")
            full_urls = []
            for url in rt_ds_urls:
                full_urls.append(
                    "/".join([self.THREDDS_url, self.dods, "cdip", url[5:]])
                )
            result["realtime"] = full_urls
    return result

get_date_modified() -> datetime

Returns the time the nc file was last modified.

Source code in cdippy/cdipnc.py
358
359
360
def get_date_modified(self) -> datetime:
    """Return the nc file's date_modified attribute parsed as a datetime."""
    modified = self.nc.date_modified
    return datetime.strptime(modified, "%Y-%m-%dT%H:%M:%SZ")

get_flag_meanings(flag_name: str) -> list

Returns flag category values and meanings given a flag_name.

Source code in cdippy/cdipnc.py
346
347
348
def get_flag_meanings(self, flag_name: str) -> list:
    """Return the list of flag meanings for the named flag variable."""
    meanings = self.get_var(flag_name).flag_meanings
    return meanings.split(" ")

get_flag_values(flag_name: str) -> list

Returns flag category values and meanings given a flag_name.

Source code in cdippy/cdipnc.py
350
351
352
353
354
355
356
def get_flag_values(self, flag_name: str) -> list:
    """Return the flag category values for the named flag variable.

    GPS status flags publish bit masks; every other flag variable
    publishes plain flag values.
    """
    flag_var = self.get_var(flag_name)
    if flag_name.startswith("gps"):
        return flag_var.flag_masks
    return flag_var.flag_values

get_pub_set(name: str) -> str

Returns either 'public', 'nonpub' or 'all'.

Maintains backwards compatibility with prior pub_set names.

Source code in cdippy/cdipnc.py
327
328
329
330
331
332
333
334
335
def get_pub_set(self, name: str) -> str:
    """Return the standardized pub set name: 'public', 'nonpub' or 'all'.

    Known (possibly legacy) spellings are translated via self.pub_set_map;
    a missing or unknown name falls back to self.pub_set_default.
    """
    if name is not None and name in self.pub_set_map.keys():
        return self.pub_set_map[name]
    return self.pub_set_default

get_request() -> dict

Returns the data specified using set_request_info.

RETURNS

A dictionary containing keys of the requested variables each of which is a numpy masked array of data values. In addition, the time values are returned as well. For example, if waveHs was requested, the dictionary will look like this:

Source code in cdippy/cdipnc.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def get_request(self) -> dict:
    """Returns the data specified using set_request_info.

    RETURNS
    -------
    A dictionary containing keys of the requested variables each
    of which is a numpy masked array of data values. In addition,
    the time values are returned as well. For example, if waveHs
    was requested, the dictionary will look like this:
    {'waveHs': <np.masked_array>, 'waveTime': <np.masked_array>}
    """
    mask_results = {}  # Time-indexed arrays; pub-mask applied at the end
    save = {}  # Non-time dimensions (e.g. waveFrequency); not returned
    result = {}

    # - Check if requested variable 0 exists
    first_var = self.get_var(self.vrs[0])
    if first_var is None:
        return result

    # Use first var to determine the dimension, grab it and find indices
    time_dim = None
    for dim_name in first_var.dimensions:
        nc_var = self.get_var(dim_name)
        if nc_var is None:  # To handle non-existing "count" variables
            continue
        if nc_var.units[0:7] == "seconds":
            time_dim = dim_name
            # dim_data = np.ma.asarray(self.nc.variables[dim_name][:])
            dim_data = self.__make_masked_array(nc_var, 0, nc_var.size)
            # - find time dimension start and end indices
            s_idx, e_idx = self.__get_indices(
                dim_data[:], self.start_stamp, self.end_stamp
            )
            if s_idx == e_idx:
                # Empty timespan: no records in [start_stamp, end_stamp]
                return result
            mask_results[time_dim] = dim_data[s_idx:e_idx]
        else:  # E.g. waveFrequency; kept aside, not added to result
            save[dim_name] = self.nc.variables[dim_name]

    # Grab the time subset of each variable
    for v_name in self.vrs:
        v = self.get_var(v_name)
        if v is None:
            continue
        if v_name == "metaStationName":
            # Use existing byte_arr_to_string method for station name
            result[v_name] = self.byte_arr_to_string(self.nc.variables[v_name][:])
        elif len(v.dimensions) == 1 and v.dimensions[0] == "maxStrlen64":
            # Other char-array variables: decode and strip NUL padding
            arr = self.nc.variables[v_name][:]
            result[v_name] = self.byte_arr_to_string(arr).strip("\x00")
        elif time_dim:
            mask_results[v_name] = self.__make_masked_array(v, s_idx, e_idx)
        else:
            # !!! This could be a problem for 2-d arrays. Specifying end
            # index too large may reshape array?
            #
            # Also, there seems to be a bug for single values such as
            # metaWaterDepth in realtime files. Those variables have
            # no shape (shape is an empty tuple) and len(v) bombs even
            # though v[:] returns an array with one value.
            try:
                v_len = len(v)
            except Exception:
                v_len = 1
            result[v_name] = self.__make_masked_array(v, 0, v_len)

    # Use first var to determine the ancillary variable, e.g. waveFlagPrimary
    # If there is an ancillary variable, use pub/nonpub to create a mask
    if hasattr(first_var, "ancillary_variables"):
        anc_names = first_var.ancillary_variables.split(" ")
        anc_name = anc_names[0]
        # Create the variable mask using pub/nonpub choice
        # NOTE(review): if no time dimension was found above, e_idx was
        # never bound and this call would raise NameError — confirm that
        # flagged variables always carry a time dimension.
        if not time_dim:
            s_idx = None
        anc_mask = self.make_pub_mask(anc_name, s_idx, e_idx)
    else:
        anc_mask = None

    # Still a problem. 2-d vars.
    # Seems to work if the variable has no mask set. But
    # if mask set, returns 1-d var.
    for v_name in mask_results:
        if self.apply_mask and anc_mask is not None:
            v = mask_results[v_name]
            mask_results[v_name] = v[~anc_mask]
        result[v_name] = mask_results[v_name]

    return result

get_var(var_name: str)

Checks if a variable exists then returns a pointer to it.

Source code in cdippy/cdipnc.py
411
412
413
414
415
def get_var(self, var_name: str):
    """Checks if a variable exists then returns a pointer to it."""
    if self.nc is None or var_name not in self.nc.variables:
        return None
    return self.nc.variables[var_name]

get_var_prefix(var_name: str) -> str

Returns 'wave' part of the string 'waveHs'.

Source code in cdippy/cdipnc.py
337
338
339
340
341
342
343
344
def get_var_prefix(self, var_name: str) -> str:
    """Returns 'wave' part of the string 'waveHs'."""
    s = ""
    for c in var_name:
        if c.isupper():
            break
        s += c
    return s

make_pub_mask(anc_name: str, s_idx: int, e_idx: int) -> np.ndarray

Returns an np.ndarray of bools given pub_set and ancillary var

Source code in cdippy/cdipnc.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
def make_pub_mask(self, anc_name: str, s_idx: int, e_idx: int) -> np.ndarray:
    """Returns an np.ndarray of bools given pub_set and ancillary var"""

    # No s_idx, use whole array. Otherwise time subset the anc var.
    nc_primary = self.get_var(anc_name)
    if s_idx is None:
        s_idx = 0
        e_idx = len(nc_primary)
    primary_flag_values = nc_primary[s_idx:e_idx]

    if anc_name == "waveFrequencyFlagPrimary":
        return None  # Not sure about this one
    elif anc_name == "gpsStatusFlags":
        return np.ma.make_mask(primary_flag_values < 0, shrink=False)
    elif (
        anc_name == "waveFlagPrimary"
        or anc_name == "sstFlagPrimary"
        or anc_name == "acmFlagPrimary"
        or anc_name == "cat4FlagPrimary"
    ):
        public_mask = primary_flag_values != 1
    elif anc_name == "xyzFlagPrimary":
        public_mask = primary_flag_values != 2
    else:
        return None

    if self.pub_set == "public":
        return np.ma.make_mask(public_mask, shrink=False)
    elif self.pub_set == "nonpub":
        return np.ma.make_mask(~public_mask, shrink=False)
    elif self.pub_set == "all":
        return np.ma.make_mask(primary_flag_values < 0, shrink=False)

metaStationName() -> str

Returns the metaStationName.

Source code in cdippy/cdipnc.py
405
406
407
408
409
def metaStationName(self) -> str:
    """Return the station name decoded from the metaStationName variable."""
    if self.nc is None:
        return None
    raw = self.nc.variables["metaStationName"][:]
    return self.byte_arr_to_string(raw)

set_dataset_info(stn: str, org: str, dataset_name: str, deployment: int = None) -> None

Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of this is that we are ultimately setting url, which can be an actual path to the nc file or a url to THREDDS DoDS service.

PARAMETERS

stn : str Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip org: str (Organization) Values are: cdip|ww3|external dataset_name : str Values: realtime|historic|archive|realtimexy|archivexy| predeploy|moored|offsite|recovered deployment : int [optional] Supply this to access specific station deployment data. Must be >= 1.

Paths are

&lt;top_dir&gt;/EXTERNAL/WW3/&lt;filename&gt; [filename=&lt;stn&gt;_&lt;org_dir&gt;_&lt;dataset_name&gt;.nc] [CDIP stn like 192w3]; &lt;top_dir&gt;/REALTIME/&lt;filename&gt; [filename=&lt;stn&gt;&lt;p1&gt;_rt.nc]; &lt;top_dir&gt;/REALTIME/&lt;filename&gt; [filename=&lt;stn&gt;&lt;p1&gt;_xy.nc]; &lt;top_dir&gt;/ARCHIVE/&lt;stn&gt;/&lt;filename&gt; [filename=&lt;stn3&gt;&lt;p1&gt;_&lt;deployment&gt;.nc]; &lt;top_dir&gt;/PREDEPLOY/&lt;stn&gt;/&lt;filename&gt; [filename=&lt;stn3&gt;&lt;pX&gt;_&lt;deployment&gt;_rt.nc]; &lt;top_dir&gt;/PREDEPLOY/&lt;stn&gt;/&lt;filename&gt; [filename=&lt;stn3&gt;&lt;pX&gt;_&lt;deployment&gt;_xy.nc]

**Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2) and RECOVERED (p3) pX = p0|p1|p2|p3; deployment = dXX e.g. d01

Urls are

http://thredds.cdip.ucsd/thredds/dodsC/&lt;org1&gt;/&lt;org_dir&gt;/&lt;filename&gt; [org1=external|cdip, org_dir=WW3|OWI etc]; http://thredds.cdip.ucsd/thredds/dodsC/&lt;org1&gt;/&lt;dataset_name&gt;/&lt;filename&gt;

Note: Since adding dataset_name, we no longer need the 5char stn id for org=cdip datasets. The p_val will be 'p1' for every dataset except active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3).

Source code in cdippy/cdipnc.py
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
def set_dataset_info(
    self, stn: str, org: str, dataset_name: str, deployment: int = None
) -> None:
    """Sets self.stn, org, filename, url and loads self.nc. The key to understanding all of
    this is that we are ultimately setting _url_, which can be an actual path to the
    nc file or a url to THREDDS DoDS service.

    PARAMETERS
    ----------
    stn : str
       Can be in 3char (e.g. 028) or 5char (e.g. 028p2) format for org=cdip
    org: str
        (Organization) Values are: cdip|ww3|external
    dataset_name : str
        Values: realtime|historic|archive|realtimexy|archivexy|
                predeploy|moored|offsite|recovered
    deployment : int [optional]
        Supply this to access specific station deployment data.
        Must be >= 1. Required for archive and active datasets.

    Paths are:
        <top_dir>/EXTERNAL/WW3/<filename>  [filename=<stn>_<org_dir>_<dataset_name>.nc][CDIP stn like 192w3]
        <top_dir>/REALTIME/<filename> [filename=<stn><p1>_rt.nc]
        <top_dir>/REALTIME/<filename> [filename=<stn><p1>_xy.nc]
        <top_dir>/ARCHIVE/<stn>/<filename> [filename=<stn3><p1>_<deployment>.nc]
        <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_rt.nc]**
        <top_dir>/PREDEPLOY/<stn>/<filename> [filename=<stn3><pX>_<deployment>_xy.nc]**

        **Active deployment directories are PREDEPLOY (p0), MOORED (p1), OFFSITE (p2)  and RECOVERED (p3)
          pX = p0|p1|p2|p3; deployment = dXX e.g. d01

    Urls are:
        http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<org_dir>/<filename>
           [org1=external|cdip,org_dir=WW3|OWI etc]
        http://thredds.cdip.ucsd/thredds/dodsC/<org1>/<dataset_name>/<filename>

        Note:
           Since adding dataset_name, we no longer need the 5char stn id
           for org=cdip datasets. The p_val will be 'p1' for every dataset except
           active datasets in buoy states predeploy (p0), offsite (p2) and recovered (p3).
    """
    ext = ".nc"

    # Allowing data_dir to be either url or path
    using_path = False
    if self.data_dir:
        if self.data_dir[0:4] == "http":
            self.THREDDS_url = self.data_dir
        else:
            using_path = True

    if org is None:
        org = "cdip"
    org1 = "cdip" if org == "cdip" else "external"
    # Org_dir follows 'external' and always uppercase (isn't used when org is cdip)
    org_dir = org.upper()

    # Handle the xy datasets, e.g. 'realtimexy' -> ftype 'xy', name 'realtime'
    if "xy" in dataset_name:
        ftype = "xy"
        dataset_name = dataset_name[0:-2]
    else:
        ftype = "rt"

    # Historic and archive both use archive as a dataset_dir
    # (lowercase for urls, uppercase for local paths)
    if dataset_name == "historic":
        dataset_dir = "archive"
    else:
        dataset_dir = dataset_name

    # Local paths use uppercase
    if using_path:
        org1 = org1.upper()
        dataset_dir = dataset_dir.upper()
        if org == "cdip":
            url_pre = self.data_dir
        else:
            url_pre = "/".join([self.data_dir, org1])
    else:
        url_pre = "/".join([self.THREDDS_url, self.dods, org1])

    # Set p_val to 'p1' - it will get changed appropriately below
    stn = stn[0:3] + "p1"

    # Make filename and url
    if org == "cdip":
        # Format an integer deployment as e.g. 'd05'. A missing (None)
        # deployment is left as None so the 'archive' branch below does
        # not mistake the string 'dNone' for a real deployment.
        if deployment is not None and not isinstance(deployment, str):
            deployment = "d" + str(deployment).zfill(2)
        if dataset_name in self.active_datasets.keys():
            # Active datasets swap 'p1' for p0/p2/p3 and name the file
            # d<NN>_<rt|xy>; a deployment is required here.
            stn = stn[0:3] + self.active_datasets[dataset_name]
            dataset_name = "_".join([deployment, ftype])
        elif dataset_name == "realtime":
            dataset_name = ftype
        elif dataset_name == "historic":
            dataset_dir = "/".join([dataset_dir, stn])
        elif dataset_name == "archive" and deployment:
            dataset_name = deployment
            dataset_dir = "/".join([dataset_dir, stn])
        self.filename = "_".join([stn, dataset_name + ext])
        self.url = "/".join([url_pre, dataset_dir, self.filename])
    else:
        if stn[3:4] == "p" and org == "ww3":  # Cdip stn id -> look up WMO id
            stn_tmp = ndbc.get_wmo_id(stn[0:3])
        else:
            stn_tmp = stn
        self.filename = "_".join([stn_tmp, org_dir, dataset_name + ext])
        self.url = "/".join([url_pre, org_dir, self.filename])

    self.stn = stn
    self.org = org
    self.nc = self.get_nc()

set_request_info(start: datetime = None, end: datetime = None, vrs: list = ['waveHs'], pub_set: str = 'public', apply_mask: bool = True) -> None

Initializes data request information for get_request.

PARAMETERS

start : str or datetime [optional] : default Jan 1, 1975 Start time of data request (UTC). If provided as a string must be in the format Y-m-d H:M:S where Y is 4 chars and all others are 2 chars. Ex. '2020-03-30 19:32:56'. end : str or datetime [optional] : default now End time of data request (UTC). If not supplied defaults to now. vrs : list [optional] : default ['waveHs'] A list of the names of variables to retrieve. They all must start with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp'] pub_set: str [optional] values = public|nonpub|all Filters data based on data quality flags. apply_mask: bool [optional] default True Removes values from the masked array that have a mask value of True. Ex. If nonpub data is requested and apply_mask is False, the returned array will contain both public and nonpublic data (although public data records will have the mask value set to True). If apply_mask is set to True, only nonpub records will be returned.

Source code in cdippy/cdipnc.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def set_request_info(
    self,
    start: datetime = None,
    end: datetime = None,
    vrs: list = None,
    pub_set: str = "public",
    apply_mask: bool = True,
) -> None:
    """Initializes data request information for get_request.

    PARAMETERS
    ----------
    start : str or datetime [optional] : default Jan 1, 1975
        Start time of data request (UTC). If provided as a string must
        be in the format Y-m-d H:M:S where Y is 4 chars and all others
        are 2 chars. Ex. '2020-03-30 19:32:56'.
    end : str or datetime [optional] : default now
        End time of data request (UTC). If not supplied defaults to now.
    vrs : list [optional] : default ['waveHs']
        A list of the names of variables to retrieve. They all must start
        with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
    pub_set: str [optional] values = public|nonpub|all
        Filters data based on data quality flags.
    apply_mask: bool [optional] default True
        Removes values from the masked array that have a mask value of True.
        Ex. If nonpub data is requested and apply_mask is False, the returned
        array will contain both public and nonpublic data (although public
        data records will have the mask value set to True). If apply_mask
        is set to True, only nonpub records will be returned.
    """
    # None (not a mutable list literal) is the default to avoid the
    # shared-mutable-default pitfall; it still means ['waveHs'].
    if vrs is None:
        vrs = ["waveHs"]
    if start is None:
        start = datetime(1975, 1, 1).replace(tzinfo=timezone.utc)
    if end is None:
        end = datetime.now(timezone.utc)
    self.set_timespan(start, end)
    self.pub_set = self.get_pub_set(pub_set)  # Standardize the set name
    if apply_mask is not None:
        self.apply_mask = apply_mask
    self.vrs = vrs

set_timespan(start, end)

Sets request timespan

Source code in cdippy/cdipnc.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
def set_timespan(self, start, end):
    """Sets the request timespan.

    Accepts datetimes or '%Y-%m-%d %H:%M:%S' strings; strings are parsed
    and tagged as UTC. Also derives unix stamps for both endpoints.
    """

    def _as_utc(value):
        # Strings are parsed and marked UTC; datetimes pass through as-is.
        if isinstance(value, str):
            parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
            return parsed.replace(tzinfo=timezone.utc)
        return value

    self.start_dt = _as_utc(start)
    self.end_dt = _as_utc(end)
    self.start_stamp = cdip_utils.datetime_to_timestamp(self.start_dt)
    self.end_stamp = cdip_utils.datetime_to_timestamp(self.end_dt)

Historic

Bases: CDIPnc

Loads the historic nc file for a given station.

Source code in cdippy/cdipnc.py
931
932
933
934
935
936
937
938
class Historic(CDIPnc):
    """CDIPnc subclass bound to a station's "historic" nc dataset."""

    def __init__(self, stn, data_dir=None, org=None):
        """See CDIPnc.set_dataset_info for parameter details."""
        super().__init__(data_dir)
        self.set_dataset_info(stn, org, "historic")

__init__(stn, data_dir=None, org=None)

For parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
934
935
936
937
938
def __init__(self, stn, data_dir=None, org=None):
    """Load the historic dataset for station `stn`.

    For parameters see CDIPnc.set_dataset_info.
    """

    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, "historic")

Latest

Bases: CDIPnc

Loads the latest_3day.nc and has methods for retrieving the data.

Source code in cdippy/cdipnc.py
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
class Latest(CDIPnc):
    """Loads the latest_3day.nc and has methods for retrieving the data."""

    # Do not apply the mask to get_request calls.
    apply_mask = False

    def __init__(self, data_dir: str = None):
        """PARAMETERS
        ----------
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        """

        CDIPnc.__init__(self, data_dir)
        self.labels = []  # - Holds stn labels, e.g. '100p1' for this instance
        # Set latest timespan (Latest_3day goes up to 30 minutes beyond now)
        now_plus_30min = datetime.now(timezone.utc) + timedelta(minutes=30)
        # Using the unix epoch to catch all data in latest_3day in case the file
        # is very old. Made timezone-aware (UTC) to match now_plus_30min above;
        # comparing naive and aware datetimes raises TypeError.
        epoch = datetime.fromtimestamp(0, tz=timezone.utc)
        self.set_timespan(epoch, now_plus_30min)

        # Set basic information and init self.nc
        self.filename = "latest_3day.nc"
        if self.data_dir:
            self.url = "/".join([self.data_dir, "REALTIME", self.filename])
        else:
            self.url = "/".join(
                [CDIPnc.THREDDS_url, CDIPnc.dods, "cdip/realtime/latest_3day.nc"]
            )
        self.nc = self.get_nc(self.url)

    def metaStationNames(self) -> list:
        """Get list of latest station names."""
        if self.nc is None:
            return None
        return [
            self.byte_arr_to_string(name_arr)
            for name_arr in self.nc.variables["metaStationName"]
        ]

    def metaSiteLabels(self) -> list:
        """Sets and returns self.labels, a list of station labels, e.g. ['100p1',...]."""
        if self.nc is None:
            return None
        # Rebuild self.labels rather than appending so that repeated calls do
        # not accumulate duplicate entries (get_latest calls this a second time
        # after the meta-variable loop has already invoked it).
        self.labels = [
            self.byte_arr_to_string(label_arr)
            for label_arr in self.nc.variables["metaSiteLabel"]
        ]
        return self.labels

    def metaDeployLabels(self) -> list:
        """Returns a list of metaDeployLabels."""
        if self.nc is None:
            return None
        return [
            self.byte_arr_to_string(label_arr)
            for label_arr in self.nc.variables["metaDeployLabel"]
        ]

    def metaDeployNumbers(self) -> list:
        """Returns a list of metaDeployNumbers."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaDeployNumber"])

    def metaWMOids(self) -> list:
        """Returns a list of WMO ids, e.g. ['46225',...]."""
        if self.nc is None:
            return None
        return [
            self.byte_arr_to_string(label_arr)
            for label_arr in self.nc.variables["metaWMOid"]
        ]

    def metaLatitudes(self) -> list:
        """Returns a list of station latitudes, e.g. [23.4,...]."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaLatitude"][:])

    def metaLongitudes(self) -> list:
        """Returns a list of station longitudes, e.g. [23.4,...]."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaLongitude"][:])

    def metaWaterDepths(self) -> list:
        """Returns a list of station water depths."""
        if self.nc is None:
            return None
        return list(self.nc.variables["metaWaterDepth"][:])

    def get_latest(
        self,
        pub_set: str = "public",
        meta_vars: list = None,
        params: list = None,
        array_format=True,
    ) -> list:
        """
        By default, array_format = True, it will return a dictionary of numpy masked
        arrays of the latest requested parameters as well as metadata information.
        (Despite the `-> list` annotation, a dict is returned in this case.)

        If array_format = False, it returns a list of dicts. Each dict will contain
        latest station data and metadata.

        Parameter data values that are masked or non-existent are set to np.nan.
        Time values (e.g. 'waveTime') for the wave data if masked or non-existent
        are also set to np.nan.

        Both meta_vars and params if None (or not included in the argument list) will
        return default sets of meta_vars and parameters. If meta_vars and params are set
        just those will be returned.
        """

        # Use these if params (or meta_vars) is None

        default_params_by_type = {
            "wave": ["waveHs", "waveTp", "waveDp", "waveTa"],
            "sst": ["sstSeaSurfaceTemperature"],
            "acm": ["acmSpeed", "acmDirection"],
            "cat4": ["cat4AirTemperature"],
            "gps": ["gpsLongitude", "gpsLatitude"],
            "meta": [
                "metaLongitude",
                "metaLatitude",
                "metaWaterDepth",
                "metaStationName",
                "metaSiteLabel",
                "metaDeployLabel",
                "metaWMOid",
            ],
        }

        if params is None:
            params = []
            for t in default_params_by_type:
                if "meta" not in t:
                    params += default_params_by_type[t]

        # Initialize requested parameters by type. A param belongs to a type
        # when the type name is a substring of the param name, e.g.
        # "sst" in "sstSeaSurfaceTemperature".

        requested_params = {}
        for typ in default_params_by_type:
            for p in params:
                if typ in p:
                    if typ not in requested_params:
                        requested_params[typ] = []
                    requested_params[typ].append(p)
        # Dict keys are already unique; relying on dict insertion order keeps
        # the type iteration order deterministic (list(set(...)) was not).
        requested_types = list(requested_params)

        self.pub_set = self.get_pub_set(pub_set)

        # Load meta variables

        if meta_vars is None:
            meta_vars = default_params_by_type["meta"]

        meta = {}
        for p in meta_vars:
            # e.g. "metaLatitude" -> self.metaLatitudes()
            meta[p] = getattr(self, p + "s")()

        # We always need these to remove duplicates

        site_labels = self.metaSiteLabels()
        deploy_labels = self.metaDeployLabels()

        # Loop through the data types (e.g. 'wave', 'sst', 'acm' ...)
        # and grab data for the parameters requested.

        req = {}
        for typ in requested_types:

            # Add the parameters requested into the request list
            self.vrs = requested_params[typ].copy()

            # Add the necessary time variables into the request list
            self.vrs += [typ + "Time", typ + "TimeOffset", typ + "TimeBounds"]

            # Make the data request for the included parameters and time variables.
            req[typ] = self.get_request()

            # We don't quality check the GPS
            if typ != "gps":
                pub_mask = self.make_pub_mask(typ + "FlagPrimary", None, None)
                mask = np.ma.mask_or(req[typ][typ + "TimeOffset"].mask, pub_mask)
                req[typ][typ + "TimeOffset"].mask = mask

        # NOTE(review): assumes waveTimeOffset always exists in latest_3day.nc
        # — confirm for files without wave data.
        num_stations = self.get_var("waveTimeOffset").shape[1]

        result = {}  # Store station dictionaries
        for s in range(num_stations):
            stn = {}

            # To remove duplicates (p1 usually) use the site label as a key, e.g. 162p1
            # We will be keeping the pX with the greatest deploy label.

            site_label = site_labels[s]
            if site_label in result:
                if deploy_labels[s] < result[site_label]["deploy_label"]:
                    continue

            stn["deploy_label"] = deploy_labels[s]

            latest_timestamp = -1  # To help find a time
            latest_type = None  # for the group of
            waves_included = False  # parameters.
            has_data = False
            for typ in requested_types:
                offsets = req[typ][typ + "TimeOffset"][:, s]
                t_n = typ + "Time"
                tb_n = typ + "TimeBounds"
                # Find the highest data index (latest data) for the type
                # using the TimeOffset.
                idx = -1
                if self.__has_a_number(offsets):
                    idx = np.ma.flatnotmasked_edges(offsets)[1]
                    stn[t_n] = req[typ][t_n][idx] + offsets[idx]
                    stn[tb_n] = np.ma.array([None, None])
                    for i in [0, 1]:
                        stn[tb_n][i] = req[typ][tb_n][idx][i] + offsets[idx]
                    for pm in requested_params[typ]:
                        stn[pm] = req[typ][pm][idx, s]
                    if typ != "gps":
                        has_data = True
                else:
                    stn[t_n] = np.nan
                    stn[tb_n] = np.nan
                    for pm in requested_params[typ]:
                        stn[pm] = np.nan
                if stn[t_n] is not np.nan and typ != "gps":
                    if typ == "wave":
                        waves_included = True
                    if stn[t_n] > latest_timestamp:
                        latest_timestamp = stn[t_n]
                        latest_type = typ
            stn["hasParameterData"] = has_data
            if latest_type is not None:
                group_type = "wave" if waves_included else latest_type
                stn["groupTime"] = stn[group_type + "Time"]
                stn["groupTimeBounds"] = stn[group_type + "TimeBounds"]
                # Drop type times lagging the group time by more than 30 minutes.
                least_timestamp = max(stn["groupTime"] - 1800, 0)
                for typ in requested_types:
                    t_n = typ + "Time"
                    if stn[t_n] is not np.nan and stn[t_n] < least_timestamp:
                        stn[t_n] = np.nan
            else:
                stn["groupTime"] = np.nan
                stn["groupTimeBounds"] = np.nan
            for m in meta_vars:
                stn[m] = meta[m][s]
            if stn["hasParameterData"] or (len(params) == 0 and len(meta_vars) > 0):
                result[site_label] = stn

        # To satisfy the original array_format = False, remove the site Labels

        new_result = []
        for site_label in result:
            new_result.append(result[site_label])
        result = new_result

        # Return parameters as lists in a single dict rather than a list of dicts.

        array_result = {}
        if array_format:
            for r in result:
                for key in r:
                    if key not in array_result:
                        array_result[key] = []
                    array_result[key].append(r[key])
            result = array_result

        return result

    def __has_a_number(self, arr):
        """Test if there is at least one number in the array."""
        return any(isinstance(x, numbers.Number) for x in arr)

__has_a_number(arr)

Test if there is at least one number in the array

Source code in cdippy/cdipnc.py
880
881
882
883
884
885
def __has_a_number(self, arr):
    """Test if there is at least one number in the array."""
    return any(isinstance(item, numbers.Number) for item in arr)

__init__(data_dir: str = None)

PARAMETERS

data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server.

Source code in cdippy/cdipnc.py
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
def __init__(self, data_dir: str = None):
    """PARAMETERS
    ----------
    data_dir : str [optional]
        Either a full path to a directory containing a local directory hierarchy
        of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
    """

    CDIPnc.__init__(self, data_dir)
    self.labels = []  # - Holds stn labels, e.g. '100p1' for this instance
    # Set latest timespan (Latest_3day goes up to 30 minutes beyond now)
    now_plus_30min = datetime.now(timezone.utc) + timedelta(minutes=30)
    # Using the unix epoch to catch all data in latest_3day in case the file is very old
    # NOTE(review): this epoch is naive while now_plus_30min is aware (UTC);
    # confirm set_timespan never compares the two directly.
    epoch = datetime.fromtimestamp(0)
    self.set_timespan(epoch, now_plus_30min)

    # Set basic information and init self.nc
    self.filename = "latest_3day.nc"
    if self.data_dir:
        self.url = "/".join([self.data_dir, "REALTIME", self.filename])
    else:
        # Fall back to CDIP's public THREDDS server when no local data_dir is given.
        self.url = "/".join(
            [CDIPnc.THREDDS_url, CDIPnc.dods, "cdip/realtime/latest_3day.nc"]
        )
    self.nc = self.get_nc(self.url)

get_latest(pub_set: str = 'public', meta_vars: list = None, params: list = None, array_format=True) -> list

By default, array_format = True, it will return a dictionary of numpy masked arrays of the latest requested parameters as well as metadata information.

If array_format = False, it returns a list of dicts. Each dict will contain latest station data and metadata.

Parameter data values that are masked or non-existent are set to np.nan. Time values (e.g. 'waveTime') for the wave data, if masked or non-existent, are also set to np.nan.

Both meta_vars and params if None (or not included in the argument list) will return default sets of meta_vars and parameters. If meta_vars and params are set just those will be returned.

Source code in cdippy/cdipnc.py
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
def get_latest(
    self,
    pub_set: str = "public",
    meta_vars: list = None,
    params: list = None,
    array_format=True,
) -> list:
    """
    By default, array_format = True, it will return a dictionary of numpy masked
    arrays of the latest requested parameters as well as metadata information.
    (Despite the `-> list` annotation, a dict is returned in this case.)

    If array_format = False, it returns a list of dicts. Each dict will contain
    latest station data and metadata.

    Parameter data values that are masked or non-existent are set to np.nan.
    Time values (e.g. 'waveTime') for the wave data if masked or non-existent
    are also set to np.nan.

    Both meta_vars and params if None (or not included in the argument list) will
    return default sets of meta_vars and parameters. If meta_vars and params are set
    just those will be returned.
    """

    # Use these if params (or meta_vars) is None

    default_params_by_type = {
        "wave": ["waveHs", "waveTp", "waveDp", "waveTa"],
        "sst": ["sstSeaSurfaceTemperature"],
        "acm": ["acmSpeed", "acmDirection"],
        "cat4": ["cat4AirTemperature"],
        "gps": ["gpsLongitude", "gpsLatitude"],
        "meta": [
            "metaLongitude",
            "metaLatitude",
            "metaWaterDepth",
            "metaStationName",
            "metaSiteLabel",
            "metaDeployLabel",
            "metaWMOid",
        ],
    }

    if params is None:
        params = []
        for t in default_params_by_type:
            if "meta" not in t:
                params += default_params_by_type[t]

    # Initialize requested parameters by type. A param belongs to a type when
    # the type name is a substring, e.g. "sst" in "sstSeaSurfaceTemperature".

    requested_params = {}
    for typ in default_params_by_type:
        for p in params:
            if typ in p:
                if typ not in requested_params:
                    requested_params[typ] = []
                requested_params[typ].append(p)
    # NOTE(review): keys are already unique; set() only makes the iteration
    # order of the types nondeterministic across runs.
    requested_types = list(set(requested_params.keys()))

    self.pub_set = self.get_pub_set(pub_set)

    # Load meta variables

    if meta_vars is None:
        meta_vars = default_params_by_type["meta"]

    meta = {}
    for p in meta_vars:
        # e.g. "metaLatitude" -> self.metaLatitudes()
        meta[p] = getattr(self, p + "s")()

    # We always need these to remove duplicates

    site_labels = self.metaSiteLabels()
    deploy_labels = self.metaDeployLabels()

    # Loop through the data types (e.g. 'wave', 'sst', 'acm' ...)
    # and grab data for the parameters requested.

    req = {}
    for typ in requested_types:

        # Add the parameters requested into the request list
        self.vrs = requested_params[typ].copy()

        # Add the necessary time variables into the request list
        self.vrs += [typ + "Time", typ + "TimeOffset", typ + "TimeBounds"]

        # Make the data request for the included parameters and time variables.
        req[typ] = self.get_request()

        # We don't quality check the GPS
        if typ != "gps":
            pub_mask = self.make_pub_mask(typ + "FlagPrimary", None, None)
            mask = np.ma.mask_or(req[typ][typ + "TimeOffset"].mask, pub_mask)
            req[typ][typ + "TimeOffset"].mask = mask

    # NOTE(review): assumes waveTimeOffset always exists in latest_3day.nc —
    # confirm for files without wave data.
    num_stations = self.get_var("waveTimeOffset").shape[1]

    result = {}  # Store station dictionaries
    for s in range(num_stations):
        stn = {}

        # To remove duplicates (p1 usually) use the site label as a key, e.g. 162p1
        # We will be keeping the pX with the greatest deploy label.

        site_label = site_labels[s]
        if site_label in result:
            if deploy_labels[s] < result[site_label]["deploy_label"]:
                continue

        stn["deploy_label"] = deploy_labels[s]

        latest_timestamp = -1  # To help find a time
        latest_type = None  # for the group of
        waves_included = False  # parameters.
        has_data = False
        for typ in requested_types:
            offsets = req[typ][typ + "TimeOffset"][:, s]
            t_n = typ + "Time"
            tb_n = typ + "TimeBounds"
            # Find the highest data index (latest data) for the type
            # using the TimeOffset.
            idx = -1
            if self.__has_a_number(offsets):
                idx = np.ma.flatnotmasked_edges(offsets)[1]
                stn[t_n] = req[typ][t_n][idx] + offsets[idx]
                stn[tb_n] = np.ma.array([None, None])
                for i in [0, 1]:
                    stn[tb_n][i] = req[typ][tb_n][idx][i] + offsets[idx]
                for pm in requested_params[typ]:
                    stn[pm] = req[typ][pm][idx, s]
                if typ != "gps":
                    has_data = True
            else:
                stn[t_n] = np.nan
                stn[tb_n] = np.nan
                for pm in requested_params[typ]:
                    stn[pm] = np.nan
            if stn[t_n] is not np.nan and typ != "gps":
                if typ == "wave":
                    waves_included = True
                if stn[t_n] > latest_timestamp:
                    latest_timestamp = stn[t_n]
                    latest_type = typ
        stn["hasParameterData"] = has_data
        if latest_type is not None:
            group_type = "wave" if waves_included else latest_type
            stn["groupTime"] = stn[group_type + "Time"]
            stn["groupTimeBounds"] = stn[group_type + "TimeBounds"]
            # Drop type times lagging the group time by more than 30 minutes.
            least_timestamp = max(stn["groupTime"] - 1800, 0)
            for typ in requested_types:
                t_n = typ + "Time"
                if stn[t_n] is not np.nan and stn[t_n] < least_timestamp:
                    stn[t_n] = np.nan
        else:
            stn["groupTime"] = np.nan
            stn["groupTimeBounds"] = np.nan
        for m in meta_vars:
            stn[m] = meta[m][s]
        if stn["hasParameterData"] or (len(params) == 0 and len(meta_vars) > 0):
            result[site_label] = stn

    # To satisfy the original array_format = False, remove the site Labels

    new_result = []
    for site_label in result:
        new_result.append(result[site_label])
    result = new_result

    # Return parameters as lists in a single dict rather than a list of dicts.

    array_result = {}
    if array_format:
        for r in result:
            for key in r:
                if key not in array_result:
                    array_result[key] = []
                array_result[key].append(r[key])
        result = array_result

    return result

metaDeployLabels() -> list

Returns a list of metaDeployLabels.

Source code in cdippy/cdipnc.py
644
645
646
647
648
649
650
651
def metaDeployLabels(self) -> list:
    """Returns a list of metaDeployLabels."""
    if self.nc is None:
        return None
    return [
        self.byte_arr_to_string(arr)
        for arr in self.nc.variables["metaDeployLabel"]
    ]

metaDeployNumbers() -> list

Returns a list of metaDeployNumbers.

Source code in cdippy/cdipnc.py
653
654
655
656
657
658
659
660
def metaDeployNumbers(self) -> list:
    """Returns a list of metaDeployNumbers."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaDeployNumber"])

metaLatitudes() -> list

Returns a list of station latitudes, e.g. [23.4,...].

Source code in cdippy/cdipnc.py
671
672
673
674
675
676
677
678
def metaLatitudes(self) -> list:
    """Returns a list of station latitudes, e.g. [23.4,...]."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaLatitude"][:])

metaLongitudes() -> list

Returns a list of station longitudes, e.g. [23.4,...].

Source code in cdippy/cdipnc.py
680
681
682
683
684
685
686
687
def metaLongitudes(self) -> list:
    """Returns a list of station longitudes, e.g. [23.4,...]."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaLongitude"][:])

metaSiteLabels() -> list

Sets and returns self.labels, a list of station labels, e.g. ['100p1',...].

Source code in cdippy/cdipnc.py
636
637
638
639
640
641
642
def metaSiteLabels(self) -> list:
    """Sets and returns self.labels, a list of station labels, e.g. ['100p1',...]."""
    if self.nc is None:
        return None
    # Extends self.labels in place (matching the original behavior), so
    # repeated calls accumulate entries.
    self.labels.extend(
        self.byte_arr_to_string(arr) for arr in self.nc.variables["metaSiteLabel"]
    )
    return self.labels

metaStationNames() -> list

Get list of latest station names.

Source code in cdippy/cdipnc.py
627
628
629
630
631
632
633
634
def metaStationNames(self) -> list:
    """Get list of latest station names."""
    if self.nc is None:
        return None
    return [
        self.byte_arr_to_string(name_arr)
        for name_arr in self.nc.variables["metaStationName"]
    ]

metaWMOids() -> list

Returns a list of WMO ids, e.g. ['46225',...].

Source code in cdippy/cdipnc.py
662
663
664
665
666
667
668
669
def metaWMOids(self) -> list:
    """Returns a list of WMO ids, e.g. ['46225',...]."""
    if self.nc is None:
        return None
    return [
        self.byte_arr_to_string(label_arr)
        for label_arr in self.nc.variables["metaWMOid"]
    ]

metaWaterDepths() -> list

Returns a list of station water depths.

Source code in cdippy/cdipnc.py
689
690
691
692
693
694
695
696
def metaWaterDepths(self) -> list:
    """Returns a list of station water depths."""
    if self.nc is None:
        return None
    return list(self.nc.variables["metaWaterDepth"][:])

Realtime

Bases: CDIPnc

Loads the realtime nc file for the given station.

Source code in cdippy/cdipnc.py
922
923
924
925
926
927
928
class Realtime(CDIPnc):
    """Reader for a station's realtime nc file."""

    def __init__(self, stn: str, data_dir: str = None, org: str = None):
        """See CDIPnc.set_dataset_info for parameter details."""
        super().__init__(data_dir)
        self.set_dataset_info(stn, org, "realtime")

__init__(stn: str, data_dir: str = None, org: str = None)

For parameters: See CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
925
926
927
928
def __init__(self, stn: str, data_dir: str = None, org: str = None):
    """For parameters: See CDIPnc.set_dataset_info."""
    # Initialize the base reader, then select the "realtime" dataset.
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, "realtime")

RealtimeXY

Bases: Archive

Loads the realtime xy nc file for the given station.

Source code in cdippy/cdipnc.py
1034
1035
1036
1037
1038
1039
1040
class RealtimeXY(Archive):
    """Loads the realtime xy nc file for the given station."""

    def __init__(self, stn, data_dir=None, org=None):
        """For parameters see CDIPnc.set_dataset_info."""
        # NOTE(review): calls CDIPnc.__init__ directly, bypassing
        # Archive.__init__ — presumably intentional; confirm Archive.__init__
        # performs no setup required here.
        CDIPnc.__init__(self, data_dir)
        self.set_dataset_info(stn, org, "realtimexy")

__init__(stn, data_dir=None, org=None)

For parameters see CDIPnc.set_dataset_info.

Source code in cdippy/cdipnc.py
1037
1038
1039
1040
def __init__(self, stn, data_dir=None, org=None):
    """For parameters see CDIPnc.set_dataset_info."""
    # Initializes via CDIPnc.__init__ (not the Archive parent), then selects
    # the "realtimexy" dataset — presumably intentional; confirm.
    CDIPnc.__init__(self, data_dir)
    self.set_dataset_info(stn, org, "realtimexy")

cdippy.nchashes

NcHashes

A class that checks for changes to datasets by reading the online list of historic netCDF file hashes and comparing it to a previously saved copy.

Source code in cdippy/nchashes.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
class NcHashes:
    """
    Checks for changes to datasets by reading the online list of historic
    netCDF file hashes and comparing it to a locally saved copy.
    """

    # URL of the tab-delimited listing of nc files and their hashes.
    hashes_url = "http://cdip.ucsd.edu/data_access/metadata/wavecdf_by_datemod.txt"

    def __init__(self, hash_file_location=""):
        # Per-instance dict: previously a shared class attribute, so hashes
        # loaded by one instance leaked into every other instance.
        self.new_hashes = {}
        # Path of the pickled copy of the previously seen hash table.
        self.hash_pkl = hash_file_location + "/HASH.pkl"

    def load_hash_table(self):
        """Populate self.new_hashes from the online hash listing.

        Skips the header line and any line with fewer than 7 tab-separated
        fields; maps filename (field 0) to its hash (field 6).
        """
        lines = url_utils.read_url(self.hashes_url).strip().split("\n")
        for line in lines:
            if line.startswith("filename"):
                continue
            fields = line.split("\t")
            if len(fields) < 7:
                continue
            self.new_hashes[fields[0]] = fields[6]

    def compare_hash_tables(self) -> list:
        """
        Compare the current in-memory list of files, loaded by `load_hash_table`,
        to the list saved in HASH.pkl and return files that are new or changed.

        Returns:
            changed ([str]): nc files that are new or whose hash has changed
                since HASH.pkl was last saved. Empty when no saved table
                exists or no new hashes have been loaded.
        """
        old_hashes = self._get_old_hashes()
        if not old_hashes or not self.new_hashes:
            return []
        return [
            key
            for key, new_hash in self.new_hashes.items()
            if key not in old_hashes or old_hashes[key] != new_hash
        ]

    def save_new_hashes(self):
        """Persist self.new_hashes to HASH.pkl for the next comparison."""
        cdip_utils.pkl_dump(self.new_hashes, self.hash_pkl)

    def _get_old_hashes(self):
        """Load the previously saved hash table from HASH.pkl."""
        return cdip_utils.pkl_load(self.hash_pkl)

compare_hash_tables() -> list

Compare the current in-memory list of files, loaded by load_hash_table to the list saved in HASH.pkl and return a list of stations that are new or have changed.

Returns:
  • changed( [str] ) –

    A list of nc files that have changed or are new since HASH.pkl was last saved.

Source code in cdippy/nchashes.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def compare_hash_tables(self) -> list:
    """
    Compare the in-memory hash table (filled by `load_hash_table`) with the
    table previously saved to HASH.pkl.

    Returns:
        changed ([str]): nc files that are new or whose hash differs from
            the saved table. Empty when no saved table exists or nothing
            has been loaded.
    """
    previous = self._get_old_hashes()
    changed = []
    if previous and self.new_hashes:
        changed = [
            name
            for name, digest in self.new_hashes.items()
            if name not in previous or previous[name] != digest
        ]
    return changed

cdippy.ncstats

NcStats

Bases: StnData

Produces data availability statistics for a given station.

This class provides methods to
  • Return counts for the entire station record, intended for use by web applications.
  • Save availability counts (e.g., xyz counts) for individual NetCDF files. Updates to totals are calculated by re-summarizing any files that have changed and aggregating all files to produce new totals.
Source code in cdippy/ncstats.py
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
class NcStats(StnData):
    """Produces data availability statistics for a given station.

    This class provides methods to:
        * Return counts for the entire station record, intended for use by web applications.
        * Save availability counts (e.g., xyz counts) for individual NetCDF files.
          Updates to totals are calculated by re-summarizing any files that have changed
          and aggregating all files to produce new totals.
    """

    # Default quality-control flag variables summarized by flag_counts.
    QC_flags = ["waveFlagPrimary", "sstFlagPrimary", "gpsStatusFlags"]

    def __init__(self, stn: str, data_dir: str = None):
        """Initializes an NcStats instance.

        Args:
            stn (str): Station identifier.
            data_dir (str, optional): Path to the data directory. Defaults to None.
        """
        StnData.__init__(self, stn, data_dir)

        self.date_modifieds = {}
        # Query window spanning the full station record.
        # NOTE(review): self.start is naive while self.end is timezone-aware;
        # assumes get_series tolerates mixing the two -- confirm in StnData.
        self.start = datetime.strptime("1975-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
        self.end = datetime.now(timezone.utc)
        self.pub_set = "all"

    def make_stats(self) -> dict:
        """Computes station-level statistics.

        Returns:
            dict: A dictionary containing:
                - "flag_counts" (dict): Flag count summaries for the station.
                - "deployments" (dict): Deployment summary statistics.
        """
        return {
            "flag_counts": self.flag_counts(),
            "deployments": self.deployment_summary(),
        }

    def deployment_summary(self) -> dict:
        """Generates deployment summary statistics.

        Returns:
            dict: A dictionary containing:
                - Deployment IDs as keys, with values containing start and end coverage times.
                - "number_of_deployments" (int): The number of deployments.
        """
        self.load_nc_files()
        result = {}
        dep_cnt = 0
        for nc_name in self.nc_files:
            # Deployment file names end in "dNNN.nc"; pull out the "dNN" tag.
            dep = nc_name[-6:-3]
            if dep[0:1] == "d":
                dep_cnt += 1
                nc = self.nc_files[nc_name]
                result[dep] = {
                    "time_coverage_start": nc.get_coverage_start(),
                    "time_coverage_end": nc.get_coverage_end(),
                }
        result["number_of_deployments"] = dep_cnt
        return result

    def load_nc_files(self, types: list = None) -> dict:
        """Loads NetCDF files for the station into self.nc_files.

        Args:
            types (list, optional): List of file categories to load. Defaults to
                ["realtime", "historic", "archive"].

        Returns:
            dict: Dictionary of NetCDF file objects keyed by filename.
        """
        # None sentinel avoids the shared-mutable-default-argument pitfall.
        if types is None:
            types = ["realtime", "historic", "archive"]
        self.nc_files = self.get_nc_files(types)
        # Bug fix: the method was annotated "-> dict" but returned None.
        return self.nc_files

    def load_file(self, nc_filename: str):
        """Loads a specific NetCDF file into the instance.

        Args:
            nc_filename (str): Filename of the NetCDF file.

        Sets:
            self.nc: Loaded NetCDF file object.
        """
        if nc_filename in self.nc_files:
            self.nc = self.nc_files[nc_filename]
        else:
            self.nc = self.get_nc(self.filename_to_url(nc_filename))

    def load_date_modifieds(self):
        # Placeholder: per-file modification-date persistence not yet implemented.
        pass

    def store_date_modified(self):
        # Placeholder: per-file modification-date persistence not yet implemented.
        pass

    def nc_file_summaries(self) -> dict:
        """Computes a summary for every NetCDF file of the station.

        Returns:
            dict: Summaries keyed by nc filename.
        """
        self.load_nc_files()
        return {nc_name: self.nc_file_summary(nc_name) for nc_name in self.nc_files}

    def nc_file_summary(self, nc_filename: str) -> dict:
        """Computes a summary for a given NetCDF file.

        Args:
            nc_filename (str): Name of the NetCDF file.

        Returns:
            dict: Summary statistics for the file, including:
                - "flag_counts" (dict): Flag count statistics.
        """
        if self.nc is None:
            self.load_file(nc_filename)
        # Currently flag counts are the only summary produced.
        return {"flag_counts": self.flag_counts()}

    def flag_counts(self, QC_flags: list = None) -> dict:
        """Computes counts of flag variables for the entire station record.

        Args:
            QC_flags (list, optional): List of quality-control flag variable names.
                Defaults to `self.QC_flags`.

        Returns:
            dict: A dictionary containing:
                - "totals" (dict[str, pandas.DataFrame]): Total counts per flag.
                - "by_month" (dict[str, pandas.DataFrame]): Monthly counts per flag.
        """
        result = {"totals": {}, "by_month": {}}
        if not QC_flags:
            QC_flags = self.QC_flags
        for flag_name in QC_flags:
            dim = self.meta.get_var_prefix(flag_name)
            self.data = self.get_series(self.start, self.end, [flag_name], self.pub_set)
            cat_var = self.make_categorical_flag_var(flag_name)
            result["totals"][flag_name] = self.total_count(cat_var)
            result["by_month"][flag_name] = self.by_month_count(cat_var, dim)
        return result

    def total_count(self, cat_var) -> pd.DataFrame:
        """Counts totals for a given categorical flag variable.

        Args:
            cat_var (pandas.Categorical): Categorical flag variable.

        Returns:
            pandas.DataFrame: DataFrame with counts grouped by category.
        """
        return pd.DataFrame({"cnt": cat_var}).groupby(cat_var).count()

    def by_month_count(self, cat_var, dim: str) -> pd.DataFrame:
        """Counts observations by month for a given flag variable.

        Args:
            cat_var (pandas.Categorical): Categorical flag variable.
            dim (str): Dimension name prefix for the time variable.

        Returns:
            pandas.DataFrame: DataFrame with counts grouped by month and flag value.
        """
        df = pd.DataFrame(
            {"cnt": cat_var}, index=pd.to_datetime(self.data[dim + "Time"], unit="s")
        )
        # Group key "YYYYMM", zero-padded month.
        mon_map = df.index.map(lambda x: str(x.year) + str("{:02d}".format(x.month)))
        return df.groupby([mon_map, cat_var]).count().fillna(0).astype(int)

    def make_categorical_flag_var(self, flag_name: str):
        """Builds a pandas.Categorical of self.data[flag_name] whose categories
        are the flag's declared values, renamed to their human-readable meanings."""
        cat = pd.Categorical(
            self.data[flag_name], categories=self.meta.get_flag_values(flag_name)
        )
        return cat.rename_categories(self.meta.get_flag_meanings(flag_name))

__init__(stn: str, data_dir: str = None)

Initializes an NcStats instance.

Parameters:
  • stn (str) –

    Station identifier.

  • data_dir (str, default: None ) –

    Path to the data directory. Defaults to None.

Source code in cdippy/ncstats.py
18
19
20
21
22
23
24
25
26
27
28
29
30
def __init__(self, stn: str, data_dir: str = None):
    """Initializes an NcStats instance.

    Args:
        stn (str): Station identifier.
        data_dir (str, optional): Path to the data directory. Defaults to None.
    """
    StnData.__init__(self, stn, data_dir)

    self.date_modifieds = {}
    self.start = datetime.strptime("1975-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
    self.end = datetime.now(timezone.utc)
    self.pub_set = "all"

by_month_count(cat_var, dim: str) -> pd.DataFrame

Counts observations by month for a given flag variable.

Parameters:
  • cat_var (Categorical) –

    Categorical flag variable.

  • dim (str) –

    Dimension name prefix for the time variable.

Returns:
  • DataFrame –

    pandas.DataFrame: DataFrame with counts grouped by month and flag value.

Source code in cdippy/ncstats.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def by_month_count(self, cat_var, dim: str) -> pd.DataFrame:
    """Counts observations by month for a given flag variable.

    Args:
        cat_var (pandas.Categorical): Categorical flag variable.
        dim (str): Dimension name prefix for the time variable.

    Returns:
        pandas.DataFrame: DataFrame with counts grouped by month and flag value.
    """
    df = pd.DataFrame(
        {"cnt": cat_var}, index=pd.to_datetime(self.data[dim + "Time"], unit="s")
    )
    mon_map = df.index.map(lambda x: str(x.year) + str("{:02d}".format(x.month)))
    return df.groupby([mon_map, cat_var]).count().fillna(0).astype(int)

deployment_summary() -> dict

Generates deployment summary statistics.

Returns:
  • dict( dict ) –

    A dictionary containing: - Deployment IDs as keys, with values containing start and end coverage times. - "number_of_deployments" (int): The number of deployments.

Source code in cdippy/ncstats.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def deployment_summary(self) -> dict:
    """Generates deployment summary statistics.

    Returns:
        dict: A dictionary containing:
            - Deployment IDs as keys, with values containing start and end coverage times.
            - "number_of_deployments" (int): The number of deployments.
    """
    self.load_nc_files()
    result = {}
    dep_cnt = 0
    for nc_name in self.nc_files:
        dep = nc_name[-6:-3]
        if dep[0:1] == "d":
            dep_cnt += 1
            result[dep] = {}
            result[dep]["time_coverage_start"] = self.nc_files[
                nc_name
            ].get_coverage_start()
            result[dep]["time_coverage_end"] = self.nc_files[
                nc_name
            ].get_coverage_end()
    result["number_of_deployments"] = dep_cnt
    return result

flag_counts(QC_flags: list = None) -> dict

Computes counts of flag variables for the entire station record.

Parameters:
  • QC_flags (list, default: None ) –

    List of quality-control flag variable names. Defaults to self.QC_flags.

Returns:
  • dict( dict ) –

    A dictionary containing: - "totals" (dict[str, pandas.DataFrame]): Total counts per flag. - "by_month" (dict[str, pandas.DataFrame]): Monthly counts per flag.

Source code in cdippy/ncstats.py
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def flag_counts(self, QC_flags: list = None) -> dict:
    """Computes counts of flag variables for the entire station record.

    Args:
        QC_flags (list, optional): List of quality-control flag variable names.
            Defaults to `self.QC_flags`.

    Returns:
        dict: A dictionary containing:
            - "totals" (dict[str, pandas.DataFrame]): Total counts per flag.
            - "by_month" (dict[str, pandas.DataFrame]): Monthly counts per flag.
    """
    result = {"totals": {}, "by_month": {}}
    if not QC_flags:
        QC_flags = self.QC_flags
    for flag_name in QC_flags:
        dim = self.meta.get_var_prefix(flag_name)
        self.data = self.get_series(self.start, self.end, [flag_name], self.pub_set)
        cat_var = self.make_categorical_flag_var(flag_name)
        result["totals"][flag_name] = self.total_count(cat_var)
        result["by_month"][flag_name] = self.by_month_count(cat_var, dim)
    return result

load_file(nc_filename: str)

Loads a specific NetCDF file into the instance.

Parameters:
  • nc_filename (str) –

    Filename of the NetCDF file.

Sets

self.nc: Loaded NetCDF file object.

Source code in cdippy/ncstats.py
82
83
84
85
86
87
88
89
90
91
92
93
94
def load_file(self, nc_filename: str):
    """Loads a specific NetCDF file into the instance.

    Args:
        nc_filename (str): Filename of the NetCDF file.

    Sets:
        self.nc: Loaded NetCDF file object.
    """
    if nc_filename in self.nc_files:
        self.nc = self.nc_files[nc_filename]
    else:
        self.nc = self.get_nc(self.filename_to_url(nc_filename))

load_nc_files(types: list = ['realtime', 'historic', 'archive']) -> dict

Loads NetCDF files for the station.

Parameters:
  • types (list, default: ['realtime', 'historic', 'archive'] ) –

    List of file categories to load. Defaults to ["realtime", "historic", "archive"].

Returns:
  • dict( dict ) –

    Dictionary of NetCDF file objects keyed by filename.

Source code in cdippy/ncstats.py
70
71
72
73
74
75
76
77
78
79
80
def load_nc_files(self, types: list = ["realtime", "historic", "archive"]) -> dict:
    """Loads NetCDF files for the station.

    NOTE(review): despite the ``-> dict`` annotation this implicitly returns
    None; the result is stored on ``self.nc_files``. Also note the mutable
    default argument -- safe only while callees never mutate ``types``.

    Args:
        types (list, optional): List of file categories to load. Defaults to
            ["realtime", "historic", "archive"].
    """
    self.nc_files = self.get_nc_files(types)

make_stats() -> dict

Computes station-level statistics.

Returns:
  • dict( dict ) –

    A dictionary containing: - "flag_counts" (dict): Flag count summaries for the station. - "deployments" (dict): Deployment summary statistics.

Source code in cdippy/ncstats.py
32
33
34
35
36
37
38
39
40
41
42
43
def make_stats(self) -> dict:
    """Computes station-level statistics.

    Returns:
        dict: A dictionary containing:
            - "flag_counts" (dict): Flag count summaries for the station.
            - "deployments" (dict): Deployment summary statistics.
    """
    result = {}
    result["flag_counts"] = self.flag_counts()
    result["deployments"] = self.deployment_summary()
    return result

nc_file_summary(nc_filename: str) -> dict

Computes a summary for a given NetCDF file.

Parameters:
  • nc_filename (str) –

    Name of the NetCDF file.

Returns:
  • dict( dict ) –

    Summary statistics for the file, including: - "flag_counts" (dict): Flag count statistics.

Source code in cdippy/ncstats.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def nc_file_summary(self, nc_filename: str) -> dict:
    """Computes a summary for a given NetCDF file.

    Args:
        nc_filename (str): Name of the NetCDF file.

    Returns:
        dict: Summary statistics for the file, including:
            - "flag_counts" (dict): Flag count statistics.
    """
    if self.nc is None:
        self.load_file(nc_filename)
    result = {}
    # - Currently have just one summary
    result["flag_counts"] = self.flag_counts()
    return result

total_count(cat_var) -> pd.DataFrame

Counts totals for a given categorical flag variable.

Parameters:
  • cat_var (Categorical) –

    Categorical flag variable.

Returns:
  • DataFrame –

    pandas.DataFrame: DataFrame with counts grouped by category.

Source code in cdippy/ncstats.py
149
150
151
152
153
154
155
156
157
158
def total_count(self, cat_var) -> pd.DataFrame:
    """Counts totals for a given categorical flag variable.

    Args:
        cat_var (pandas.Categorical): Categorical flag variable.

    Returns:
        pandas.DataFrame: DataFrame with counts grouped by category.
    """
    return pd.DataFrame({"cnt": cat_var}).groupby(cat_var).count()

cdippy.ndbc

Methods for working with NDBC

get_stn_info(wmo_id)

Work in progress, querying ndbc sos service.

Source code in cdippy/ndbc.py
19
20
21
22
23
24
25
def get_stn_info(wmo_id):
    """Work in progress, querying ndbc sos service.

    Args:
        wmo_id: WMO identifier of the station, appended to the
            DescribeSensor query.

    Returns:
        list: Text gathered from the "description" elements of the SOS
        response (populated by url_utils.rfindt).
    """
    qry = "&".join([request, service, version, outputformat, describe_stn + wmo_id])
    url = "?".join([sos_base, qry])
    root = url_utils.load_et_root(url)
    results = []
    url_utils.rfindt(root, results, "description")
    # Bug fix: the collected results were previously discarded (the
    # function fell off the end and returned None).
    return results

get_wmo_id(stn, store=True, filepath='.')

Queries cdip wmo id table for a given station. Drops pickle file locally.

Source code in cdippy/ndbc.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def get_wmo_id(
    stn,
    store=True,
    filepath=".",
):
    """Queries cdip wmo id table for a given station. Drops pickle file locally.

    Args:
        stn: 3-char station identifier used as the lookup key.
        store: When True, cache the id table in <filepath>/WMO_IDS.pkl.
        filepath: Directory in which the pickle cache is kept.

    Returns:
        The WMO id string for the station, or None if not found.
    """
    pkl_fl = filepath + "/WMO_IDS.pkl" if store else None
    now = datetime.now(timezone.utc)
    # Fetch from the web when: caching is disabled, the cache file does not
    # exist yet, or the current minute is 23 -- a cheap once-an-hour cache
    # refresh window. NOTE(review): calls during minute 23 always re-fetch.
    if not pkl_fl or now.minute == 23 or not os.path.isfile(pkl_fl):
        url = "/".join([cdip_base, "wmo_ids"])
        r = url_utils.read_url(url)
        ids = {}
        for line in r.splitlines():
            # Fixed-width rows: chars 0-2 are the station id, chars 5+ the WMO id.
            ids[line[0:3]] = line[5:].strip()
        if pkl_fl:
            cdip_utils.pkl_dump(ids, pkl_fl)
    else:
        ids = cdip_utils.pkl_load(pkl_fl)
    if stn in ids:
        return ids[stn]
    return None

cdippy.plotting

make_annual_hs_boxplot(stn: str, year: int) -> Figure

Create a boxplot of annual significant wave heights for a station.

Parameters:
  • stn (str) –

    A 5-char station identifier, e.g. '100p1'.

  • year (int) –

    The year to plot.

Returns:
  • fig( Figure ) –

    A matplotlib.pyplot.Figure object for the created plot.

Source code in cdippy/plotting.py
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
def make_annual_hs_boxplot(stn: str, year: int) -> Figure:
    """
    Create a boxplot of annual significant wave heights for a station.

    Args:
        stn (str): A 5-char station identifier, e.g. '100p1'.
        year (int): The year to plot.

    Returns:
        fig (Figure): A matplotlib.pyplot.Figure object for the created plot.
    """
    # Thin delegation to the plots subpackage.
    fig = plots.annual_hs_boxplot.make_plot(stn, year)
    return fig

make_compendium_plot(stns: str, start: str, end: str, params: str, x_inch: int) -> Figure

CDIP's classic compendium plot for multiple stations and parameters.

Parameters:
  • stns (str) –

    A comma-delimited list of 5-char station identifiers, e.g. '100p1,201p1'.

  • start (str) –

    Start time of data series formatted as 'yyyymm[ddHHMMss]' where 'ddHHMMss' are optional components.

  • end (str) –

    End time of data series ('yyyymm[ddHHMMss]') If 'None' is provided, defaults to the current date and time.

  • params (str) –

    A comma-delimited string of parameter names, e.g. 'waveHs,waveTp'.

Returns:
  • fig( Figure ) –

    A matplotlib.pyplot.Figure object for the created plot.

Source code in cdippy/plotting.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def make_compendium_plot(
    stns: str, start: str, end: str, params: str, x_inch: int
) -> Figure:
    """CDIP's classic compendium plot for multiple stations and parameters.

    Args:
        stns (str): A comma-delimited list of 5-char station identifiers, e.g. '100p1,201p1'.
        start (str): Start time of data series formatted as 'yyyymm[ddHHMMss]' where 'ddHHMMss' are optional components.
        end (str): End time of data series ('yyyymm[ddHHMMss]') If 'None' is provided, defaults to the current date and time.
        params (str): A comma-delimited string of parameter names, e.g. 'waveHs,waveTp'.
        x_inch (int): Figure width in inches -- presumably; confirm against plots.compendium.make_plot.

    Returns:
        fig (Figure): A matplotlib.pyplot.Figure object for the created plot.

    """

    return plots.compendium.make_plot(stns, start, end, params, x_inch)

make_sst_climatology_plot(stn: str, x_inch: int = None, y_inch: int = None) -> Figure

Create a plot of yearly climatology of sea surface temperature at a station for all years of available data.

Parameters:
  • stn (str) –

    A 5-char station identifier, e.g. '100p1'.

Returns:
  • fig( Figure ) –

    A matplotlib.pyplot.Figure object for the created plot.

Source code in cdippy/plotting.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def make_sst_climatology_plot(
    stn: str, x_inch: int = None, y_inch: int = None
) -> Figure:
    """
    Create a plot of yearly climatology of sea surface temperature at a station for all years of available data.

    Args:
        stn (str): A 5-char station identifier, e.g. '100p1'.
        x_inch (int, optional): Figure width in inches -- presumably; confirm against plots.sst_climatology.make_plot.
        y_inch (int, optional): Figure height in inches -- presumably; confirm against plots.sst_climatology.make_plot.

    Returns:
        fig (Figure): A matplotlib.pyplot.Figure object for the created plot.
    """

    return plots.sst_climatology.make_plot(stn, x_inch, y_inch)

cdippy.spectra

Author: Sarah Heim. (Some of which is a port of Corey Olfe's code)

This code was originally taken from the cdip_mobile site.

Spectra

Bases: object

Source code in cdippy/spectra.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
class Spectra(object):
    """Ordered collection of Spectrum subclass objects, one per time step."""

    def __init__(self):
        """Initialize an empty Spectra collection. Meant for using methods
        to create an array of Spectrum subclass objects.

        :ivar specArr: array of Spectrum subClass objects
        """
        self.specArr = []

    def get_spectraNum(self):
        """Return the number of objects (spectrum) in the specArr."""
        return len(self.specArr)

    def __str__(self):
        """Spectra is an array of Spectrum(s)."""
        return "Spectra is an array of {0} Spectrum(s)".format(self.get_spectraNum())

    def get_spectraType(self):
        """Return the class of the first object in specArr (all are assumed
        to be the same type, e.g. Spectrum_64band), or None if empty."""
        if self.get_spectraNum() > 0:
            return type(self.specArr[0])
        return None

    def get_bandSize(self):
        """Return the number of freq/bands of the spectra, or 0 if empty."""
        if self.get_spectraNum() > 0:
            return len(self.specArr[0].freq)
        return 0

    def whichSpecClass(self, length):
        """
        Return the Spectrum subclass (e.g. Spectrum_64band) whose number of
        frequency bands equals ``length``, or None if no subclass matches.

        :var int length: length/count of the number of frequencies
        """
        for sub_cls in Spectrum.__subclasses__():
            if len(sub_cls().freq) == length:
                return sub_cls
        return None

    def set_spectrumArr_fromQuery(self, dataDict):
        """
        Populate the (initially empty) specArr with one Spectrum object per
        waveTime entry.

        :var dataDict: dictionary (output from cdippy.stndata query)
        """
        bandNum = len(dataDict["waveEnergyDensity"][0])
        # NOTE: raises TypeError (None is not callable) when no Spectrum
        # subclass matches bandNum -- same behavior as before.
        specCls = self.whichSpecClass(bandNum)
        for idx in range(len(dataDict["waveTime"])):
            spec = specCls()
            spec.set_specAtts(dataDict, idx)
            self.specArr.append(spec)

    def specArr_ToDict(self):
        """Output the specArr as a dictionary with keys like waveA1Value,
        waveEnergyDensity etc. Returns an empty dict when specArr is empty."""
        newDict = {}
        if not self.specArr:
            return newDict

        # Output key -> Spectrum attribute carrying its per-timestep value.
        key_to_attr = [
            ("waveTime", "wTime"),
            ("waveEnergyDensity", "ener_dens"),
            ("waveMeanDirection", "dMean"),
            ("waveA1Value", "a1"),
            ("waveA2Value", "a2"),
            ("waveB1Value", "b1"),
            ("waveB2Value", "b2"),
        ]
        # Check factors are optional; include them only when present.
        first = self.specArr[0]
        if hasattr(first, "check") and first.check is not None:
            key_to_attr.append(("waveCheckFactor", "check"))

        for key, attr in key_to_attr:
            newDict[key] = np.ma.array([getattr(s, attr) for s in self.specArr])
        return newDict

    def redist_specArr(self, objName):
        """
        Redistribute each spectrum to the given subclass if the current
        spectra type differs.

        :var objName: the subClass to redistribute to, e.g. ``Spectrum_9band``
        """
        if self.get_spectraType() != objName:
            for i, sp in enumerate(self.specArr):
                self.specArr[i] = sp.redistribute_sp(objName)

__init__()

initializing Spectra. Meant for using methods to create array of Spectrum subClass objects

:ivar specArr: array of Spectrum subClass objects

Source code in cdippy/spectra.py
16
17
18
19
20
21
22
def __init__(self):
    """initializing Spectra. Meant for using methods to create array
        of Spectrum subClass objects

    :ivar specArr: array of Spectrum subClass objects
    """
    self.specArr = []

__str__()

Spectra is an array of Spectrum(s)

Source code in cdippy/spectra.py
28
29
30
def __str__(self):
    """Spectra is an array of Spectrum(s)"""
    return "Spectra is an array of {0} Spectrum(s)".format(self.get_spectraNum())

get_bandSize()

returns the size (number of freq/bands) of the spectrum in spectra

Source code in cdippy/spectra.py
40
41
42
43
44
45
def get_bandSize(self):
    """returns the size (number of freq/bands) of the spectrum in spectra"""
    if self.get_spectraNum() > 0:
        return len(self.specArr[0].freq)
    else:
        return 0

get_spectraNum()

return the number of objects (spectrum) in the specArr

Source code in cdippy/spectra.py
24
25
26
def get_spectraNum(self):
    """return the number of objects (spectrum) in the specArr"""
    return len(self.specArr)

get_spectraType()

returns the type of Class of the first object in specArr, all should be the same i.e. Spectrum_64band

Source code in cdippy/spectra.py
32
33
34
35
36
37
38
def get_spectraType(self):
    """returns the type of Class of the first object in specArr,
    all should be the same i.e. Spectrum_64band"""
    if self.get_spectraNum() > 0:
        return type(self.specArr[0])
    else:
        return None

redist_specArr(objName)

Will redistribute spectrum if necessary (if different type)

:var objName: name of the subClass to redistribute to, e.g. Spectrum_9band

Source code in cdippy/spectra.py
120
121
122
123
124
125
126
127
128
def redist_specArr(self, objName):
    """
    Redistribute each spectrum to the given subclass if the current
    spectra type differs.

    :var objName: the subClass to redistribute to, e.g. ``Spectrum_9band``
    """
    if self.get_spectraType() != objName:
        for i, sp in enumerate(self.specArr):
            self.specArr[i] = sp.redistribute_sp(objName)

set_spectrumArr_fromQuery(dataDict)

specArr is empty. Create Spectrum objects and put in specArr

:var dataDict: dictionary (output from cdippy.stndata query)

Source code in cdippy/spectra.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def set_spectrumArr_fromQuery(self, dataDict):
    """
    specArr is empty. Create Spectrum objects and put in specArr

    :var dataDict: dictionary (output from cdippy.stndata query)
    """
    bandNum = len(dataDict["waveEnergyDensity"][0])
    specCls = self.whichSpecClass(bandNum)
    for e, ep in enumerate(dataDict["waveTime"]):
        # create Spectrum object of appropriate type for each time
        # i.e.: spec = Spectrum_64band(stn)
        spec = specCls()
        spec.set_specAtts(dataDict, e)
        self.specArr.append(spec)

specArr_ToDict()

Output the specArr as a dictionary with keys like waveA1Value, waveEnergyDensity etc.

Source code in cdippy/spectra.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def specArr_ToDict(self):
    """Output the specArr as a dictionary with keys like waveA1Value, waveEnergyDensity etc."""
    newDict = {}
    if len(self.specArr) == 0:
        return newDict

    names = [
        "waveTime",
        "waveEnergyDensity",
        "waveMeanDirection",
        "waveA1Value",
        "waveA2Value",
        "waveB1Value",
        "waveB2Value",
    ]
    lists = {}
    for name in names:
        lists[name] = []
    if hasattr(self.specArr[0], "check") and self.specArr[0].check is not None:
        lists["waveCheckFactor"] = []

    for s in self.specArr:
        lists["waveTime"].append(s.wTime)
        lists["waveEnergyDensity"].append(s.ener_dens)
        lists["waveMeanDirection"].append(s.dMean)
        lists["waveA1Value"].append(s.a1)
        lists["waveA2Value"].append(s.a2)
        lists["waveB1Value"].append(s.b1)
        lists["waveB2Value"].append(s.b2)
        if "waveCheckFactor" in lists:
            lists["waveCheckFactor"].append(s.check)

    newDict["waveTime"] = np.ma.array(lists["waveTime"])
    newDict["waveEnergyDensity"] = np.ma.array(lists["waveEnergyDensity"])
    newDict["waveMeanDirection"] = np.ma.array(lists["waveMeanDirection"])
    newDict["waveA1Value"] = np.ma.array(lists["waveA1Value"])
    newDict["waveA2Value"] = np.ma.array(lists["waveA2Value"])
    newDict["waveB1Value"] = np.ma.array(lists["waveB1Value"])
    newDict["waveB2Value"] = np.ma.array(lists["waveB2Value"])
    if "waveCheckFactor" in lists:
        newDict["waveCheckFactor"] = np.ma.array(lists["waveCheckFactor"])

    return newDict

whichSpecClass(length)

Return the Spectrum subclass appropriate for the given number of frequency bands, e.g. Spectrum_64band. :var int length: length/count of the number of frequencies

Source code in cdippy/spectra.py
47
48
49
50
51
52
53
54
55
56
57
58
def whichSpecClass(self, length):
    """
    Return the Spectrum subclass (e.g. Spectrum_64band) whose number of
    frequency bands equals ``length``, or None if no subclass matches.

    :var int length: length/count of the number of frequencies
    """
    specObjs = Spectrum.__subclasses__()
    for sObj in specObjs:
        objNum = len(sObj().freq)
        if objNum == length:
            return sObj
    return None

Spectrum

Bases: object

Source code in cdippy/spectra.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
class Spectrum(object):
    """Base class for a wave energy spectrum at a single timestamp.

    Subclasses are expected to define a frequency layout by populating
    ``freq`` (band-center frequencies) and ``bandwidth`` (band widths)
    masked arrays (see set_FreqBands); per-timestamp data is attached
    with set_specAtts.
    """

    def __init__(self):
        pass

    def __str__(self):
        # Show every attribute currently set on the instance.
        return str(self.__dict__)
        # return "Station %s: \n\tstart: %s \n\tend : %s" % (self.stn, self.start.isoformat(), self.end.isoformat())

    def set_specAtts(self, query, i):
        """Set spectra attributes from cdippy.stndata query

        :var mArr query: multi-dimensional array returned from cdippy.stndata
        :var int i: index of the record (timestamp) to copy
        """
        self.wTime = query["waveTime"][i]
        self.dMean = query["waveMeanDirection"][i]
        self.ener_dens = query["waveEnergyDensity"][i]
        self.a1 = query["waveA1Value"][i]
        self.b1 = query["waveB1Value"][i]
        self.a2 = query["waveA2Value"][i]
        self.b2 = query["waveB2Value"][i]
        # waveCheckFactor is optional in the query result.
        self.check = (
            query["waveCheckFactor"][i] if "waveCheckFactor" in query.keys() else None
        )

    def set_FreqBands(self, num, sz):
        """Makes the frequency layout: ``freq`` holds the band centers
        num, 2*num, ..., sz*num and ``bandwidth`` is a constant num.

        :var num: band width / frequency step
        :var int sz: size, number of bands
        """

        self.freq = np.ma.array(list(map(lambda x: x * num, range(1, sz + 1))))
        self.bandwidth = np.ma.array(([num] * sz), dtype=np.float32)
        # return list(map(lambda x: x*num, range(1, sz+1)))

    def freq_cutoffs(self):
        """returns array of tuples of all the (low, high) frequencies;
        a.k.a. bots/tops"""
        arr = []
        for i, f in enumerate(self.freq):
            b = self.bandwidth[i]
            # if i< 25: print(i, f, b)
            # Each band spans half a bandwidth on either side of its center.
            arr.append((f - b / 2, f + b / 2))
        return arr

    def recip(self, f):
        """returns INTEGER reciprocal of a number.
        Specifically for converting frequency (float) to period (integer)"""
        return round(1 / f)

    def peri_cutoffs(self):
        """returns array of tuples of all the (low, high) periods"""
        return list(map(lambda x: tuple(map(self.recip, x)), self.freq_cutoffs()))

    # def get_center_periods(self):
    #     return list(map(lambda x: "%.1f" % (1/x), self.freq))

    def ma_to_list(self, marray):
        """Return the underlying data of a masked-array attribute as a list.

        :var str marray: string name of attribute that contains a masked array
        """
        return list(np.ma.getdata(getattr(self, marray)))

    def get_Energy(self):
        """units:meters**2 per bandwidth.
        sum(get_Energy()) is Total Energy"""
        return self.ener_dens * self.bandwidth

    def get_SigWaveHt(self):
        """units: meters. Returns a lazy map; wrap in list() to materialize."""
        # return list(map(lambda x: self.calc_Hs(x), self.get_Energy()))
        return map(lambda x: self.calc_Hs(x), self.get_Energy())

    def get_Tp(self):
        # Peak period: reciprocal of the center frequency of the band
        # with the most energy (Hs is monotonic in energy).
        ind = np.argmax(list(self.get_SigWaveHt()))
        return 1 / (self.freq[ind])

    def get_Dp(self):
        # Peak direction: mean direction of the band with the most energy.
        ind = np.argmax(list(self.get_SigWaveHt()))
        return self.dMean[ind]

    def calc_Hs(self, energy):
        """returns the square root of energy x 4"""
        return energy**0.5 * 4

    def total_Hs(self):
        """square root of Total Energy x 4"""
        return self.calc_Hs(np.sum(self.get_Energy()))

    def redistribute_sp(self, specInstClass):
        """
        translation of Corey's redistribute_sp code:
        c  Subroutine that redistributes a spectrum into a new spectral layout.

        :var specInstClass: the class to redistribute to can be instance or name of Class
        """
        # c--   Initialize the new spectral dist (redist_sp)
        try:
            cls = getattr(MODULE, specInstClass)
        except Exception:
            # Unknown target class name: implicitly returns None.
            return
        redist_sp = cls()
        reBands = len(redist_sp.freq)
        redist_sp.wTime = self.wTime
        # dMean of -1 marks "no direction" for a band.
        redist_sp.dMean = np.ma.array(([-1] * reBands), dtype=np.float32)
        redist_sp.ener_dens = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.a1 = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.b1 = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.a2 = np.ma.zeros(reBands, dtype=np.float32)
        redist_sp.b2 = np.ma.zeros(reBands, dtype=np.float32)
        if hasattr(self, "check") and self.check is not None:
            # NOTE(review): unmasks the source check factors in place — a
            # side effect on self; confirm this is intended.
            self.check.mask = False
            redist_sp.check = np.ma.zeros(reBands, dtype=np.float32)

        redist_botsTops = redist_sp.freq_cutoffs()
        orig_botsTops = self.freq_cutoffs()

        # c--   Do the business - loop over the new bins, adding in each of the original
        # c--   spectral bands to the appropriate bin. Partition bands where necessary.
        for i in range(reBands):
            cos_sum = 0
            sin_sum = 0
            miss_dir = False
            rBot, rTop = redist_botsTops[i][0], redist_botsTops[i][1]
            # print('%s: (%.3f, %.3f)' % (i, rBot, rTop))
            for j, ob in enumerate(self.freq):
                # minor re-write of bot/top
                # c--   If the full band falls into the current bin, add the entire contents
                # c--   If the bottom of the band falls in the bin, add in the appropriate portion
                # c--   If the top of the band falls in the bin, add in the appropriate portion
                # c--   If the middle of the band falls in the bin, add in the appropriate portion
                oBot, oTop = orig_botsTops[j][0], orig_botsTops[j][1]
                # (bot, top) is the overlap of original band j with new bin i.
                bot = rBot if rBot >= oBot else oBot
                top = rTop if rTop <= oTop else oTop
                if bot < top:
                    # Moved band_calcs here:
                    # c  Helper for REDISTRIBUTE_SP; adds components of original spectral layout
                    # c  into the redistributed layout, weighting by energy
                    curr_energy = self.ener_dens[j] * (top - bot)
                    # [redist_sp, miss_dir, sin_sum, cos_sum] = self.band_calcs(redist_sp, curr_energy, sin_sum, cos_sum, miss_dir, i, j)
                    if curr_energy != 0:
                        redist_sp.ener_dens[i] += curr_energy
                        # print('\tredist(%.3f, %.3f) new(%.3f, %.3f), %f, %f, %f' %
                        # (oBot, oTop, bot, top, self.ener_dens[j],
                        # curr_energy, redist_sp.ener_dens[i]))
                        if self.dMean[j] == -1:
                            # Any contributing band lacking a direction voids
                            # the direction for this whole bin.
                            miss_dir = True
                        else:
                            redist_sp.a1[i] += curr_energy * self.a1[j]
                            redist_sp.b1[i] += curr_energy * self.b1[j]
                            redist_sp.a2[i] += curr_energy * self.a2[j]
                            redist_sp.b2[i] += curr_energy * self.b2[j]
                            if hasattr(self, "check") and self.check is not None:
                                redist_sp.check[i] += curr_energy * self.check[j]
                            sin_sum += curr_energy * math.sin(
                                math.radians(self.dMean[j])
                            )
                            cos_sum += curr_energy * math.cos(
                                math.radians(self.dMean[j])
                            )

            # c--   Calculate direction and calc ener_dens once bin is complete
            if redist_sp.ener_dens[i] > 0:
                redist_sp.ener_dens[i] /= redist_sp.bandwidth[i]
                if not miss_dir:
                    # NOTE(review): divides by the already bandwidth-scaled
                    # ener_dens (line above), not the raw energy sum — confirm
                    # against the Fortran original.
                    sin_avg = sin_sum / redist_sp.ener_dens[i]
                    cos_avg = cos_sum / redist_sp.ener_dens[i]
                    redist_sp.dMean[i] = math.degrees(math.atan2(sin_avg, cos_avg))
                    if redist_sp.dMean[i] < 0:
                        redist_sp.dMean[i] += 360
                    redist_sp.a1[i] /= redist_sp.bandwidth[i]
                    redist_sp.b1[i] /= redist_sp.bandwidth[i]
                    redist_sp.a2[i] /= redist_sp.bandwidth[i]
                    redist_sp.b2[i] /= redist_sp.bandwidth[i]
                    if hasattr(self, "check") and self.check is not None:
                        redist_sp.check[i] /= redist_sp.bandwidth[i]

            # c--   Normalize once energy redistributed
            # c  Subroutine that normalizes the coefficients in a sp_data_block. Direction
            # c  is set to -1 for any band in which the coeffs can't be normalized
            # c  by energy.
            if redist_sp.dMean[i] != -1:
                redist_sp.a1[i] /= redist_sp.ener_dens[i]
                redist_sp.b1[i] /= redist_sp.ener_dens[i]
                redist_sp.a2[i] /= redist_sp.ener_dens[i]
                redist_sp.b2[i] /= redist_sp.ener_dens[i]
                if hasattr(self, "check") and self.check is not None:
                    redist_sp.check[i] /= redist_sp.ener_dens[i]
                    # Clamp check factor to its documented ceiling.
                    if redist_sp.check[i] > 2.55:
                        redist_sp.check[i] = 2.55
                max_coeff = max(
                    redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
                )
                min_coeff = min(
                    redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
                )
                # Coefficients outside [-1, 1] cannot be normalized: drop direction.
                if max_coeff > 1 or min_coeff < -1:
                    redist_sp.dMean[i] = -1

        return redist_sp

calc_Hs(energy)

returns the square root of energy x 4

Source code in cdippy/spectra.py
214
215
216
def calc_Hs(self, energy):
    """Significant wave height from band energy: 4 * sqrt(energy)."""
    return 4 * energy**0.5

freq_cutoffs()

returns array of tuples of all the (low, high) frequencies; a.k.a. bots/tops

Source code in cdippy/spectra.py
166
167
168
169
170
171
172
173
174
def freq_cutoffs(self):
    """Return a list of (low, high) frequency tuples for every band
    (a.k.a. bots/tops)."""
    # Each band spans half a bandwidth on either side of its center.
    return [
        (f - b / 2, f + b / 2)
        for f, b in zip(self.freq, self.bandwidth)
    ]

get_Energy()

units:meters**2 per bandwidth. sum(get_energy) is Total Energy

Source code in cdippy/spectra.py
194
195
196
197
def get_Energy(self):
    """Band energy in meters**2 (energy density scaled by each band's width).
    sum(get_Energy()) yields the Total Energy."""
    density = self.ener_dens
    widths = self.bandwidth
    return density * widths

get_SigWaveHt()

units: meters

Source code in cdippy/spectra.py
199
200
201
202
def get_SigWaveHt(self):
    """Lazily yield the significant wave height (meters) of each band;
    wrap in list() to materialize."""
    return map(self.calc_Hs, self.get_Energy())

ma_to_list(marray)

:var str marray: string name of attribute that contains a masked array

Source code in cdippy/spectra.py
188
189
190
191
192
def ma_to_list(self, marray):
    """Return the raw (unmasked) data of a masked-array attribute as a list.

    :var str marray: string name of attribute that contains a masked array
    """
    masked = getattr(self, marray)
    return list(np.ma.getdata(masked))

peri_cutoffs()

returns array of tuples of all the (low,high) periods

Source code in cdippy/spectra.py
181
182
183
def peri_cutoffs(self):
    """Return a list of (low, high) period tuples for every band,
    derived from the frequency cutoffs via recip()."""
    return [
        tuple(self.recip(f) for f in pair)
        for pair in self.freq_cutoffs()
    ]

recip(f)

returns INTEGER reciprocal of a number. Specifically for converting frequency (float) to period (integer)

Source code in cdippy/spectra.py
176
177
178
179
def recip(self, f):
    """Return the reciprocal of f rounded to the nearest INTEGER;
    used to convert a frequency (float) into a period (integer)."""
    inverse = 1 / f
    return round(inverse)

redistribute_sp(specInstClass)

translation of Corey's redistribute_sp code: c Subroutine that redistributes a spectrum into a new spectral layout.

:var specInstClass: the class to redistribute to can be instance or name of Class

Source code in cdippy/spectra.py
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def redistribute_sp(self, specInstClass):
    """
    translation of Corey's redistribute_sp code:
    c  Subroutine that redistributes a spectrum into a new spectral layout.

    :var specInstClass: the class to redistribute to can be instance or name of Class
    """
    # c--   Initialize the new spectral dist (redist_sp)
    try:
        cls = getattr(MODULE, specInstClass)
    except Exception:
        # Unknown target class name: implicitly returns None.
        return
    redist_sp = cls()
    reBands = len(redist_sp.freq)
    redist_sp.wTime = self.wTime
    # dMean of -1 marks "no direction" for a band.
    redist_sp.dMean = np.ma.array(([-1] * reBands), dtype=np.float32)
    redist_sp.ener_dens = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.a1 = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.b1 = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.a2 = np.ma.zeros(reBands, dtype=np.float32)
    redist_sp.b2 = np.ma.zeros(reBands, dtype=np.float32)
    if hasattr(self, "check") and self.check is not None:
        # NOTE(review): unmasks the source check factors in place — a
        # side effect on self; confirm this is intended.
        self.check.mask = False
        redist_sp.check = np.ma.zeros(reBands, dtype=np.float32)

    redist_botsTops = redist_sp.freq_cutoffs()
    orig_botsTops = self.freq_cutoffs()

    # c--   Do the business - loop over the new bins, adding in each of the original
    # c--   spectral bands to the appropriate bin. Partition bands where necessary.
    for i in range(reBands):
        cos_sum = 0
        sin_sum = 0
        miss_dir = False
        rBot, rTop = redist_botsTops[i][0], redist_botsTops[i][1]
        # print('%s: (%.3f, %.3f)' % (i, rBot, rTop))
        for j, ob in enumerate(self.freq):
            # minor re-write of bot/top
            # c--   If the full band falls into the current bin, add the entire contents
            # c--   If the bottom of the band falls in the bin, add in the appropriate portion
            # c--   If the top of the band falls in the bin, add in the appropriate portion
            # c--   If the middle of the band falls in the bin, add in the appropriate portion
            oBot, oTop = orig_botsTops[j][0], orig_botsTops[j][1]
            # (bot, top) is the overlap of original band j with new bin i.
            bot = rBot if rBot >= oBot else oBot
            top = rTop if rTop <= oTop else oTop
            if bot < top:
                # Moved band_calcs here:
                # c  Helper for REDISTRIBUTE_SP; adds components of original spectral layout
                # c  into the redistributed layout, weighting by energy
                curr_energy = self.ener_dens[j] * (top - bot)
                # [redist_sp, miss_dir, sin_sum, cos_sum] = self.band_calcs(redist_sp, curr_energy, sin_sum, cos_sum, miss_dir, i, j)
                if curr_energy != 0:
                    redist_sp.ener_dens[i] += curr_energy
                    # print('\tredist(%.3f, %.3f) new(%.3f, %.3f), %f, %f, %f' %
                    # (oBot, oTop, bot, top, self.ener_dens[j],
                    # curr_energy, redist_sp.ener_dens[i]))
                    if self.dMean[j] == -1:
                        # Any contributing band lacking a direction voids
                        # the direction for this whole bin.
                        miss_dir = True
                    else:
                        redist_sp.a1[i] += curr_energy * self.a1[j]
                        redist_sp.b1[i] += curr_energy * self.b1[j]
                        redist_sp.a2[i] += curr_energy * self.a2[j]
                        redist_sp.b2[i] += curr_energy * self.b2[j]
                        if hasattr(self, "check") and self.check is not None:
                            redist_sp.check[i] += curr_energy * self.check[j]
                        sin_sum += curr_energy * math.sin(
                            math.radians(self.dMean[j])
                        )
                        cos_sum += curr_energy * math.cos(
                            math.radians(self.dMean[j])
                        )

        # c--   Calculate direction and calc ener_dens once bin is complete
        if redist_sp.ener_dens[i] > 0:
            redist_sp.ener_dens[i] /= redist_sp.bandwidth[i]
            if not miss_dir:
                # NOTE(review): divides by the already bandwidth-scaled
                # ener_dens (line above), not the raw energy sum — confirm
                # against the Fortran original.
                sin_avg = sin_sum / redist_sp.ener_dens[i]
                cos_avg = cos_sum / redist_sp.ener_dens[i]
                redist_sp.dMean[i] = math.degrees(math.atan2(sin_avg, cos_avg))
                if redist_sp.dMean[i] < 0:
                    redist_sp.dMean[i] += 360
                redist_sp.a1[i] /= redist_sp.bandwidth[i]
                redist_sp.b1[i] /= redist_sp.bandwidth[i]
                redist_sp.a2[i] /= redist_sp.bandwidth[i]
                redist_sp.b2[i] /= redist_sp.bandwidth[i]
                if hasattr(self, "check") and self.check is not None:
                    redist_sp.check[i] /= redist_sp.bandwidth[i]

        # c--   Normalize once energy redistributed
        # c  Subroutine that normalizes the coefficients in a sp_data_block. Direction
        # c  is set to -1 for any band in which the coeffs can't be normalized
        # c  by energy.
        if redist_sp.dMean[i] != -1:
            redist_sp.a1[i] /= redist_sp.ener_dens[i]
            redist_sp.b1[i] /= redist_sp.ener_dens[i]
            redist_sp.a2[i] /= redist_sp.ener_dens[i]
            redist_sp.b2[i] /= redist_sp.ener_dens[i]
            if hasattr(self, "check") and self.check is not None:
                redist_sp.check[i] /= redist_sp.ener_dens[i]
                # Clamp check factor to its documented ceiling.
                if redist_sp.check[i] > 2.55:
                    redist_sp.check[i] = 2.55
            max_coeff = max(
                redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
            )
            min_coeff = min(
                redist_sp.a1[i], redist_sp.b1[i], redist_sp.a2[i], redist_sp.b2[i]
            )
            # Coefficients outside [-1, 1] cannot be normalized: drop direction.
            if max_coeff > 1 or min_coeff < -1:
                redist_sp.dMean[i] = -1

    return redist_sp

set_FreqBands(num, sz)

Makes array of frequencies. :var num: band width (frequency step) :var int sz: size, number of bands

Source code in cdippy/spectra.py
156
157
158
159
160
161
162
163
def set_FreqBands(self, num, sz):
    """Build the frequency layout: ``freq`` holds the band-center
    frequencies num, 2*num, ..., sz*num and ``bandwidth`` holds a
    constant width of num for each of the sz bands.

    :var num: band width / frequency step
    :var int sz: size, number of bands
    """
    self.freq = np.ma.array([k * num for k in range(1, sz + 1)])
    self.bandwidth = np.ma.array([num] * sz, dtype=np.float32)

set_specAtts(query, i)

Set spectra attributes from cdippy.stndata query

:var mArr query: multi-dimensional array returned from cdippy.stndata :var int i: index

Source code in cdippy/spectra.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def set_specAtts(self, query, i):
    """Copy record ``i`` of a cdippy.stndata query onto this spectrum.

    :var mArr query: multi-dimensional array returned from cdippy.stndata
    :var int i: index of the record (timestamp) to copy
    """
    # Map instance attribute name -> query variable name.
    for attr, key in (
        ("wTime", "waveTime"),
        ("dMean", "waveMeanDirection"),
        ("ener_dens", "waveEnergyDensity"),
        ("a1", "waveA1Value"),
        ("b1", "waveB1Value"),
        ("a2", "waveA2Value"),
        ("b2", "waveB2Value"),
    ):
        setattr(self, attr, query[key][i])
    # waveCheckFactor is optional in the query result.
    if "waveCheckFactor" in query.keys():
        self.check = query["waveCheckFactor"][i]
    else:
        self.check = None

total_Hs()

square root of Total Energy x 4

Source code in cdippy/spectra.py
218
219
220
221
def total_Hs(self):
    """Significant wave height of the whole spectrum:
    4 * square root of the Total Energy."""
    total_energy = np.sum(self.get_Energy())
    return self.calc_Hs(total_energy)

cdippy.stndata

StnData

Bases: CDIPnc

Returns data and metadata for the specified station.

This class merges data from multiple CDIP netCDF files to produce a single dictionary with keys of the requested variables. Each key corresponds to a numpy masked array.

METHODS

get_series(start, end, vrs) Returns data for a station given start date, end date and a list of variables. get_parameters(start, end) Calls get_series with vrs set to parameter variables. get_spectra(start, end) Calls get_series with vrs set to spectrum variables. get_xyz Calls get_series with vrs set to xyz variables. get_stn_meta Returns all station meta variables. get_nc_files Returns a dictionary of all this station's netCDF files. get_target_times Returns a 2-tuple of timestamps, an interval corresponding to n records to the right or left of target_timestamp.

Source code in cdippy/stndata.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
class StnData(CDIPnc):
    """Returns data and metadata for the specified station.

    This class merges data from multiple CDIP netCDF files to
    produce a single dictionary with keys of the requested variables.
    Each key corresponds to a numpy masked array.

    METHODS
    -------
    get_series(start, end, vrs)
        Returns data for a station given start date, end date and a
        list of variables.
    get_parameters(start, end)
        Calls get_series with vrs set to parameter variables.
    get_spectra(start, end)
        Calls get_series with vrs set to spectrum variables.
    get_xyz
        Calls get_series with vrs set to xyz variables.
    get_stn_meta
        Returns all station meta variables.
    get_nc_files
        Returns a dictionary of all this station's netCDF files.
    get_target_times
        Returns a 2-tuple of timestamps, an interval corresponding
        to  n records to the right or left of target_timestamp.
    """

    nc_file_types = [
        "historic",
        "archive",
        "predeploy",
        "moored",
        "offsite",
        "recovered",
    ]

    # Commonly requested sets of variables
    parameter_vars = ["waveHs", "waveTp", "waveDp", "waveTa"]
    xyz_vars = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]
    gps_vars = ["gpsLatitude", "gpsLongitude", "gpsStatusFlags"]
    spectrum_vars = [
        "waveEnergyDensity",
        "waveMeanDirection",
        "waveA1Value",
        "waveB1Value",
        "waveA2Value",
        "waveB2Value",
        "waveCheckFactor",
    ]
    meta_vars = [
        "metaStationName",
        "metaDeployLatitude",
        "metaDeployLongitude",
        "metaWaterDepth",
        "metaDeclination",
    ]
    meta_attributes = [
        "wmo_id",
        "geospatial_lat_min",
        "geospatial_lat_max",
        "geospatial_lat_units",
        "geospatial_lat_resolution",
        "geospatial_lon_min",
        "geospatial_lon_max",
        "geospatial_lon_units",
        "geospatial_lon_resolution",
        "geospatial_vertical_min",
        "geospatial_vertical_max",
        "geospatial_vertical_units",
        "geospatial_vertical_resolution",
        "time_coverage_start",
        "time_coverage_end",
        "date_created",
        "date_modified",
    ]

    # Class-level defaults; instances overwrite these during requests.
    pub_set = None
    vrs = None
    meta = None

    def __init__(
        self, stn: str, data_dir: str = None, org: str = None, deploy_num: int = None
    ):
        """
        PARAMETERS
        ----------
        stn : str
           Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
        data_dir : str [optional]
            Either a full path to a directory containing a local directory hierarchy
            of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
        org: str
            (Organization) Values are: cdip|ww3|external
        deploy_num : int [optional]
            Supply this to access specific station deployment data.
            Must be >= 1.
        """
        self.nc = None
        self.stn = stn
        self.data_dir = data_dir
        self.org = org

        # Accept numbers for cdip stations
        if type(stn) is not str:
            stn = str(stn).zfill(3) + "p1"

        # Initialize nc file used for meta information
        self.deploy_num = deploy_num
        if deploy_num:
            # Check all active datasets in this order p3 -> p2 -> p1 -> p0
            p_lookup = dict([[v, k] for k, v in self.active_datasets.items()])
            __found_active_meta = False
            for p in reversed(sorted(p_lookup)):
                self.meta = Active(
                    self.stn, self.deploy_num, p_lookup[p], self.data_dir, self.org
                )
                if self.meta.nc:
                    __found_active_meta = True
                    break
            if not __found_active_meta:
                self.meta = Archive(self.stn, self.deploy_num, self.data_dir, self.org)
        else:
            self.historic = Historic(self.stn, self.data_dir, self.org)
            self.realtime = Realtime(self.stn, self.data_dir, self.org)
            if self.historic and self.historic.nc:
                self.meta = self.historic
            else:
                if self.realtime and self.realtime.nc:
                    self.meta = self.realtime
        if self.meta is None:
            return None

    def get_stn_meta(self) -> dict:
        """Returns a dict of station meta data."""
        result = {}
        if self.meta is None:
            return result
        self.meta.set_request_info(vrs=self.meta_vars)
        result = self.meta.get_request()
        for attr_name in self.meta_attributes:
            if hasattr(self.meta.nc, attr_name):
                result[attr_name] = getattr(self.meta.nc, attr_name)
        return result

    def get_parameters(
        self,
        start: datetime = None,
        end: datetime = None,
        pub_set: str = "public",
        apply_mask=True,
        target_records=0,
    ) -> dict:
        """Calls get_series to return wave parameters."""
        return self.get_series(
            start, end, self.parameter_vars, pub_set, apply_mask, target_records
        )

    def get_xyz(
        self, start: datetime = None, end: datetime = None, pub_set: str = "public"
    ) -> dict:
        """Calls get_series to return displacement data."""
        return self.get_series(start, end, self.xyz_vars, pub_set)

    def get_spectra(
        self,
        start: datetime = None,
        end: datetime = None,
        pub_set: str = "public",
        apply_mask: bool = True,
        target_records: int = 0,
        force_64bands: bool = False,
    ) -> dict:
        """Calls get_series to return spectral data."""
        return self.get_series(
            start,
            end,
            self.spectrum_vars,
            pub_set,
            apply_mask,
            target_records,
            force_64bands,
        )

    def get_series(
        self,
        start: datetime = None,
        end: datetime = None,
        vrs: list = None,
        pub_set: str = None,
        apply_mask: bool = None,
        target_records: int = 0,
        force_64bands: bool = False,
    ) -> dict:
        """
        Returns a dict of data between start and end dates with specified quality.

        PARAMETERS
        ----------
        start : str or datetime [optional] : default Jan 1, 1975
            Start time of data request (UTC). If provided as a string must
            be in the format Y-m-d H:M:S where Y is 4 chars and all others
            are 2 chars. Ex. '2020-03-30 19:32:56'.
        end : str or datetime [optional] : default now
            End time of data request (UTC). If not supplied defaults to now.
        vrs : list [optional] : default ['waveHs']
            A list of the names of variables to retrieve. They all must start
            with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
        pub_set: str [optional] values = public|nonpub|all
            Filters data based on data quality flags.
        apply_mask: bool [optional] default True
            Removes values from the masked array that have a mask value of True.
            Ex. If nonpub data is requested and apply_mask is False, the returned
            array will contain both public and nonpublic data (although public
            data records will have the mask value set to True). If apply_mask
            is set to True, only nonpub records will be returned.
        target_records: int [optional]
            If start is specified and end is None, this will specify the number
            of additional records to return closest to start.
        force_64bands: bool [optional]
            For the case in which all spectra returned are mk4 100 band format,
            force the conversion to 64bands. Mixed formats are always returned in mk3
            64 band format.
        """
        if vrs is None:
            vrs = self.parameter_vars
        prefix = self.get_var_prefix(vrs[0])

        if start is not None and end is None:  # Target time
            if isinstance(start, str):
                start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
            ts_I = self.get_target_timespan(
                cdip_utils.datetime_to_timestamp(start), target_records, prefix + "Time"
            )
            if ts_I[0] is not None:
                start = cdip_utils.timestamp_to_datetime(ts_I[0])
                end = cdip_utils.timestamp_to_datetime(ts_I[1])
            else:
                return None
        elif start is None:  # Use default 3 days back
            start = datetime.utcnow() - timedelta(days=3)
            end = datetime.utcnow()

        if pub_set is None:
            pub_set = self.pub_set

        if apply_mask is None:
            apply_mask = self.apply_mask

        self.force_64bands = force_64bands

        self.set_request_info(start, end, vrs, pub_set, apply_mask)

        # Dispatch on variable prefix and whether a specific deployment
        # was requested: xyz data and active deployments use different
        # file sets than the default realtime+historic merge.
        if prefix == "xyz" and self.deploy_num is None:
            return self.__merge_xyz_request()
        elif prefix == "xyz" and self.deploy_num is not None:
            return self.__merge_active_request("xyz")
        elif self.deploy_num is None:
            return self.__merge_request()
        else:
            return self.__merge_active_request("rt")

    def __aggregate_dicts(self, dict1: dict, dict2: dict) -> dict:
        """
        Returns a dict of data combined from two dictionaries. Dict1 has oldest data.
        All the other __merge methods end up using this method.

        This method also redistributes 100 band spectra to 64 band format if 1) both
        formats are present in dict1 and dict2 or 2) the force_64bands option is True.
        """
        # Union the keys to make sure we check each one
        ukeys = set(dict1.keys()) | set(dict2.keys())
        # Determine if there are any spectra vars to redistribute
        svars = set(self.spectrum_vars) & ukeys

        if len(svars) != 0:
            key = next(iter(svars))  # retrieves an element from the set
            shape1 = dict1[key].shape[1] if key in dict1.keys() else 0
            shape2 = dict2[key].shape[1] if key in dict2.keys() else 0
            shapes = [shape1, shape2]
            if 100 in shapes and (self.force_64bands or 64 in shapes):
                dicts = [dict1, dict2]
                for i, shape in enumerate(shapes):
                    if shape == 100:
                        spectra_obj = Spectra()
                        spectra_obj.set_spectrumArr_fromQuery(dicts[i])
                        spectra_obj.redist_specArr("Spectrum_64band")
                        redistributed_dict = spectra_obj.specArr_ToDict()
                        for v in self.spectrum_vars:
                            if v in dicts[i].keys():
                                dicts[i][v] = redistributed_dict[v]
        # Concatenate the variables
        result = {}
        for key in ukeys:
            if key in dict2 and key in dict1:
                result[key] = ma.concatenate([dict1[key], dict2[key]])
            elif key in dict2:
                result[key] = dict2[key]
            else:
                result[key] = dict1[key]
        return result

    def __merge_archive_helper(self, cdip_nc: CDIPnc, result):
        """Merges data from one archive nc file into result if its coverage
        overlaps the request timespan. Returns (result, file_start_stamp)."""
        file_start_stamp = cdip_utils.datetime_to_timestamp(
            cdip_nc.get_coverage_start()
        )
        file_end_stamp = cdip_utils.datetime_to_timestamp(cdip_nc.get_coverage_end())
        file_timespan = cdip_utils.Timespan(file_start_stamp, file_end_stamp)
        request_timespan = cdip_utils.Timespan(self.start_stamp, self.end_stamp)
        if request_timespan.overlap(file_timespan):
            cdip_nc.start_stamp = self.start_stamp
            cdip_nc.end_stamp = self.end_stamp
            cdip_nc.pub_set = self.pub_set
            cdip_nc.apply_mask = self.apply_mask
            cdip_nc.vrs = self.vrs
            tmp_result = cdip_nc.get_request()

            result = self.__aggregate_dicts(result, tmp_result)
        return result, file_start_stamp

    def __merge_xyz_helper(
        self, cdip_nc: CDIPnc, request_timespan: cdip_utils.Timespan, result: dict
    ):
        """Merges xyz data from one nc file into result if the file's data
        timespan overlaps the request. Returns (result, file_start_stamp);
        when the file has no usable xyz data the request start stamp is
        returned so the caller continues searching older files."""
        # Try the next file if it is without xyz data
        z = cdip_nc.get_var("xyzZDisplacement")
        if z is None:
            return result, self.start_stamp
        # Try the next file if start_stamp cannot be calculated
        start_stamp = cdip_nc.get_xyz_timestamp(0)
        end_stamp = cdip_nc.get_xyz_timestamp(len(z) - 1)
        if start_stamp is None:
            return result, self.start_stamp
        file_timespan = cdip_utils.Timespan(start_stamp, end_stamp)
        # Add data if request timespan overlaps data timespan
        if request_timespan.overlap(file_timespan):
            cdip_nc.start_stamp = self.start_stamp
            cdip_nc.end_stamp = self.end_stamp
            cdip_nc.pub_set = self.pub_set
            cdip_nc.apply_mask = self.apply_mask
            cdip_nc.vrs = self.vrs
            tmp_result = cdip_nc.get_request()
            result = self.__aggregate_dicts(result, tmp_result)
        return result, start_stamp

    def remove_duplicates(self, data_dict: dict) -> dict:
        """Duplicate records may exist after merge_ routines. This removes them."""
        result = {}
        keys = list(data_dict.keys())
        if len(keys) > 0:
            key = keys[0]
            prefix = self.get_var_prefix(key)
            time_dimension_name = prefix + "Time"
            # np.unique returns sorted unique times and the index of the
            # first occurrence of each; those indices are applied to every
            # other variable so records stay aligned with their time stamps.
            time_values, indices_of_unique_values = np.unique(
                data_dict[time_dimension_name], return_index=True
            )
            result[time_dimension_name] = time_values
            for key in keys:
                if key != time_dimension_name:
                    result[key] = data_dict[key][indices_of_unique_values]
            return result
        else:
            return data_dict

    def __merge_xyz_request(self):
        """Merge xyz data from realtime and archive nc files."""
        if self.vrs and self.vrs[0] == "xyzData":
            self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]
        request_timespan = cdip_utils.Timespan(self.start_stamp, self.end_stamp)
        arch_file_used = False
        rt_file_used = False
        result = {}
        # Default so the archive search below still runs (and no
        # UnboundLocalError is raised) when there is no realtime xyz file.
        start_stamp = self.start_stamp

        # First get realtime data if it exists
        # Pass data_dir/org for consistency with the other nc constructors.
        rt = RealtimeXY(self.stn, self.data_dir, self.org)
        if rt.nc is not None:
            rt_file_used = True
            result, start_stamp = self.__merge_xyz_helper(rt, request_timespan, result)

        # If the request start time is more recent than the realtime
        # start time, no need to look in the archives
        if self.start_stamp > start_stamp:
            return result

        # Second, look in archive files for data
        for dep in range(1, self.max_deployments):
            ar = Archive(self.stn, dep, self.data_dir, self.org)
            if ar.nc is None:
                break
            arch_file_used = True
            result, start_stamp = self.__merge_xyz_helper(ar, request_timespan, result)
            # Break if file start stamp is greater than request end stamp
            if start_stamp > self.end_stamp:
                break

        if rt_file_used and arch_file_used:
            result = self.remove_duplicates(result)
        return result

    def __merge_active_request(self, nc_class_type: str = "rt"):
        """
        Returns data for a given request across active datasets.

        When deploy_num is supplied all files (active and archive)
        are checked for data.
        """
        sorted_datasets = sorted(
            self.meta.active_datasets.items(), key=operator.itemgetter(1)
        )

        result = {}
        num_files_used = 0
        for ds in sorted_datasets:
            if nc_class_type == "xyz":
                a = ActiveXY(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)
            else:
                a = Active(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)

            # A moored deployment may have been finalized into an archive file.
            if ds[0] == "moored" and a.nc is None:
                a = Archive(
                    self.stn[0:3] + "p1", self.deploy_num, self.data_dir, self.org
                )

            if a.nc is not None:
                a.vrs = self.vrs
                a.start_stamp = self.start_stamp
                a.end_stamp = self.end_stamp
                a.pub_set = self.pub_set
                a.apply_mask = self.apply_mask
                tmp_result = a.get_request()
                result = self.__aggregate_dicts(result, tmp_result)
                num_files_used += 1

        if num_files_used > 1:
            result = self.remove_duplicates(result)
        return result

    def __merge_request(self):
        """Returns data for given request across realtime and historic files"""

        num_files_used = 0
        rt = {}
        r = self.realtime
        # Note that we are assuming that waveTime will work for every time dim.
        if r.nc is not None and r.get_var("waveTime")[0] <= self.end_stamp:
            num_files_used += 1
            r.vrs = self.vrs
            r.start_stamp = self.start_stamp
            r.end_stamp = self.end_stamp
            r.pub_set = self.pub_set
            r.apply_mask = self.apply_mask
            rt = r.get_request()

        ht = {}
        h = self.historic
        # Historic file contains public data
        if (
            h.nc is not None
            and h.get_var("waveTime")[-1] >= self.start_stamp
            and self.pub_set == "public"
        ):
            num_files_used += 1
            h.vrs = self.vrs
            h.start_stamp = self.start_stamp
            h.end_stamp = self.end_stamp
            h.pub_set = self.pub_set
            h.apply_mask = self.apply_mask
            ht = h.get_request()

        result = self.__aggregate_dicts(ht, rt)

        # Check Archive files if requesting non-pub data
        if self.pub_set != "public":
            for dep in range(1, self.max_deployments):
                ar = Archive(self.stn, dep, self.data_dir, self.org)
                if ar.nc is None:
                    break
                num_files_used += 1
                result, start_stamp = self.__merge_archive_helper(ar, result)
                # Break if file start stamp is greater than request end stamp
                if start_stamp > self.end_stamp:
                    break

        if num_files_used > 1:
            result = self.remove_duplicates(result)
        return result

    def get_nc_files(self, types: list = nc_file_types) -> dict:
        """Returns dict of netCDF4 objects of a station's netcdf files"""
        result = {}
        for ftype in types:
            if ftype == "historic":
                ht = Historic(self.stn, self.data_dir, self.org)
                if ht.nc:
                    result[ht.filename] = ht.nc
            if ftype == "archive":
                for dep in range(1, self.max_deployments):
                    ar = Archive(self.stn, dep, self.data_dir, self.org)
                    if ar.nc is None:
                        break
                    result[ar.filename] = ar
            if ftype in self.meta.active_datasets:
                for dep in range(1, self.max_deployments):
                    ac = Active(self.stn, dep, ftype, self.data_dir, self.org)
                    if ac.nc is not None:
                        result[ac.filename] = ac
                    ac = ActiveXY(self.stn, dep, ftype, self.data_dir, self.org)
                    if ac.nc is not None:
                        result[ac.filename] = ac
        return result

    def get_target_timespan(
        self, target_timestamp: int, num_target_records: int, time_var: str
    ) -> tuple:
        """Returns a timespan containing the n closest records to the target_timestamp.

        PARAMETERS
        ----------
        target_timestamp : int
            A unix timestamp which is the target time about which the closest
            n records will be returned.
        n : int
            The number of records to return that are closest to the target
            timestamp.
        time_var : str
            The name of the time dimension variable to use. E.g. waveTime.

        RETURNS
        -------
        A 2-tuple of timestamps corresponding to i and i+n (where n may
        be negative) which will be the timestamps for the n records
        closest to the target_timestamp.
        """
        r_ok = False
        if self.realtime.nc is not None:
            r_ok = True
        h_ok = False
        if self.historic.nc is not None:
            h_ok = True

        # Check realtime to find closest index

        r_closest_idx = None
        if r_ok:
            r_stamps = self.realtime.get_var(time_var)[:]
            r_last_idx = len(r_stamps) - 1
            i_b = bisect_left(r_stamps, target_timestamp)
            # i_b will be possibly one more than the last index
            i_b = min(i_b, r_last_idx)
            # Target timestamp is exactly equal to a data time
            if i_b == r_last_idx or r_stamps[i_b] == target_timestamp:
                r_closest_idx = i_b
            elif i_b > 0:
                r_closest_idx = cdip_utils.get_closest_index(
                    i_b - 1, i_b, r_stamps, target_timestamp
                )

        # If closest index not found, check historic

        h_closest_idx = None
        h_last_idx = None  # Lets us know if h_stamps has been loaded
        if h_ok and not r_closest_idx:
            h_stamps = self.historic.get_var(time_var)[:]
            h_last_idx = len(h_stamps) - 1
            i_b = bisect_left(h_stamps, target_timestamp)
            i_b = min(i_b, h_last_idx)
            # Target timestamp is exactly equal to a data time
            if (i_b <= h_last_idx and h_stamps[i_b] == target_timestamp) or i_b == 0:
                h_closest_idx = i_b
            elif i_b >= h_last_idx:  # Target is between the two files
                if r_ok:
                    if abs(h_stamps[h_last_idx] - target_timestamp) < abs(
                        r_stamps[0] - target_timestamp
                    ):
                        h_closest_idx = i_b
                    else:
                        r_closest_idx = 0
                else:  # No realtime file
                    h_closest_idx = i_b
            else:  # Within middle of historic stamps
                h_closest_idx = cdip_utils.get_closest_index(
                    i_b - 1, i_b, h_stamps, target_timestamp
                )

        # Now we have the closest index, find the intervals

        if r_closest_idx is not None:
            r_interval = cdip_utils.get_interval(
                r_stamps, r_closest_idx, num_target_records
            )
            # If bound exceeded toward H and H exists, calculate h_interval
            if r_interval[2] < 0 and h_ok:
                if not h_last_idx:
                    h_stamps = self.historic.get_var(time_var)[:]
                    h_last_idx = len(h_stamps) - 1
                h_interval = cdip_utils.get_interval(
                    h_stamps, h_last_idx, num_target_records + r_closest_idx + 1
                )
                return cdip_utils.combine_intervals(h_interval, r_interval)
            else:
                return r_interval
        elif h_closest_idx is not None:
            h_interval = cdip_utils.get_interval(
                h_stamps, h_closest_idx, num_target_records
            )
            # If bound exceeded toward R and R exists, calculate r_interval
            if h_interval[2] > 0 and r_ok:
                r_interval = cdip_utils.get_interval(
                    r_stamps, 0, num_target_records + h_closest_idx - h_last_idx - 1
                )
                return cdip_utils.combine_intervals(h_interval, r_interval)
            else:
                return h_interval

        # If we get to here there's a problem
        return (None, None, None)

__aggregate_dicts(dict1: dict, dict2: dict) -> dict

Returns a dict of data combined from two dictionaries. Dict1 has oldest data. All the other __merge methods end up using this method.

This method also redistributes 100 band spectra to 64 band format if 1) both formats are present in dict1 and dict2 or 2) the force_64bands option is True.

Source code in cdippy/stndata.py
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
def __aggregate_dicts(self, dict1: dict, dict2: dict) -> dict:
    """
    Returns a dict of data combined from two dictionaries. Dict1 has oldest data.
    All the other __merge methods end up using this method.

    This method also redistributes 100 band spectra to 64 band format if 1) both
    formats are present in dict1 and dict2 or 2) the force_64bands option is True.
    """
    # Union the keys to make sure we check each one
    ukeys = set(dict1.keys()) | set(dict2.keys())
    # Determine if there are any spectra vars to redistribute
    svars = set(self.spectrum_vars) & ukeys

    if len(svars) != 0:
        key = next(iter(svars))  # retrieves an element from the set
        shape1 = dict1[key].shape[1] if key in dict1.keys() else 0
        shape2 = dict2[key].shape[1] if key in dict2.keys() else 0
        shapes = [shape1, shape2]
        if 100 in shapes and (self.force_64bands or 64 in shapes):
            dicts = [dict1, dict2]
            for i, shape in enumerate(shapes):
                if shape == 100:
                    spectra_obj = Spectra()
                    spectra_obj.set_spectrumArr_fromQuery(dicts[i])
                    spectra_obj.redist_specArr("Spectrum_64band")
                    redistributed_dict = spectra_obj.specArr_ToDict()
                    for v in self.spectrum_vars:
                        if v in dicts[i].keys():
                            dicts[i][v] = redistributed_dict[v]
    # Concatenate the variables
    result = {}
    for key in ukeys:
        if key in dict2 and key in dict1:
            result[key] = ma.concatenate([dict1[key], dict2[key]])
        elif key in dict2:
            result[key] = dict2[key]
        else:
            result[key] = dict1[key]
    return result

__init__(stn: str, data_dir: str = None, org: str = None, deploy_num: int = None)

PARAMETERS

stn : str Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2 data_dir : str [optional] Either a full path to a directory containing a local directory hierarchy of nc files. E.g. '/project/WNC' or a url to a THREDDS server. org: str (Organization) Values are: cdip|ww3|external deploy_num : int [optional] Supply this to access specific station deployment data. Must be >= 1.

Source code in cdippy/stndata.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def __init__(
    self, stn: str, data_dir: str = None, org: str = None, deploy_num: int = None
):
    """
    PARAMETERS
    ----------
    stn : str
       Can be in 2, 3 or 5 char format e.g. 28, 028, 028p2
    data_dir : str [optional]
        Either a full path to a directory containing a local directory hierarchy
        of nc files. E.g. '/project/WNC' or a url to a THREDDS server.
    org: str
        (Organization) Values are: cdip|ww3|external
    deploy_num : int [optional]
        Supply this to access specific station deployment data.
        Must be >= 1.
    """
    self.nc = None
    self.stn = stn
    self.data_dir = data_dir
    self.org = org

    # Accept numbers for cdip stations
    if type(stn) is not str:
        stn = str(stn).zfill(3) + "p1"

    # Initialize nc file used for meta information
    self.deploy_num = deploy_num
    if deploy_num:
        # Check all active datasets in this order p3 -> p2 -> p1 -> p0
        p_lookup = dict([[v, k] for k, v in self.active_datasets.items()])
        __found_active_meta = False
        for p in reversed(sorted(p_lookup)):
            self.meta = Active(
                self.stn, self.deploy_num, p_lookup[p], self.data_dir, self.org
            )
            if self.meta.nc:
                __found_active_meta = True
                break
        if not __found_active_meta:
            self.meta = Archive(self.stn, self.deploy_num, self.data_dir, self.org)
    else:
        self.historic = Historic(self.stn, self.data_dir, self.org)
        self.realtime = Realtime(self.stn, self.data_dir, self.org)
        if self.historic and self.historic.nc:
            self.meta = self.historic
        else:
            if self.realtime and self.realtime.nc:
                self.meta = self.realtime
    if self.meta is None:
        return None

__merge_active_request(nc_class_type: str = 'rt')

Returns data for a given request across active datasets.

When deploy_num is supplied all files (active and archive) are checked for data.

Source code in cdippy/stndata.py
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
def __merge_active_request(self, nc_class_type: str = "rt"):
    """
    Returns data for a given request across active datasets.

    When deploy_num is supplied all files (active and archive)
    are checked for data.
    """
    sorted_datasets = sorted(
        self.meta.active_datasets.items(), key=operator.itemgetter(1)
    )

    result = {}
    num_files_used = 0
    for ds in sorted_datasets:
        if nc_class_type == "xyz":
            a = ActiveXY(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)
        else:
            a = Active(self.stn, self.deploy_num, ds[0], self.data_dir, self.org)

        if ds[0] == "moored" and a.nc is None:
            a = Archive(
                self.stn[0:3] + "p1", self.deploy_num, self.data_dir, self.org
            )

        if a.nc is not None:
            a.vrs = self.vrs
            a.start_stamp = self.start_stamp
            a.end_stamp = self.end_stamp
            a.pub_set = self.pub_set
            a.apply_mask = self.apply_mask
            tmp_result = a.get_request()
            result = self.__aggregate_dicts(result, tmp_result)
            num_files_used += 1

    if num_files_used > 1:
        result = self.remove_duplicates(result)
    return result

__merge_request()

Returns data for given request across realtime and historic files

Source code in cdippy/stndata.py
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
def __merge_request(self):
    """Returns data for given request across realtime and historic files.

    Reads the request parameters from self (vrs, start_stamp, end_stamp,
    pub_set, apply_mask).  The historic file holds only public data, so
    archive deployment files are additionally scanned when a non-public
    pub_set is requested.  Duplicates are removed when more than one
    file contributed records.
    """

    num_files_used = 0
    rt = {}
    r = self.realtime
    # Note that we are assuming that waveTime will work for every time dim.
    # Query realtime only when its first record is not after the request end.
    if r.nc is not None and r.get_var("waveTime")[0] <= self.end_stamp:
        num_files_used += 1
        # Copy the request parameters onto the dataset object before fetching.
        r.vrs = self.vrs
        r.start_stamp = self.start_stamp
        r.end_stamp = self.end_stamp
        r.pub_set = self.pub_set
        r.apply_mask = self.apply_mask
        rt = r.get_request()

    ht = {}
    h = self.historic
    # Historic file contains public data
    # so it is only consulted for "public" requests that overlap it.
    if (
        h.nc is not None
        and h.get_var("waveTime")[-1] >= self.start_stamp
        and self.pub_set == "public"
    ):
        num_files_used += 1
        h.vrs = self.vrs
        h.start_stamp = self.start_stamp
        h.end_stamp = self.end_stamp
        h.pub_set = self.pub_set
        h.apply_mask = self.apply_mask
        ht = h.get_request()

    result = self.__aggregate_dicts(ht, rt)

    # Check Archive files if requesting non-pub data
    if self.pub_set != "public":
        # Deployments are numbered consecutively from 1; stop at the first
        # missing file.
        for dep in range(1, self.max_deployments):
            ar = Archive(self.stn, dep, self.data_dir, self.org)
            if ar.nc is None:
                break
            num_files_used += 1
            result, start_stamp = self.__merge_archive_helper(ar, result)
            # Break if file start stamp is greater than request end stamp
            if start_stamp > self.end_stamp:
                break

    if num_files_used > 1:
        # Overlapping files can produce duplicate timestamps.
        result = self.remove_duplicates(result)
    return result

__merge_xyz_request()

Merge xyz data from realtime and archive nc files.

Source code in cdippy/stndata.py
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def __merge_xyz_request(self):
    """Merge xyz data from realtime and archive nc files.

    Uses self.start_stamp/self.end_stamp as the requested timespan and
    returns a dict of displacement variables.  Duplicates are removed
    when both a realtime and an archive file contributed data.
    """
    # "xyzData" is shorthand for the three displacement components.
    if self.vrs and self.vrs[0] == "xyzData":
        self.vrs = ["xyzXDisplacement", "xyzYDisplacement", "xyzZDisplacement"]
    request_timespan = cdip_utils.Timespan(self.start_stamp, self.end_stamp)
    arch_file_used = False
    rt_file_used = False
    result = {}

    # First get realtime data if it exists
    rt = RealtimeXY(self.stn)
    if rt.nc is not None:
        rt_file_used = True
        result, start_stamp = self.__merge_xyz_helper(rt, request_timespan, result)

        # If the request start time is more recent than the realtime
        # start time, no need to look in the archives.
        # BUGFIX: this check must live inside the realtime branch so that
        # start_stamp is always bound; previously, with no realtime file,
        # the unconditional comparison raised UnboundLocalError.
        if self.start_stamp > start_stamp:
            return result

    # Second, look in archive files for data
    for dep in range(1, self.max_deployments):
        ar = Archive(self.stn, dep, self.data_dir, self.org)
        if ar.nc is None:
            break
        arch_file_used = True
        result, start_stamp = self.__merge_xyz_helper(ar, request_timespan, result)
        # Break if file start stamp is greater than request end stamp
        if start_stamp > self.end_stamp:
            break

    if rt_file_used and arch_file_used:
        result = self.remove_duplicates(result)
    return result

get_nc_files(types: list = nc_file_types) -> dict

Returns dict of netCDF4 objects of a station's netcdf files

Source code in cdippy/stndata.py
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
def get_nc_files(self, types: list = nc_file_types) -> dict:
    """Returns dict of netCDF4 objects of a station's netcdf files.

    NOTE(review): for "historic" the stored value is the underlying
    netCDF4 dataset (ht.nc), while archive/active entries store the
    CDIPnc wrapper object itself -- confirm this asymmetry is intended.
    """
    found = {}
    for file_type in types:
        if file_type == "historic":
            historic = Historic(self.stn, self.data_dir, self.org)
            if historic.nc:
                found[historic.filename] = historic.nc
        if file_type == "archive":
            # Deployment numbers are consecutive; stop at the first gap.
            for deployment in range(1, self.max_deployments):
                archive = Archive(self.stn, deployment, self.data_dir, self.org)
                if archive.nc is None:
                    break
                found[archive.filename] = archive
        if file_type in self.meta.active_datasets:
            for deployment in range(1, self.max_deployments):
                # Check both the parameter and the displacement variants.
                for active_cls in (Active, ActiveXY):
                    candidate = active_cls(
                        self.stn, deployment, file_type, self.data_dir, self.org
                    )
                    if candidate.nc is not None:
                        found[candidate.filename] = candidate
    return found

get_parameters(start: datetime = None, end: datetime = None, pub_set: str = 'public', apply_mask=True, target_records=0) -> dict

Calls get_series to return wave parameters.

Source code in cdippy/stndata.py
165
166
167
168
169
170
171
172
173
174
175
176
def get_parameters(
    self,
    start: datetime = None,
    end: datetime = None,
    pub_set: str = "public",
    apply_mask=True,
    target_records=0,
) -> dict:
    """Convenience wrapper: fetch the standard wave parameters via get_series."""
    return self.get_series(
        start,
        end,
        vrs=self.parameter_vars,
        pub_set=pub_set,
        apply_mask=apply_mask,
        target_records=target_records,
    )

get_series(start: datetime = None, end: datetime = None, vrs: list = None, pub_set: str = None, apply_mask: bool = None, target_records: int = 0, force_64bands: bool = False) -> dict

Returns a dict of data between start and end dates with specified quality.

PARAMETERS

start : str or datetime [optional] : default Jan 1, 1975 Start time of data request (UTC). If provided as a string must be in the format Y-m-d H:M:S where Y is 4 chars and all others are 2 chars. Ex. '2020-03-30 19:32:56'. end : str or datetime [optional] : default now End time of data request (UTC). If not supplied defaults to now. vrs : list [optional] : default ['waveHs'] A list of the names of variables to retrieve. They all must start with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp'] pub_set: str [optional] values = public|nonpub|all Filters data based on data quality flags. apply_mask: bool [optional] default True Removes values from the masked array that have a mask value of True. Ex. If nonpub data is requested and apply_mask is False, the returned array will contain both public and nonpublic data (although public data records will have the mask value set to True). If apply_mask is set to True, only nonpub records will be returned. target_records: int [optional] If start is specified and end is None, this will specify the number of additional records to return closest to start. force_64bands: bool [optional] For the case in which all spectra returned are mk4 100 band format, force the conversion to 64bands. Mixed formats are always returned in mk3 64 band format.

Source code in cdippy/stndata.py
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
def get_series(
    self,
    start: datetime = None,
    end: datetime = None,
    vrs: list = None,
    pub_set: str = None,
    apply_mask: bool = None,
    target_records: int = 0,
    force_64bands: bool = False,
) -> dict:
    """
    Returns a dict of data between start and end dates with specified quality.

    PARAMETERS
    ----------
    start : str or datetime [optional] : default Jan 1, 1975
        Start time of data request (UTC). If provided as a string must
        be in the format Y-m-d H:M:S where Y is 4 chars and all others
        are 2 chars. Ex. '2020-03-30 19:32:56'.
    end : str or datetime [optional] : default now
        End time of data request (UTC). If not supplied defaults to now.
    vrs : list [optional] : default ['waveHs']
        A list of the names of variables to retrieve. They all must start
        with the same prefix, e.g. ['waveHs', 'waveTp', 'waveDp']
    pub_set: str [optional] values = public|nonpub|all
        Filters data based on data quality flags.
    apply_mask: bool [optional] default True
        Removes values from the masked array that have a mask value of True.
        Ex. If nonpub data is requested and apply_mask is False, the returned
        array will contain both public and nonpublic data (although public
        data records will have the mask value set to True). If apply_mask
        is set to True, only nonpub records will be returned.
    target_records: int [optional]
        If start is specified and end is None, this will specify the number
        of additional records to return closest to start.
    force_64bands: bool [optional]
        For the case in which all spectra returned are mk4 100 band format,
        force the conversion to 64bands. Mixed formats are always returned in mk3
        64 band format.
    """
    if vrs is None:
        vrs = self.parameter_vars
    prefix = self.get_var_prefix(vrs[0])

    if start is not None and end is None:  # Target time
        if isinstance(start, str):
            start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
        ts_I = self.get_target_timespan(
            cdip_utils.datetime_to_timestamp(start), target_records, prefix + "Time"
        )
        if ts_I[0] is not None:
            start = cdip_utils.timestamp_to_datetime(ts_I[0])
            end = cdip_utils.timestamp_to_datetime(ts_I[1])
        else:
            # No records exist near the target time.
            return None
    elif start is None:  # Use default 3 days back
        # NOTE(review): a caller-supplied `end` is overwritten here whenever
        # start is None -- confirm this is intentional.
        start = datetime.utcnow() - timedelta(days=3)
        end = datetime.utcnow()

    if pub_set is None:
        pub_set = self.pub_set

    if apply_mask is None:
        apply_mask = self.apply_mask

    self.force_64bands = force_64bands

    self.set_request_info(start, end, vrs, pub_set, apply_mask)
    # BUGFIX: removed a leftover debug `print(self.deploy_num)`.

    # Dispatch on the variable family and whether a specific deployment
    # was requested.
    if prefix == "xyz" and self.deploy_num is None:
        return self.__merge_xyz_request()
    elif prefix == "xyz" and self.deploy_num is not None:
        return self.__merge_active_request("xyz")
    elif self.deploy_num is None:
        return self.__merge_request()
    else:
        return self.__merge_active_request("rt")

get_spectra(start: datetime = None, end: datetime = None, pub_set: str = 'public', apply_mask: bool = True, target_records: int = 0, force_64bands: bool = False) -> dict

Calls get_series to return spectral data.

Source code in cdippy/stndata.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def get_spectra(
    self,
    start: datetime = None,
    end: datetime = None,
    pub_set: str = "public",
    apply_mask: bool = True,
    target_records: int = 0,
    force_64bands: bool = False,
) -> dict:
    """Fetch spectral variables by delegating to get_series."""
    return self.get_series(
        start,
        end,
        vrs=self.spectrum_vars,
        pub_set=pub_set,
        apply_mask=apply_mask,
        target_records=target_records,
        force_64bands=force_64bands,
    )

get_stn_meta() -> dict

Returns a dict of station meta data.

Source code in cdippy/stndata.py
153
154
155
156
157
158
159
160
161
162
163
def get_stn_meta(self) -> dict:
    """Return a dict of station meta data plus selected global attributes."""
    if self.meta is None:
        return {}
    self.meta.set_request_info(vrs=self.meta_vars)
    stn_info = self.meta.get_request()
    # Sentinel distinguishes genuinely absent attributes from None values.
    _missing = object()
    for attr_name in self.meta_attributes:
        value = getattr(self.meta.nc, attr_name, _missing)
        if value is not _missing:
            stn_info[attr_name] = value
    return stn_info

get_target_timespan(target_timestamp: int, num_target_records: int, time_var: str) -> tuple

Returns a timespan containing the n closest records to the target_timestamp.

PARAMETERS

target_timestamp : int A unix timestamp which is the target time about which the closest n records will be returned. num_target_records : int The number of records to return that are closest to the target timestamp. time_var : str The name of the time dimension variable to use. E.g. waveTime.

RETURNS

A 2-tuple of timestamps corresponding to i and i+n (where n may be negative) which will be the timestamps for the n records closest to the target_timestamp.

Source code in cdippy/stndata.py
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
def get_target_timespan(
    self, target_timestamp: int, num_target_records: int, time_var: str
) -> tuple:
    """Returns a timespan containing the n closest records to the target_timestamp.

    PARAMETERS
    ----------
    target_timestamp : int
        A unix timestamp which is the target time about which the closest
        n records will be returned.
    num_target_records : int
        The number of records to return that are closest to the target
        timestamp.
    time_var : str
        The name of the time dimension variable to use. E.g. waveTime.

    RETURNS
    -------
    A tuple of timestamps corresponding to i and i+n (where n may
    be negative) which will be the timestamps for the n records
    closest to the target_timestamp.  Returns (None, None, None) when
    neither the realtime nor the historic file can satisfy the request.
    """
    r_ok = False
    if self.realtime.nc is not None:
        r_ok = True
    h_ok = False
    if self.historic.nc is not None:
        h_ok = True

    # Check realtime to find closest index

    r_closest_idx = None
    if r_ok:
        r_stamps = self.realtime.get_var(time_var)[:]
        r_last_idx = len(r_stamps) - 1
        i_b = bisect_left(r_stamps, target_timestamp)
        # i_b will be possibly one more than the last index
        i_b = min(i_b, r_last_idx)
        # Target timestamp is exactly equal to a data time
        if i_b == r_last_idx or r_stamps[i_b] == target_timestamp:
            r_closest_idx = i_b
        elif i_b > 0:
            r_closest_idx = cdip_utils.get_closest_index(
                i_b - 1, i_b, r_stamps, target_timestamp
            )

    # If closest index not found, check historic

    h_closest_idx = None
    h_last_idx = None  # Lets us know if h_stamps has been loaded
    # BUGFIX: test `is None` rather than truthiness -- index 0 is a valid
    # (but falsy) closest index and must not trigger the historic search.
    if h_ok and r_closest_idx is None:
        h_stamps = self.historic.get_var(time_var)[:]
        h_last_idx = len(h_stamps) - 1
        i_b = bisect_left(h_stamps, target_timestamp)
        i_b = min(i_b, h_last_idx)
        # Target timestamp is exactly equal to a data time
        if (i_b <= h_last_idx and h_stamps[i_b] == target_timestamp) or i_b == 0:
            h_closest_idx = i_b
        elif i_b >= h_last_idx:  # Target is between the two files
            if r_ok:
                if abs(h_stamps[h_last_idx] - target_timestamp) < abs(
                    r_stamps[0] - target_timestamp
                ):
                    h_closest_idx = i_b
                else:
                    r_closest_idx = 0
            else:  # No realtime file
                h_closest_idx = i_b
        else:  # Within middle of historic stamps
            h_closest_idx = cdip_utils.get_closest_index(
                i_b - 1, i_b, h_stamps, target_timestamp
            )

    # Now we have the closest index, find the intervals

    if r_closest_idx is not None:
        r_interval = cdip_utils.get_interval(
            r_stamps, r_closest_idx, num_target_records
        )
        # If bound exceeded toward H and H exists, calculate h_interval
        if r_interval[2] < 0 and h_ok:
            # BUGFIX: compare against None -- h_last_idx == 0 is a valid
            # loaded state (single-record historic file).
            if h_last_idx is None:
                h_stamps = self.historic.get_var(time_var)[:]
                h_last_idx = len(h_stamps) - 1
            h_interval = cdip_utils.get_interval(
                h_stamps, h_last_idx, num_target_records + r_closest_idx + 1
            )
            return cdip_utils.combine_intervals(h_interval, r_interval)
        else:
            return r_interval
    elif h_closest_idx is not None:
        h_interval = cdip_utils.get_interval(
            h_stamps, h_closest_idx, num_target_records
        )
        # If bound exceeded toward R and R exists, calculate r_interval
        if h_interval[2] > 0 and r_ok:
            r_interval = cdip_utils.get_interval(
                r_stamps, 0, num_target_records + h_closest_idx - h_last_idx - 1
            )
            return cdip_utils.combine_intervals(h_interval, r_interval)
        else:
            return h_interval

    # If we get to here there's a problem
    return (None, None, None)

get_xyz(start: datetime = None, end: datetime = None, pub_set: str = 'public') -> dict

Calls get_series to return displacement data.

Source code in cdippy/stndata.py
178
179
180
181
182
def get_xyz(
    self, start: datetime = None, end: datetime = None, pub_set: str = "public"
) -> dict:
    """Fetch buoy displacement (xyz) data via get_series."""
    return self.get_series(start, end, vrs=self.xyz_vars, pub_set=pub_set)

remove_duplicates(data_dict: dict) -> dict

Duplicate records may exist after merge_ routines. This removes them.

Source code in cdippy/stndata.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
def remove_duplicates(self, data_dict: dict) -> dict:
    """Duplicate records may exist after merge_ routines. This removes them."""
    all_keys = list(data_dict.keys())
    if not all_keys:
        return data_dict
    # The time dimension name is derived from the first variable's prefix.
    time_key = self.get_var_prefix(all_keys[0]) + "Time"
    unique_times, unique_idx = np.unique(
        data_dict[time_key], return_index=True
    )
    deduped = {time_key: unique_times}
    # Keep, for every other variable, the rows at the first occurrence of
    # each unique timestamp.
    for key in all_keys:
        if key != time_key:
            deduped[key] = data_dict[key][unique_idx]
    return deduped