# Source code for pysatMadrigal.instruments.gnss_tec

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3824979
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Supports the MIT Haystack GNSS TEC data products.

The Global Navigation Satellite System (GNSS) is used in conjunction with a
world-wide receiver network to produce total electron content (TEC) data
products, including vertical and line-of-sight (or slant) TEC.

Downloads data from the MIT Haystack Madrigal Database.

Properties
----------
platform
    'gnss'
name
    'tec'
tag
    'vtec', 'site', 'los'
inst_id
    '' (not used)

Examples
--------
::

    import datetime as dt
    import pysat
    import pysatMadrigal as pymad

    # Get and load all vertical TEC for 19 Nov 2017
    vtec = pysat.Instrument(inst_module=pymad.instruments.gnss_tec, tag='vtec')
    vtec.download(dt.datetime(2017, 11, 19), dt.datetime(2017, 11, 20),
                  user='Firstname+Lastname', password='email@address.com')
    vtec.load(date=dt.datetime(2017, 11, 19))

    # Get and load the GLONASS slant TEC from the zzon site on 1 Jan 2023
    stec = pysat.Instrument(inst_module=pymad.instruments.gnss_tec, tag='los')
    stec.download(start=dt.datetime(2023, 1, 1), user='Firstname+Lastname',
                  password='email@address.com')
    stec.load(2023, 1, los_method='site', los_value='zzon',
              gnss_network='glonass')

Note
----
Please provide name and email when downloading data with this routine.

The line-of-sight data is too large to load an entire file at once. Data may be
loaded by individual receiver site for any number of days (recommended to load
one day) or a given time. To discover the available sites and times (exact times
are required for selection), you may use the
`pysatMadrigal.instruments.methods.gnss.get_los_times` and
`pysatMadrigal.instruments.methods.gnss.get_los_receiver_sites` functions.

"""

import datetime as dt
import numpy as np

import pysat

from pysatMadrigal.instruments.methods import general
from pysatMadrigal.instruments.methods import gnss

# ----------------------------------------------------------------------------
# Instrument attributes

# Platform and name identifying this Instrument module
platform = 'gnss'
name = 'tec'

# Supported data products, keyed by tag, with a short description of each
tags = {
    'vtec': 'vertical TEC',
    'site': 'Sites used in daily TEC data',
    'los': 'line-of-sight TEC',
}

# The instrument ID is not used, so every tag is listed under the empty ID
inst_ids = {'': list(tags)}

# The `load` function in this module returns an xarray.Dataset
pandas_format = False

# Madrigal tags: the instrument code and, for each instrument ID and tag,
# the kind-of-data code passed to the Madrigal download routine
madrigal_inst_code = 8000
madrigal_tag = {'': {'vtec': '3500', 'site': '3506', 'los': '3505'}}

# Local attributes
# Split the general Madrigal file format string on its wildcards so that the
# pieces may be re-used when building the tag-specific file name templates
fname = general.madrigal_file_format_str(
    madrigal_inst_code, verbose=False).split("*")

# Local file name templates for each instrument ID and tag. The braces around
# the date fields are doubled so that they remain format fields after the
# `file_type` substitution performed when building `remote_tags`.
supported_tags = {
    iid: {
        'vtec': 'gps' + fname[1] + 'g' + fname[2],
        'los': 'los_{{year:04d}}{{month:02d}}{{day:02d}}' + fname[2],
        'site': 'site_{{year:04d}}{{month:02d}}{{day:02d}}' + fname[2],
    }
    for iid in inst_ids
}

# Remote file name templates, with the file type fixed to HDF5
remote_tags = {
    iid: {tag_key: supported_tags[iid][tag_key].format(file_type='hdf5')
          for tag_key in inst_ids[iid]}
    for iid in inst_ids
}

# ----------------------------------------------------------------------------
# Instrument test attributes

# Date used for each tag (keyed by instrument ID, then tag)
_test_dates = {
    '': {
        'vtec': dt.datetime(2017, 11, 19),
        'site': dt.datetime(2001, 1, 1),
        'los': dt.datetime(2023, 1, 1),
    },
}

# Load keyword argument sets for the line-of-sight data, one selecting by
# receiver site and GNSS network and one selecting by time
_test_load_opt = {
    '': {
        'los': [
            {'los_method': 'site', 'los_value': 'zzon',
             'gnss_network': 'glonass'},
            {'los_method': 'time', 'los_value': dt.datetime(2023, 1, 1)},
        ],
    },
}

_test_download_ci = {'': {'los': False}}  # Download is too large to test

# Logger messages associated with each tag and clean level; the 'site' tag
# uses a different message fragment than the other tags
_clean_warn = {
    '': {
        tag_name: {
            level: [('logger', 'INFO',
                     ('Data provided at a clean level'
                      if tag_name == 'site'
                      else 'further cleaning may be performed'),
                     level)]
            for level in ('clean', 'dusty', 'dirty')
        }
        for tag_name in inst_ids['']
    },
}

# ----------------------------------------------------------------------------
# Instrument methods


def init(self):
    """Initialize the Instrument object with values specific to GNSS TEC.

    Note
    ----
    Sets the `acknowledgements` and `references` attributes, then logs the
    acknowledgements at the INFO level.

    """
    # Combine the GNSS acknowledgements with the CEDAR rules text
    ack_parts = [gnss.acknowledgements(self.name), general.cedar_rules()]
    self.acknowledgements = '\n'.join(ack_parts)

    self.references = gnss.references(self.name)

    pysat.logger.info(self.acknowledgements)

    return


def clean(self):
    """Clean GNSS TEC data to a specific level.

    Note
    ----
    Supports 'clean', 'dusty', and 'dirty'. Not called by pysat if
    `clean_level` is None.

    No data values are changed here; an INFO-level message is logged that
    names the measurement error variable available for further cleaning of
    the 'vtec' and 'los' data.

    """
    # Measurement error variable for the tags that provide one
    error_var = {"vtec": "dtec", "los": "dlos_tec"}.get(self.tag)

    msg = "Data provided at a clean level"
    if error_var is not None:
        msg = "{:s}, further cleaning may be performed using {:s}".format(
            msg, "the measurement error '{:s}'".format(error_var))

    pysat.logger.info(msg)

    return


# ----------------------------------------------------------------------------
# Instrument functions
#
# Use the default Madrigal methods


def list_files(tag, inst_id, data_path=None, format_str=None,
               file_cadence=dt.timedelta(days=1), delimiter=None,
               file_type=None):
    """Return a Pandas Series of every file for chosen Instrument data.

    Parameters
    ----------
    tag : str
        Denotes type of file to load. Accepts strings corresponding to the
        appropriate Madrigal Instrument `tags`.
    inst_id : str
        Specifies the instrument ID to load. Accepts strings corresponding to
        the appropriate Madrigal Instrument `inst_ids`.
    data_path : str or NoneType
        Path to data directory. If None is specified, the value previously
        set in Instrument.files.data_path is used. (default=None)
    format_str : str or NoneType
        User specified file format. If None is specified, the default
        formats associated with the supplied tags are used. (default=None)
    file_cadence : dt.timedelta or pds.DateOffset
        pysat assumes a daily file cadence, but some instrument data files
        contain longer periods of time. This parameter allows the
        specification of regular file cadences greater than or equal to a
        day (e.g., weekly, monthly, or yearly).
        (default=dt.timedelta(days=1))
    delimiter : str or NoneType
        Delimiter string upon which files will be split (e.g., '.'). If None,
        filenames will be parsed presuming a fixed width format.
        (default=None)
    file_type : str or NoneType
        File format for Madrigal data. Load routines currently accept
        'hdf5', 'simple', and 'netCDF4', but any of the Madrigal options may
        be used here. If None, will look for all known file types.
        (default=None)

    Returns
    -------
    out : pds.Series
        A pandas Series containing the verified available files

    Note
    ----
    The two-digit year break is not a user input. It is set to 99 for the
    'vtec' tag and to None (four-digit years assumed) for all other tags
    before being passed to the general Madrigal file listing routine.

    """
    # Only the vertical TEC file listing is given a two-digit year break
    year_break = 99 if tag == 'vtec' else None

    return general.list_files(tag, inst_id, data_path=data_path,
                              format_str=format_str,
                              supported_tags=supported_tags,
                              file_cadence=file_cadence,
                              two_digit_year_break=year_break,
                              delimiter=delimiter, file_type=file_type)


def download(date_array, tag='', inst_id='', data_path=None, user=None,
             password=None, url='http://cedar.openmadrigal.org',
             file_type='netCDF4'):
    """Download data from Madrigal.

    Parameters
    ----------
    date_array : array-like
        List of datetimes to download data for. The sequence of dates need
        not be contiguous.
    tag : str
        Tag identifier used for particular dataset. This input is provided
        by pysat. (default='')
    inst_id : str
        Instrument ID string identifier used for particular dataset. This
        input is provided by pysat. (default='')
    data_path : str
        Path to directory to download data to. (default=None)
    user : str
        User string input used for download. Provided by user and passed via
        pysat. (default=None)
    password : str
        Password for data download. (default=None)
    url : str
        URL for Madrigal site (default='http://cedar.openmadrigal.org')
    file_type : str
        File format for Madrigal data. (default='netCDF4')

    Note
    ----
    The user's names should be provided in field user. Anthea Coster should
    be entered as Anthea+Coster

    The password field should be the user's email address. These parameters
    are passed to Madrigal when downloading.

    The affiliation field is set to pysat to enable tracking of pysat
    downloads.

    A warning is logged before attempting a line-of-sight ('los') download.

    """
    if tag == 'los':
        pysat.logger.warning(
            'LoS download is very large and succeptible to failure.')

    # Kind-of-data code for the requested instrument ID and tag
    kindat = madrigal_tag[inst_id][tag]

    general.download(date_array, inst_code=str(madrigal_inst_code),
                     kindat=kindat, data_path=data_path, user=user,
                     password=password, file_type=file_type, url=url)

    return


def load(fnames, tag='', inst_id='', los_method='site', los_value=None,
         gnss_network='all'):
    """Load the GNSS TEC data.

    Parameters
    ----------
    fnames : list
        List of filenames
    tag : str
        Tag name used to identify particular data set to be loaded. This
        input is nominally provided by pysat itself. (default='')
    inst_id : str
        Instrument ID used to identify particular data set to be loaded.
        This input is nominally provided by pysat itself. (default='')
    los_method : str
        For 'los' tag only, load data for a unique GNSS receiver site
        ('site') or at a unique time ('time') (default='site')
    los_value : str, dt.datetime, or NoneType
        For 'los' tag only, load data at this unique site or time
        (default=None)
    gnss_network : str
        For 'los' tag only, limit data by GNSS network if not 'all'.
        Currently supports 'all', 'gps', and 'glonass' (default='all')

    Returns
    -------
    data : xarray.Dataset
        Object containing satellite data
    meta : pysat.Meta
        Object containing metadata such as column names and units

    Raises
    ------
    ValueError
        If tag is 'los' and no valid 'los_value' is provided or unknown tag

    Note
    ----
    The line-of-sight data is too large to load an entire file at once. Data
    may be loaded by individual receiver site for any number of days
    (recommended to load one day) or a given time. To discover the available
    sites and times (exact times are required for selection), you may use the
    `pysatMadrigal.instruments.methods.gnss.get_los_times` and
    `pysatMadrigal.instruments.methods.gnss.get_los_receiver_sites`
    functions.

    """
    # Load the specified data
    if tag == 'vtec':
        data, meta, lat_keys, lon_keys = gnss.load_vtec(fnames)
    elif tag == 'site':
        data, meta, lat_keys, lon_keys = gnss.load_site(fnames)
    elif tag == 'los':
        if los_value is None:
            raise ValueError('must specify a valid {:}'.format(los_method))

        data, meta, lat_keys, lon_keys = gnss.load_los(fnames, los_method,
                                                       los_value, gnss_network)
    else:
        # Raise the documented exception instead of failing below with an
        # UnboundLocalError because no loader assigned `data`
        raise ValueError('unknown tag: {!r}'.format(tag))

    # Only post-process data sets that have at least one dimension. The
    # length of `dims` is used directly, as it does not depend on whether
    # `dims` is a mapping or a set of dimension names.
    if len(data.dims) > 0:
        # Squeeze the kindat and kinst 'coordinates', but keep them as floats
        squeeze_dims = np.array(['kindat', 'kinst'])
        squeeze_mask = [sdim in data.coords for sdim in squeeze_dims]

        if np.any(squeeze_mask):
            data = data.squeeze(dim=squeeze_dims[squeeze_mask])

        # Get the maximum and minimum values for time, latitude, and
        # longitude. The time limits are left as NaN.
        meta['time'] = {meta.labels.notes: data['time'].values.dtype.__doc__,
                        meta.labels.min_val: np.nan,
                        meta.labels.max_val: np.nan}

        for lat_key in lat_keys:
            meta[lat_key] = {meta.labels.min_val: -90.0,
                             meta.labels.max_val: 90.0}

        for lon_key in lon_keys:
            # Choose the 0 to 360 or -180 to 180 degree range based on
            # whether or not any negative longitudes are present
            min_lon = 0.0 if data[lon_key].values.min() >= 0.0 else -180.0
            meta[lon_key] = {meta.labels.min_val: min_lon,
                             meta.labels.max_val: min_lon + 360.0}

    return data, meta


def list_remote_files(tag, inst_id, start=dt.datetime(1998, 10, 15),
                      stop=None, user=None, password=None):
    """Create a Pandas Series of every file for chosen remote data.

    Parameters
    ----------
    tag : str
        Denotes type of file to load. This input is nominally provided by
        pysat itself.
    inst_id : str
        Specifies the satellite or instrument ID. This input is nominally
        provided by pysat itself.
    start : dt.datetime or NoneType
        Starting time for file list. If None, replaced with default.
        (default=10-15-1998)
    stop : dt.datetime or NoneType
        Ending time for the file list. If None, the current UTC time at the
        moment this function is called is used. (default=None)
    user : str or NoneType
        Username to be passed along to resource with relevant data.
        (default=None)
    password : str or NoneType
        User password to be passed along to resource with relevant data.
        (default=None)

    Returns
    -------
    files : pds.Series
        A series of filenames, see
        `pysat.utils.files.process_parsed_filenames` for more information.

    See Also
    --------
    pysatMadrigal.instruments.methods.general.list_remote_files

    """
    if stop is None:
        # Determine the run time when called, not once at import time. The
        # time zone is removed so that `stop` is a naive UTC datetime, like
        # the naive default for `start`.
        stop = dt.datetime.now(tz=dt.timezone.utc).replace(tzinfo=None)

    # Only the vertical TEC file listing is given a two-digit year break
    two_break = 99 if tag == 'vtec' else None

    files = general.list_remote_files(
        tag, inst_id, supported_tags=remote_tags,
        inst_code=madrigal_inst_code, kindats=madrigal_tag, start=start,
        stop=stop, user=user, password=password,
        two_digit_year_break=two_break)

    return files