# Source code for pysatMadrigal.instruments.ngdc_ae

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3824979
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Supports access to taped data of AE from the World Data Center A (Boulder).

Properties
----------
platform
    'ngdc'
name
    'ae'
tag
    None supported
inst_id
    None supported

Note
----
Please provide name (user) and email (password) when downloading data with this
routine.

Warnings
--------
The entire data set (1 Jan 1978 through 31 Dec 1987) is provided in a single
file on Madrigal.

Examples
--------
::


    import datetime as dt
    import pysat
    import pysatMadrigal as py_mad

    # Download AE data from Madrigal
    aei = pysat.Instrument(inst_module=py_mad.instruments.ngdc_ae)
    aei.download(start=py_mad.instruments.ngdc_ae.madrigal_start,
                 user='Firstname+Lastname', password='email@address.com')
    aei.load(date=dt.datetime(1981, 1, 1))

"""

import datetime as dt
import functools
import numpy as np
import pandas as pds

import h5py
import pysat

from pysatMadrigal.instruments.methods import general

# ----------------------------------------------------------------------------
# Instrument attributes

platform = 'ngdc'
name = 'ae'
tags = {'': ''}
inst_ids = {'': list(tags)}
pandas_format = True

# Madrigal identifiers and the time limits of the data set. The module
# docstring states the data run from 1 Jan 1978 through 31 Dec 1987.
madrigal_inst_code = 211
madrigal_tag = {'': {'': "30008"}}
madrigal_start = dt.datetime(1978, 1, 1)
madrigal_end = dt.datetime(1988, 1, 1)

# Local attributes
#
# The filename format strings are built explicitly for every inst_id/tag
# combination; this is needed unless wildcards start working
supported_tags = {
    iid: {itag: general.madrigal_file_format_str(madrigal_inst_code,
                                                 verbose=False)
          for itag in itags}
    for iid, itags in inst_ids.items()}

# The remote listing uses the same format strings, specialised to HDF5 files
remote_tags = {
    iid: {itag: supported_tags[iid][itag].format(file_type='hdf5')
          for itag in itags}
    for iid, itags in inst_ids.items()}

# ----------------------------------------------------------------------------
# Instrument test attributes

# Every inst_id/tag combination is tested at the first supported time
_test_dates = {iid: dict.fromkeys(itags, madrigal_start)
               for iid, itags in inst_ids.items()}

# Every inst_id/tag combination is flagged as downloadable
_test_download = {iid: dict.fromkeys(itags, True)
                  for iid, itags in inst_ids.items()}

# Logger message expected when the 'dusty' clean level is requested
_clean_warn = {
    iid: {itag: {'dusty': [('logger', 'WARN',
                            "'dusty' and 'clean' levels are the same",
                            'dusty')]}
          for itag in itags}
    for iid, itags in inst_ids.items()}

# ----------------------------------------------------------------------------
# Instrument methods



# [docs]
def init(self):
    """Attach the acknowledgements and references to the Instrument object.

    Note
    ----
    The acknowledgements are obtained from `general.cedar_rules` and are
    also sent to the pysat logger at the 'info' level.

    """
    # The acknowledgements double as the Rules of the Road
    self.acknowledgements = general.cedar_rules()

    # Citation for the AE index, assembled from adjacent string literals
    citation = ('Davis, T. Neil and Masahisa Sugiura. “Auroral'
                ' electrojet activity index AE and its '
                'universal time variations.” Journal of '
                'Geophysical Research 71 (1966): 785-801.')
    self.references = citation

    # Make sure the user sees the Rules of the Road
    pysat.logger.info(self.acknowledgements)




# [docs]
def clean(self):
    """Replace the NGDC AE fill and parameter error values with NaN.

    Note
    ----
    Only variables whose units contain 'nT' are altered.

    Supports 'clean', 'dusty', 'dirty' in the sense that all levels replace
    the fill value recorded in the metadata with NaN and then set the
    metadata fill value to NaN. The 'clean' and 'dusty' levels are identical
    and additionally replace the parameter error value (-32766) with NaN; a
    warning is logged once when 'dusty' is requested.
    'None' is also supported as it signifies no cleaning.

    Routine is called by pysat, and not by the end user directly.

    """
    # Value assumed to flag a parameter error in the index variables
    error_val = -32766
    remove_errors = self.clean_level in ['clean', 'dusty']
    warned = False

    for dvar in self.variables:
        # Fill and error values only apply to the index (nT) variables
        if self.meta[dvar, self.meta.labels.units].find('nT') < 0:
            continue

        values = self[dvar]

        # All clean levels remove the fill value currently recorded in the
        # metadata. The mask must be built before the metadata are updated.
        bad = values == self.meta[dvar, self.meta.labels.fill_val]

        if remove_errors:
            if self.clean_level == 'dusty' and not warned:
                pysat.logger.warning(
                    "The NGDC AE 'dusty' and 'clean' levels are the same.")
                warned = True

            # The 'clean' and 'dusty' levels also remove the error value
            bad = bad | (values == error_val)

        # Assign the whole column in one step. Chained indexing of the form
        # `self[dvar][mask] = ...` may act on a temporary copy, and
        # `Series.mask` up-casts integer data so NaN can be stored.
        self.data[dvar] = values.mask(bad)
        self.meta[dvar] = {self.meta.labels.fill_val: np.nan}

    return



# ----------------------------------------------------------------------------
# Instrument functions
#
# Use the default Madrigal and pysat methods
# The module docstring notes that the entire data set is provided in a single
# file, so the file cadence is set to the full span between the Madrigal
# start and end times
file_cadence = madrigal_end - madrigal_start

# Forwarded unchanged to the general list routines below
# NOTE(review): presumably the pivot used to expand two-digit years in
# filenames -- confirm against `general.list_files`
two_digit_year_break = 50

# Set the download routine: the general Madrigal download with the NGDC AE
# instrument code (as a string), the kindat for the only supported
# inst_id/tag combination, and the HDF5 file type already bound
download = functools.partial(general.download,
                             inst_code=str(madrigal_inst_code),
                             kindat=madrigal_tag[''][''], file_type='hdf5')

# Set the list routine: the general local file listing, bound to the filename
# format strings, the single-file cadence, and the two-digit year break
list_files = functools.partial(general.list_files,
                               supported_tags=supported_tags,
                               file_cadence=file_cadence,
                               two_digit_year_break=two_digit_year_break)

# Set list_remote_files routine: the general remote file listing, bound to
# the HDF5 filename format strings, the instrument code (as an integer here,
# unlike in `download`), and the full kindat dictionary
list_remote_files = functools.partial(general.list_remote_files,
                                      supported_tags=remote_tags,
                                      inst_code=madrigal_inst_code,
                                      kindats=madrigal_tag,
                                      two_digit_year_break=two_digit_year_break)



# [docs]
def load(fnames, tag='', inst_id=''):
    """Load the NGDC AE data.

    Parameters
    ----------
    fnames : list
        List of filenames. Each entry is expected to end with a separator
        character followed by a date formatted as 'YYYY-MM-DD'; the first and
        last dates define the time range that is returned.
    tag : str
        tag name used to identify particular data set to be loaded.
        This input is nominally provided by pysat itself and is not used by
        this routine. (default='')
    inst_id : str
        Instrument ID used to identify particular data set to be loaded.
        This input is nominally provided by pysat itself and is not used by
        this routine. (default='')

    Returns
    -------
    data : pds.DataFrame
        Object containing the AE data for the requested dates, indexed by
        time. Empty if `fnames` is empty.
    meta : pysat.Meta
        Object containing metadata such as column names and units

    Raises
    ------
    ValueError
        Unexpected time variable names, or a filename whose trailing date
        cannot be parsed

    """
    # Initialize the output
    meta = pysat.Meta()
    fill_val = -32767
    notes = "".join(["Assumed parameters error values are assigned a value ",
                     "of -32766 for clean levels of 'dirty' or 'none'"])

    # Without any filenames there is nothing to read; return empty output
    # instead of failing while constructing the time limits
    if len(fnames) == 0:
        return pds.DataFrame(dtype=np.float64), meta

    # Split the date from every filename; the first and last dates set the
    # desired (inclusive) start and (exclusive) stop times
    fdates = [dt.datetime.strptime(fname_date[-10:], '%Y-%m-%d')
              for fname_date in fnames]
    fstart = fdates[0]
    fstop = fdates[-1] + dt.timedelta(days=1)

    # There is only one file for this Instrument, use the last one provided
    fname = fnames[-1][:-11]

    labels = []
    with h5py.File(fname, 'r') as filed:
        # Read the full table into memory once; all later selections are
        # performed on this array rather than by re-reading the file
        table = filed['Data']['Table Layout'][:]
        file_meta = filed['Metadata']['Data Parameters']

        # Load available info into pysat.Meta
        for item in file_meta:
            name_string = item[0].decode('UTF-8')
            unit_string = item[3].decode('UTF-8')
            desc_string = item[1].decode('UTF-8')
            labels.append(name_string)

            # Only update metadata if necessary
            if name_string.lower() not in meta:
                meta_dict = {meta.labels.name: name_string,
                             meta.labels.units: unit_string,
                             meta.labels.desc: desc_string}

                if unit_string.find('nT') >= 0:
                    # Fill and error values only apply to index values
                    meta_dict[meta.labels.fill_val] = fill_val
                    meta_dict[meta.labels.notes] = notes

                meta[name_string.lower()] = meta_dict

        # Add additional metadata notes. Custom attributes attached to
        # meta are attached to corresponding Instrument object when
        # pysat receives data and meta from this routine
        for key in filed['Metadata']:
            if key != 'Data Parameters':
                setattr(meta, key.replace(' ', '_'), filed['Metadata'][key][:])

    # Ensure all variables needed to construct the time index are present
    lower_labels = [label.lower() for label in labels]
    missing_keys = [key for key in ('year', 'month', 'day', 'hour', 'hm',
                                    'hmi') if key not in lower_labels]
    if len(missing_keys) > 0:
        raise ValueError(' '.join(["unable to construct time index, ",
                                   "missing {:}".format(missing_keys)]))

    # Get the date information for every record in the file
    year = table['year']
    month = table['month']
    day = table['day']
    fdate = pysat.utils.time.create_datetime_index(year=year, month=month,
                                                   day=day)

    # Get the data mask and select the desired records once
    dmask = (fdate >= fstart) & (fdate < fstop)
    selected = table[dmask]

    # Construct the time index. The minutes are the remainder of 'hm' after
    # removing the hours; subtracting directly avoids the round-off that
    # comes from dividing by and re-multiplying with 100.
    # NOTE(review): 'hmi' is added as seconds of day, as before -- confirm
    # the units of this Madrigal parameter
    hour = selected['hour']
    minute = selected['hm'] - 100.0 * hour
    uts = 3600.0 * hour + 60.0 * minute + selected['hmi']

    tindex = pysat.utils.time.create_datetime_index(
        year=year[dmask], month=month[dmask], day=day[dmask], uts=uts)

    # Load the data into a pandas DataFrame
    data = pds.DataFrame.from_records(selected, columns=labels, index=tindex)

    return data, meta