Source code for pysatMadrigal.instruments.ngdc_ae

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Full license can be found in License.md
# Full author list can be found in .zenodo.json file
# DOI:10.5281/zenodo.3824979
#
# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
# unlimited.
# ----------------------------------------------------------------------------
"""Supports access to taped data of AE from the World Data Center A (Boulder).

Properties
----------
platform
    'ngdc'
name
    'ae'
tag
    None supported
inst_id
    None supported

Note
----
Please provide name (user) and email (password) when downloading data with this
routine.

Warnings
--------
The entire data set (1 Jan 1978 through 31 Dec 1987) is provided in a single
file on Madrigal.

Examples
--------
::


    import datetime as dt
    import pysat
    import pysatMadrigal as py_mad

    # Download AE data from Madrigal
    aei = pysat.Instrument(inst_module=py_mad.instruments.ngdc_ae)
    aei.download(start=py_mad.instruments.ngdc_ae.madrigal_start,
                 user='Firstname+Lastname', password='email@address.com')
    aei.load(date=dt.datetime(1981, 1, 1))

"""

import datetime as dt
import functools
import numpy as np
import pandas as pds

import h5py
import pysat

from pysatMadrigal.instruments.methods import general

# ----------------------------------------------------------------------------
# Instrument attributes

platform = 'ngdc'
name = 'ae'

# Only one untagged data set is defined, under a single blank inst_id
tags = {'': ''}
inst_ids = {'': [tag for tag in tags]}
pandas_format = True

# Madrigal instrument code, kind-of-data identifier for each inst_id/tag
# pair, and the time limits used for this data set
madrigal_inst_code = 211
madrigal_tag = {'': {'': "30008"}}
madrigal_start = dt.datetime(1978, 1, 1)
madrigal_end = dt.datetime(1988, 1, 1)

# Local attributes
#
# Need a way to get the filename strings for a particular instrument unless
# wildcards start working
supported_tags = {
    iid: {tag: general.madrigal_file_format_str(madrigal_inst_code,
                                                verbose=False)
          for tag in id_tags}
    for iid, id_tags in inst_ids.items()}

# The remote file formats are the local formats, filled in for HDF5 files
remote_tags = {
    iid: {tag: supported_tags[iid][tag].format(file_type='hdf5')
          for tag in id_tags}
    for iid, id_tags in inst_ids.items()}

# ----------------------------------------------------------------------------
# Instrument test attributes

# Every inst_id/tag pair is tested at the first Madrigal date
_test_dates = {iid: dict.fromkeys(id_tags, madrigal_start)
               for iid, id_tags in inst_ids.items()}

# Every inst_id/tag pair has its download flag set to True
_test_download = {iid: dict.fromkeys(id_tags, True)
                  for iid, id_tags in inst_ids.items()}

# Logger warning specified for the 'dusty' clean level
_clean_warn = {
    iid: {tag: {'dusty': [('logger', 'WARN',
                           "'dusty' and 'clean' levels are the same",
                           'dusty')]}
          for tag in id_tags}
    for iid, id_tags in inst_ids.items()}

# ----------------------------------------------------------------------------
# Instrument methods


def init(self):
    """Set up the Instrument object for use with Madrigal data.

    Note
    ----
    Assigns the `acknowledgements` and `references` attributes, and then logs
    the acknowledgements at the info level.

    """
    # Reference for the AE index
    ae_citation = ('Davis, T. Neil and Masahisa Sugiura. “Auroral electrojet '
                   'activity index AE and its universal time variations.” '
                   'Journal of Geophysical Research 71 (1966): 785-801.')

    # Assign the standard pysat attributes
    self.acknowledgements = general.cedar_rules()
    self.references = ae_citation

    # Log the Rules of the Road as a reminder to the user
    pysat.logger.info(self.acknowledgements)

    return
def clean(self):
    """Replace the fill and error values of the AE index data with NaN.

    Note
    ----
    Only variables whose units contain 'nT' are altered.

    Supports 'clean', 'dusty', and 'dirty'.  All three levels replace the
    fill value specified in the metadata with NaN, and update the metadata
    fill value to NaN.  The 'clean' and 'dusty' levels are identical, and
    additionally replace the parameter error value of -32766 with NaN.  A
    warning is logged once if 'dusty' is requested.

    Routine is called by pysat, and not by the end user directly.

    """
    err_val = -32766
    warned = False

    for dvar in self.variables:
        # Fill and error values only apply to the index values, which are
        # identified by their units
        if self.meta[dvar, self.meta.labels.units].find('nT') < 0:
            continue

        # The 'clean', 'dusty', and 'dirty' levels all replace the missing
        # parameter (fill) value with NaN.  Use a single `.loc` assignment
        # on the DataFrame so the change is written to the Instrument data
        # instead of relying on chained indexing.
        fill_mask = self[dvar] == self.meta[dvar, self.meta.labels.fill_val]
        self.data.loc[fill_mask, dvar] = np.nan
        self.meta[dvar] = {self.meta.labels.fill_val: np.nan}

        if self.clean_level in ['clean', 'dusty']:
            if self.clean_level == 'dusty' and not warned:
                pysat.logger.warning(
                    "The NGDC AE 'dusty' and 'clean' levels are the same.")
                warned = True

            # The 'clean' and 'dusty' levels replace the parameter error
            # value with NaN
            self.data.loc[self[dvar] == err_val, dvar] = np.nan

    return
# ----------------------------------------------------------------------------
# Instrument functions
#
# The default Madrigal and pysat methods are used, with the arguments
# specific to this Instrument filled in

# The file cadence spans the full Madrigal time range for this data set
file_cadence = madrigal_end - madrigal_start
two_digit_year_break = 50

# Download routine
download = functools.partial(
    general.download,
    inst_code=str(madrigal_inst_code),
    kindat=madrigal_tag[''][''],
    file_type='hdf5')

# Local file listing routine
list_files = functools.partial(
    general.list_files,
    supported_tags=supported_tags,
    file_cadence=file_cadence,
    two_digit_year_break=two_digit_year_break)

# Remote file listing routine
list_remote_files = functools.partial(
    general.list_remote_files,
    supported_tags=remote_tags,
    inst_code=madrigal_inst_code,
    kindats=madrigal_tag,
    two_digit_year_break=two_digit_year_break)
def load(fnames, tag='', inst_id=''):
    """Load the NGDC AE data.

    Parameters
    ----------
    fnames : list
        List of filenames, where each filename ends with a one-character
        separator followed by a date formatted as YYYY-MM-DD.  The dates
        set the time range of the data that is returned.
    tag : str
        Tag name used to identify particular data set to be loaded.
        This input is nominally provided by pysat itself. (default='')
    inst_id : str
        Instrument ID used to identify particular data set to be loaded.
        This input is nominally provided by pysat itself. (default='')

    Returns
    -------
    data : pds.DataFrame
        Object containing the data from the file that falls within the
        requested dates, indexed by time.  Empty if `fnames` is empty.
    meta : pysat.Meta
        Object containing metadata such as column names and units

    Raises
    ------
    ValueError
        Unexpected time variable names

    """
    # Initialize the output
    meta = pysat.Meta()

    # Without any filenames there is no file to open or time range to select,
    # so return an empty DataFrame instead of failing on the date arithmetic
    if len(fnames) == 0:
        return pds.DataFrame(dtype=np.float64), meta

    labels = []
    fill_val = -32767
    notes = "".join(["Assumed parameters error values are assigned a value ",
                     "of -32766 for clean levels of 'dirty' or 'none'"])

    # Cycle through all the filenames, getting the desired start and stop times
    fstart = None
    fstop = None
    for fname_date in fnames:
        # Split the date from the filename
        fname = fname_date[:-11]
        fdate = dt.datetime.strptime(fname_date[-10:], '%Y-%m-%d')
        fstop = fdate

        if fstart is None:
            fstart = fdate

    # The last requested day is included in full
    fstop += dt.timedelta(days=1)

    # There is only one file for this Instrument
    with h5py.File(fname, 'r') as filed:
        file_data = filed['Data']['Table Layout']
        file_meta = filed['Metadata']['Data Parameters']

        # Load the available parameter information into pysat.Meta
        for item in file_meta:
            name_string = item[0].decode('UTF-8')
            unit_string = item[3].decode('UTF-8')
            desc_string = item[1].decode('UTF-8')
            labels.append(name_string)

            # Only update metadata if necessary
            if name_string.lower() not in meta:
                meta_dict = {meta.labels.name: name_string,
                             meta.labels.units: unit_string,
                             meta.labels.desc: desc_string}

                if unit_string.find('nT') >= 0:
                    # Fill and error values only apply to index values
                    meta_dict[meta.labels.fill_val] = fill_val
                    meta_dict[meta.labels.notes] = notes

                meta[name_string.lower()] = meta_dict

        # Add additional metadata notes. Custom attributes attached to
        # meta are attached to corresponding Instrument object when
        # pysat receives data and meta from this routine
        for key in filed['Metadata']:
            if key != 'Data Parameters':
                setattr(meta, key.replace(' ', '_'),
                        filed['Metadata'][key][:])

        # Ensure all the variables needed for the datetime index are present
        time_keys = np.array(['year', 'month', 'day', 'hour', 'hm', 'hmi'])
        lower_labels = [ll.lower() for ll in labels]
        time_keys = [key for key in time_keys if key not in lower_labels]
        if len(time_keys) > 0:
            raise ValueError(' '.join(["unable to construct time index, ",
                                       "missing {:}".format(time_keys)]))

        # Read the table from the file a single time, then work in memory
        table = file_data[:]

        # Get the date information
        year = table['year']
        month = table['month']
        day = table['day']
        fdate = pysat.utils.time.create_datetime_index(year=year, month=month,
                                                       day=day)

        # Get the data mask and select the desired times
        dmask = (fdate >= fstart) & (fdate < fstop)
        selected = table[dmask]

        # Construct the time index.  The minutes are the part of 'hm' that
        # remains after removing 100 times the hour; this is algebraically
        # the same as (hm / 100 - hour) * 100, without the floating point
        # round-off introduced by the division.
        hour = selected['hour']
        minute = selected['hm'] - 100 * hour
        uts = 3600.0 * hour + 60.0 * minute + selected['hmi']
        tindex = pysat.utils.time.create_datetime_index(
            year=year[dmask], month=month[dmask], day=day[dmask], uts=uts)

        # Load the data into a pandas DataFrame
        data = pds.DataFrame.from_records(selected, columns=labels,
                                          index=tindex)

    return data, meta