import numpy as np
import xarray as xr
import pandas as pd
import os
from collections import OrderedDict
# from astropy.time import Time
import logging
import copy
from typing import List, Dict, Union, Tuple
import pysagereader
class SAGEIILoaderV700(object):
"""
    Class designed to load the v7.00 SAGE II spec and index files provided by the NASA ASDC into Python.
    Data files must be accessible on the user's machine, and can be downloaded from:
    https://eosweb.larc.nasa.gov/project/sage2/sage2_v7_table
Parameters
----------
data_folder
location of sage ii index and spec files.
output_format
format for the output data. If ``'xarray'`` the output is returned as an ``xarray.Dataset``.
If None the output is returned as a dictionary of numpy arrays.
**NOTE: the following options only apply to xarray output types**
species
        Species to be returned in the output data. If None all species are returned. Options are
        ``aerosol``, ``ozone``, ``h2o``, ``no2``, and ``background``. If more than one species is returned,
        fields are NaN-padded where data is not available. ``species`` is only used if ``'xarray'`` is set
        as the ``output_format``, otherwise it has no effect.
cf_names
        If True then CF-1.7 naming conventions are used for the output data when ``xarray`` is selected.
filter_aerosol
filter the aerosol using the cloud flag
filter_ozone
filter the ozone using the criteria recommended in the release notes
* Exclusion of all data points with an uncertainty estimate of 300% or greater
* Exclusion of all profiles with an uncertainty greater than 10% between 30 and 50 km
        * Exclusion of all data points at and below the altitude of an aerosol extinction value
          greater than 0.006 km^-1
        * Exclusion of all data points at and below the altitude where both the 525 nm aerosol extinction
          exceeds 0.001 km^-1 and the 525/1020 extinction ratio falls below 1.4
        * Exclusion of all data points below 35 km with a 200% or larger uncertainty estimate
enumerate_flags
expand the index and species flags to their boolean values.
normalize_percent_error
give the species error as percent rather than percent * 100
return_separate_flags
return the enumerated flags as a separate data array
Example
-------
>>> sage = SAGEIILoaderV700()
>>> sage.data_folder = 'path/to/data'
>>> data = sage.load_data('2004-1-1','2004-5-1')
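    Load only cloud-filtered aerosol and filtered ozone (illustrative; assumes the data files exist):
    >>> sage = SAGEIILoaderV700(data_folder='path/to/data', species=['aerosol', 'ozone'],
    ...                         filter_aerosol=True, filter_ozone=True)
    >>> data = sage.load_data('2004-1-1', '2004-5-1')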
In addition to the sage ii fields reported in the files, two additional time fields are provided
to allow for easier subsetting of the data.
    ``data['mjd']`` is a numpy array containing the modified Julian date of each scan.
    ``data['time']`` is a pandas ``DatetimeIndex`` containing the time of each scan.
"""
def __init__(self, data_folder: str = None,
output_format: str = 'xarray',
species: List[str] = ('aerosol', 'h2o', 'no2', 'ozone', 'background'),
cf_names: bool = False,
filter_aerosol: bool = False,
filter_ozone: bool = False,
enumerate_flags: bool = False,
normalize_percent_error: bool = False,
return_separate_flags: bool = False):
        if isinstance(species, str):
species = [species]
self.data_folder = data_folder # Type: str
self.version = '7.00'
self.index_file = 'SAGE_II_INDEX_'
self.spec_file = 'SAGE_II_SPEC_'
self.fill_value = np.nan
self.spec_format = self.get_spec_format()
self.index_format = self.get_index_format()
self.output_format = output_format
self.species = [s.lower() for s in species]
self.cf_names = cf_names
self.filter_aerosol = filter_aerosol
self.filter_ozone = filter_ozone
self.normalize_percent_error = normalize_percent_error
self.enumerate_flags = enumerate_flags
self.return_separate_flags = return_separate_flags
    def get_spec_filename(self, year: int, month: int) -> str:
"""
Returns the spec filename given a year and month
Parameters
----------
year
year of the data that will be loaded
month
month of the data that will be loaded
Returns
-------
filename of the spec file where the data is stored
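        Example
        -------
        Illustrative; the filename pattern is ``SAGE_II_SPEC_YYYYMM.7.00``, and ``None`` is returned if the file is not on disk:
        >>> sage = SAGEIILoaderV700(data_folder='path/to/data')
        >>> sage.get_spec_filename(2004, 1)
        'path/to/data/SAGE_II_SPEC_200401.7.00'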
"""
file = os.path.join(self.data_folder,
self.spec_file + str(int(year)) + str(int(month)).zfill(2) + '.' + self.version)
if not os.path.isfile(file):
file = None
return file
    def get_index_filename(self, year: int, month: int) -> str:
"""
Returns the index filename given a year and month
Parameters
----------
year
year of the data that will be loaded
month
month of the data that will be loaded
Returns
-------
filename of the index file where the data is stored
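        Example
        -------
        Illustrative; the filename pattern is ``SAGE_II_INDEX_YYYYMM.7.00``, and ``None`` is returned if the file is not on disk:
        >>> sage = SAGEIILoaderV700(data_folder='path/to/data')
        >>> sage.get_index_filename(2004, 1)
        'path/to/data/SAGE_II_INDEX_200401.7.00'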
"""
file = os.path.join(self.data_folder,
self.index_file + str(int(year)) + str(int(month)).zfill(2) + '.' + self.version)
if not os.path.isfile(file):
file = None
return file
    def read_spec_file(self, file: str, num_profiles: int) -> List[Dict]:
"""
Parameters
----------
file
name of the spec file to be read
num_profiles
number of profiles to read from the spec file (usually determined from the index file)
Returns
-------
        list of dictionaries containing the spec data. Each list element is one event.
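        Example
        -------
        A minimal sketch; the profile count comes from the matching index file:
        >>> sage = SAGEIILoaderV700(data_folder='path/to/data')
        >>> index = sage.read_index_file(sage.get_index_filename(2004, 1))
        >>> spec = sage.read_spec_file(sage.get_spec_filename(2004, 1), index['num_prof'])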
"""
# load the file into the buffer
file_format = self.spec_format
with open(file, "rb") as f:
buffer = f.read()
        # initialize the list of dictionaries, one per profile
        data = [dict() for _ in range(num_profiles)]
# load the data from the buffer
bidx = 0
for p in range(num_profiles):
for key in file_format.keys():
nbytes = np.dtype(file_format[key][0]).itemsize * file_format[key][1]
data[p][key] = copy.copy(np.frombuffer(buffer[bidx:bidx + nbytes],
dtype=file_format[key][0]))
bidx += nbytes
return data
    def read_index_file(self, file: str) -> Dict:
"""
Read the binary file into a python data structure
Parameters
----------
file
filename to be read
Returns
-------
data from the file
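        Example
        -------
        Illustrative; assumes the January 2004 index file exists in ``data_folder``:
        >>> sage = SAGEIILoaderV700(data_folder='path/to/data')
        >>> index = sage.read_index_file(sage.get_index_filename(2004, 1))
        >>> num_profiles = index['num_prof']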
"""
file_format = self.index_format
with open(file, "rb") as f:
buffer = f.read()
data = dict()
# load the data from file into a list
bidx = 0
for key in file_format.keys():
nbytes = np.dtype(file_format[key][0]).itemsize * file_format[key][1]
if file_format[key][0] == 'S1':
data[key] = copy.copy(buffer[bidx:bidx + nbytes].decode('utf-8'))
else:
data[key] = copy.copy(np.frombuffer(buffer[bidx:bidx + nbytes], dtype=file_format[key][0]))
if len(data[key]) == 1:
data[key] = data[key][0]
bidx += nbytes
        # make a more usable time field
date_str = []
# If the time overflows by less than the scan time just set it to midnight
data['HHMMSS'][(data['HHMMSS'] >= 240000) & (data['HHMMSS'] < (240000 + data['Duration']))] = 235959
# otherwise, set it as invalid
data['HHMMSS'][data['HHMMSS'] >= 240000] = -999
for idx, (ymd, hms) in enumerate(zip(data['YYYYMMDD'], data['HHMMSS'])):
if (ymd < 0) | (hms < 0):
date_str.append('1970-1-1 00:00:00') # invalid sage ii date
else:
hours = int(hms / 10000)
mins = int((hms % 10000) / 100)
secs = hms % 100
date_str.append(str(ymd)[0:4] + '-' + str(ymd)[4:6].zfill(2) + '-' +
str(ymd)[6::].zfill(2) + ' ' + str(hours).zfill(2) + ':' +
str(mins).zfill(2) + ':' + str(secs).zfill(2))
# data['time'] = Time(date_str, format='iso')
data['time'] = pd.to_datetime(date_str)
data['mjd'] = np.array((data['time'] - pd.Timestamp('1858-11-17')) / pd.Timedelta(1, 'D'))
        data['mjd'][data['mjd'] < 40588] = -999  # flag the 1970-01-01 placeholders (MJD 40587) as invalid dates
return data
    def load_data(self, min_date: str, max_date: str,
min_lat: float = -90, max_lat: float = 90,
min_lon: float = -180, max_lon: float = 360) -> Union[Dict, xr.Dataset]:
"""
Load the SAGE II data for the specified dates and locations.
Parameters
----------
min_date
start date where data will be loaded in iso format, eg: '2004-1-1'
max_date
            end date where data will be loaded in iso format, eg: '2004-5-1'
min_lat
minimum latitude (optional)
max_lat
maximum latitude (optional)
min_lon
minimum longitude (optional)
max_lon
maximum longitude (optional)
Returns
-------
        Either a dictionary of numpy arrays or an ``xarray.Dataset``, depending on ``output_format``.
        Variables are one or two dimensional depending on the variable.
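        Example
        -------
        Load two months of tropical data (illustrative; assumes files for 2004 are in ``data_folder``):
        >>> sage = SAGEIILoaderV700(data_folder='path/to/data')
        >>> data = sage.load_data('2004-1-1', '2004-3-1', min_lat=-10, max_lat=10)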
"""
min_time = pd.Timestamp(min_date)
max_time = pd.Timestamp(max_date)
data = dict()
init = False
        # create a list of unique year/month combinations between the start/end dates;
        # a 27 day stride is shorter than any month, so no month in the range is skipped
uniq = OrderedDict()
for year in [(t.date().year, t.date().month) for t in
pd.date_range(min_time, max_time + pd.Timedelta(27, 'D'), freq='27D')]:
uniq[year] = year
# load in the data from the desired months
for (year, month) in list(uniq.values()):
logging.info('loading data for : ' + str(year) + '/' + str(month))
indx_file = self.get_index_filename(year, month)
# if the file does not exist move on to the next month
if indx_file is None:
continue
indx_data = self.read_index_file(indx_file)
numprof = indx_data['num_prof']
spec_data = self.read_spec_file(self.get_spec_filename(year, month), numprof)
# get rid of the duplicate names for InfVec
for sp in spec_data:
sp['ProfileInfVec'] = copy.copy(sp['InfVec'])
del sp['InfVec']
for key in indx_data.keys():
                # trim extraneous profiles so the index and spec data are the same length
                # (index arrays are padded to a fixed length of 930 entries)
                if hasattr(indx_data[key], '__len__') and not isinstance(indx_data[key], str):
if len(indx_data[key]) == 930:
indx_data[key] = np.delete(indx_data[key], np.arange(numprof, 930))
# add the index values to the data set
if key in data.keys():
                    # we don't want to replicate certain fields
                    if not key.startswith(('Alt', 'Range', 'FillVal')):
data[key] = np.append(data[key], indx_data[key])
else:
if key == 'FillVal':
data[key] = indx_data[key]
else:
data[key] = [indx_data[key]]
# initialize the data dictionaries as lists
if init is False:
for key in spec_data[0].keys():
data[key] = []
init = True
# add the spec values to the data set
for key in spec_data[0].keys():
data[key].append(np.asarray([sp[key] for sp in spec_data]))
# join all of our lists into an array - this could be done more elegantly with vstack to avoid
# the temporary lists, but this is much faster
for key in data.keys():
if key == 'FillVal':
data[key] = float(data[key]) # make this a simple float rather than zero dimensional array
            elif isinstance(data[key][0], str):
data[key] = str(data[key][0])
elif len(data[key][0].shape) > 0:
data[key] = np.concatenate(data[key], axis=0)
else:
data[key] = np.asarray(data[key])
data = self.subset_data(data, min_date, max_date, min_lat, max_lat, min_lon, max_lon)
if not data:
return None
if self.output_format == 'xarray':
data = self.convert_to_xarray(data)
return data
    @staticmethod
def subset_data(data: Dict, min_date: str, max_date: str,
min_lat: float, max_lat: float,
min_lon: float, max_lon: float) -> Dict:
"""
Removes any data from the dictionary that does not meet the specified time, latitude and longitude requirements.
Parameters
----------
data
dictionary of sage ii data. Must have the fields 'mjd', 'Lat' and 'Lon'. All others are optional
min_date
start date where data will be loaded in iso format, eg: '2004-1-1'
max_date
            end date where data will be loaded in iso format, eg: '2004-5-1'
min_lat
minimum latitude (optional)
max_lat
maximum latitude (optional)
min_lon
minimum longitude (optional)
max_lon
maximum longitude (optional)
Returns
-------
returns the dictionary with only data in the valid latitude, longitude and time range
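        Example
        -------
        A minimal sketch, assuming ``data`` is a dictionary returned by ``load_data`` with ``output_format=None``:
        >>> data = SAGEIILoaderV700.subset_data(data, '2004-1-1', '2004-2-1',
        ...                                     min_lat=-10, max_lat=10, min_lon=-180, max_lon=360)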
"""
min_mjd = (pd.Timestamp(min_date) - pd.Timestamp('1858-11-17')) / pd.Timedelta(1, 'D')
max_mjd = (pd.Timestamp(max_date) - pd.Timestamp('1858-11-17')) / pd.Timedelta(1, 'D')
good = (data['mjd'] > min_mjd) & (data['mjd'] < max_mjd) & \
(data['Lat'] > min_lat) & (data['Lat'] < max_lat) & \
(data['Lon'] > min_lon) & (data['Lon'] < max_lon)
if np.any(good):
for key in data.keys():
                if isinstance(data[key], str):
pass
elif hasattr(data[key], '__len__'):
if data[key].shape[0] == len(good):
data[key] = data[key][good]
else:
            logging.warning('no data satisfies the criteria')
data = {}
return data
    def convert_to_xarray(self, data: Dict) -> Union[xr.Dataset, Tuple[xr.Dataset, xr.Dataset]]:
"""
Parameters
----------
data
Data from the ``load_data`` function
Returns
-------
data formatted to an xarray Dataset
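        Example
        -------
        Illustrative; requires a data dictionary produced with ``output_format=None``:
        >>> sage = SAGEIILoaderV700(data_folder='path/to/data', output_format=None)
        >>> data = sage.load_data('2004-1-1', '2004-2-1')
        >>> ds = sage.convert_to_xarray(data)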
"""
        # group the fields by their dimensions and whether they are optional returns
fields = dict()
# not currently returned
fields['geometry'] = ['Tan_Alt', 'Tan_Lat', 'Tan_Lon']
fields['flags'] = ['InfVec', 'Dropped']
fields['profile_flags'] = ['ProfileInfVec']
# always returned - 1 per profile
fields['general'] = ['Event_Num', 'Lat', 'Lon', 'Beta', 'Duration', 'Type_Sat', 'Type_Tan', 'Trop_Height']
# optional return parameters
fields['background'] = ['NMC_Pres', 'NMC_Temp', 'NMC_Dens', 'NMC_Dens_Err', 'Density', 'Density_Err']
fields['ozone'] = ['O3', 'O3_Err']
fields['no2'] = ['NO2', 'NO2_Err']
fields['h2o'] = ['H2O', 'H2O_Err']
fields['aerosol'] = ['Ext386', 'Ext452', 'Ext525', 'Ext1020', 'Ext386_Err', 'Ext452_Err', 'Ext525_Err',
'Ext1020_Err']
fields['particle_size'] = ['SurfDen', 'Radius', 'SurfDen_Err', 'Radius_Err']
xr_data = []
index_flags = self.convert_index_bit_flags(data)
species_flags = self.convert_species_bit_flags(data)
time = pd.to_timedelta(data['mjd'], 'D') + pd.Timestamp('1858-11-17')
data['Trop_Height'] = data['Trop_Height'].flatten()
for key in fields['general']:
xr_data.append(xr.DataArray(data[key], coords=[time], dims=['time'], name=key))
if 'aerosol' in self.species or self.filter_ozone: # we need aerosol to filter ozone
altitude = data['Alt_Grid'][0:80]
wavel = np.array([386.0, 452.0, 525.0, 1020.0])
ext = np.array([data['Ext386'], data['Ext452'], data['Ext525'], data['Ext1020']])
xr_data.append(xr.DataArray(ext, coords=[wavel, time, altitude],
dims=['wavelength', 'time', 'Alt_Grid'], name='Ext'))
ext = np.array([data['Ext386_Err'], data['Ext452_Err'], data['Ext525_Err'], data['Ext1020_Err']])
xr_data.append(xr.DataArray(ext, coords=[wavel, time, altitude],
dims=['wavelength', 'time', 'Alt_Grid'], name='Ext_Err'))
for key in fields['particle_size']:
xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
dims=['time', 'Alt_Grid'], name=key))
if 'no2' in self.species:
altitude = data['Alt_Grid'][0:100]
for key in fields['no2']:
xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
dims=['time', 'Alt_Grid'], name=key))
if 'h2o' in self.species:
altitude = data['Alt_Grid'][0:100]
for key in fields['h2o']:
xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
dims=['time', 'Alt_Grid'], name=key))
if any(i in ['ozone', 'o3'] for i in self.species):
altitude = data['Alt_Grid'][0:140]
for key in fields['ozone']:
xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
dims=['time', 'Alt_Grid'], name=key))
if 'background' in self.species:
altitude = data['Alt_Grid'][0:140]
for key in fields['background']:
xr_data.append(xr.DataArray(data[key], coords=[time, altitude],
dims=['time', 'Alt_Grid'], name=key))
xr_data = xr.merge(xr_data)
if self.enumerate_flags:
xr_data = xr.merge([xr_data, index_flags, species_flags])
for var in xr_data.variables.keys():
if xr_data[var].dtype == 'float32' or 'Err' in var:
xr_data[var] = xr_data[var].where(xr_data[var] != data['FillVal'])
# determine cloud filter for aerosol data
cloud_filter = xr.full_like(species_flags.Cloud_Bit_1, fill_value=True, dtype=bool)
min_alt = (xr_data.Alt_Grid * (species_flags.Cloud_Bit_1 & species_flags.Cloud_Bit_2)).max(dim='Alt_Grid')
cloud_filter = cloud_filter.where(cloud_filter.Alt_Grid > min_alt)
xr_data['cloud_filter'] = np.isnan(cloud_filter)
# determine valid ozone altitudes
if any(i in ['ozone', 'o3'] for i in self.species):
# add an ozone filter field for convenience
ozone_good = xr.full_like(species_flags.Cloud_Bit_1, fill_value=True, dtype=bool)
# Exclusion of all data points with an uncertainty estimate of 300% or greater
ozone_good = ozone_good.where(xr_data.O3_Err < 30000)
# Exclusion of all profiles with an uncertainty greater than 10% between 30 and 50 km
no_good = (xr_data.O3_Err > 1000) & (xr_data.Alt_Grid > 30) & (xr_data.Alt_Grid < 50)
ozone_good = ozone_good.where(~no_good)
# Exclusion of all data points at altitude and below the occurrence of an aerosol extinction value of
# greater than 0.006 km^-1
# NOTE: the wavelength to use as the filter is not specified in the documentation, so I have chosen the
# wavelength with the smallest extinction and therefore the strictest filtering
min_alt = (xr_data.Alt_Grid * (xr_data.Ext.sel(wavelength=1020) > 0.006)).max(dim='Alt_Grid')
ozone_good = ozone_good.where(xr_data.Alt_Grid > min_alt)
# Exclusion of all data points at altitude and below the occurrence of both the 525nm aerosol extinction
# value exceeding 0.001 km^-1 and the 525/1020 extinction ratio falling below 1.4
min_alt = (xr_data.Alt_Grid * ((xr_data.Ext.sel(wavelength=525) > 0.001) &
((xr_data.Ext.sel(wavelength=525) / xr_data.Ext.sel(
wavelength=1020)) < 1.4))).max(dim='Alt_Grid')
ozone_good = ozone_good.where(xr_data.Alt_Grid > min_alt)
            # Exclusion of all data points below 35 km with a 200% or larger uncertainty estimate
no_good = (xr_data.O3_Err > 20000) & (xr_data.Alt_Grid < 35)
ozone_good = ~np.isnan(ozone_good.where(~no_good))
xr_data['ozone_filter'] = ozone_good
if self.filter_aerosol:
xr_data['Ext'] = xr_data.Ext.where(~xr_data.cloud_filter)
if self.filter_ozone:
xr_data['O3'] = xr_data.O3.where(ozone_good)
        # drop the aerosol fields if they were only loaded to filter ozone
        if self.filter_ozone and 'aerosol' not in self.species:
            xr_data = xr_data.drop(['Ext', 'Ext_Err', 'wavelength'])
if self.normalize_percent_error:
for var in xr_data.variables.keys():
if 'Err' in var: # put error units back into percent
xr_data[var] = (xr_data[var] / 100).astype('float32')
xr_data = xr_data.transpose('time', 'Alt_Grid', 'wavelength')
xr_data = self.apply_cf_conventions(xr_data)
if self.return_separate_flags:
return xr_data, xr.merge([index_flags, species_flags])
else:
return xr_data
    def apply_cf_conventions(self, data):
        """
        Apply CF-1.7 metadata attributes to the dataset and optionally rename variables to CF-style names
        Parameters
        ----------
        data
            ``xarray.Dataset`` of the sage ii data
        Returns
        -------
        dataset with CF attributes and names applied
        """
attrs = {'time': {'standard_name': 'time'},
'Lat': {'standard_name': 'latitude',
'units': 'degrees_north'},
'Lon': {'standard_name': 'longitude',
'units': 'degrees_east'},
'Alt_Grid': {'units': 'km'},
'wavelength': {'units': 'nm',
'description': 'wavelength at which aerosol extinction is retrieved'},
'O3': {'standard_name': 'number_concentration_of_ozone_molecules_in_air',
'units': 'cm-3'},
'NO2': {'standard_name': 'number_concentration_of_nitrogen_dioxide_molecules_in_air',
'units': 'cm-3'},
'H2O': {'standard_name': 'number_concentration_of_water_vapor_in_air',
'units': 'cm-3'},
'Ext': {'standard_name': 'volume_extinction_coefficient_in_air_due_to_ambient_aerosol_particles',
'units': 'km-1'},
'O3_Err': {'standard_name': 'number_concentration_of_ozone_molecules_in_air_error',
'units': 'percent'},
'NO2_Err': {'standard_name': 'number_concentration_of_nitrogen_dioxide_molecules_in_air_error',
'units': 'percent'},
'H2O_Err': {'standard_name': 'number_concentration_of_water_vapor_in_air_error',
'units': 'percent'},
'Ext_Err': {'standard_name': 'volume_extinction_coefficient_in_air_due_to_ambient_aerosol_'
'particles_error',
'units': 'percent'},
'Duration': {'units': 'seconds',
'description': 'duration of the sunrise/sunset event'},
'Beta': {'units': 'degrees',
'description': 'angle between the satellite orbit plane and the sun'},
'Trop_Height': {'units': 'km'},
'Radius': {'units': 'microns'},
'SurfDen': {'units': 'microns2 cm-3'}}
for key in attrs.keys():
data[key].attrs = attrs[key]
git_repo = 'https://github.com/LandonRieger/pySAGE.git'
data.attrs = {'description': 'Retrieved vertical profiles of aerosol extinction, ozone, '
'nitrogen dioxide, water vapor, and meteorological profiles from SAGE II '
'version 7.00',
'publication reference': 'Damadeo, R. P., Zawodny, J. M., Thomason, L. W., & Iyer, N. (2013). '
'SAGE version 7.0 algorithm: application to SAGE II. Atmospheric '
'Measurement Techniques, 6(12), 3539-3561.',
'title': 'SAGE II version 7.00',
'date_created': pd.Timestamp.now().strftime('%B %d %Y'),
'source_code': 'repository: ' + git_repo + ' revision: ' + pysagereader.__version__,
'source_data': 'https://eosweb.larc.nasa.gov/project/sage2/sage2_v7_table',
'version': pysagereader.__version__,
'Conventions': 'CF-1.7'}
if self.cf_names:
names = {'Lat': 'latitude',
'Lon': 'longitude',
'Alt_Grid': 'altitude',
'Beta': 'beta_angle',
'Ext': 'aerosol_extinction',
'Ext_Err': 'aerosol_extinction_error',
'O3': 'ozone',
'O3_Err': 'ozone_error',
'NO2': 'no2',
'NO2_Err': 'no2_error',
'SurfDen': 'surface_area_density',
'SurfDen_Err': 'surface_area_density_error',
                     'Radius': 'effective_radius',
                     'Radius_Err': 'effective_radius_error',
'Density': 'air_density',
'Density_Err': 'air_density_error',
'Type_Sat': 'satellite_sunset',
'Type_Tan': 'local_sunset',
'Trop_Height': 'tropopause_altitude',
'Duration': 'event_duration'}
for key in names.keys():
try:
data.rename({key: names[key]}, inplace=True)
except ValueError:
pass
return data
    @staticmethod
def convert_index_bit_flags(data: Dict) -> xr.Dataset:
"""
Convert the int32 index flags to a dataset of distinct flags
Parameters
----------
data
Dictionary of input data as returned by ``load_data``
Returns
-------
Dataset of the index bit flags
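        Example
        -------
        A minimal sketch; ``data`` is a dictionary from ``load_data`` and each flag tests one bit of ``InfVec``:
        >>> flags = SAGEIILoaderV700.convert_index_bit_flags(data)
        >>> pmc = flags['pmc_present']  # True where bit 0 of InfVec is set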
"""
flags = dict()
flags['pmc_present'] = 0
flags['h2o_zero_found'] = 1
flags['h2o_slow_convergence'] = 2
flags['h2o_ega_failure'] = 3
flags['default_nmc_temp_errors'] = 4
flags['ch2_aero_model_A'] = 5
flags['ch2_aero_model_B'] = 6
flags['ch2_new_wavelength'] = 7
flags['incomplete_nmc_data'] = 8
flags['mirror_model'] = 15
flags['twomey_non_conv_rayleigh'] = 19
flags['twomey_non_conv_386_Aero'] = 20
flags['twomey_non_conv_452_Aero'] = 21
flags['twomey_non_conv_525_Aero'] = 22
flags['twomey_non_conv_1020_Aero'] = 23
flags['twomey_non_conv_NO2'] = 24
flags['twomey_non_conv_ozone'] = 25
flags['no_shock_correction'] = 30
f = dict()
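        # each named flag corresponds to a single bit: it is set if (InfVec & 2**bit) is nonzero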
for key in flags.keys():
f[key] = (data['InfVec'] & 2 ** flags[key]) > 0
xr_data = []
time = pd.to_timedelta(data['mjd'], 'D') + pd.Timestamp('1858-11-17')
for key in f.keys():
xr_data.append(xr.DataArray(f[key], coords=[time], dims=['time'], name=key))
return xr.merge(xr_data)
    @staticmethod
def convert_species_bit_flags(data: Dict) -> xr.Dataset:
"""
Convert the int32 species flags to a dataset of distinct flags
Parameters
----------
data
Dictionary of input data as returned by `load_data`
Returns
-------
        Dataset of the species bit flags
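        Example
        -------
        A minimal sketch; single-bit flags are booleans, multi-bit fields are masked and shifted:
        >>> flags = SAGEIILoaderV700.convert_species_bit_flags(data)
        >>> cloudy = flags['Cloud_Bit_1'] & flags['Cloud_Bit_2']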
"""
flags = dict()
flags['separation_method'] = [0, 1, 2]
flags['one_chan_aerosol_corr'] = 3
flags['no_935_aerosol_corr'] = 4
flags['Large_1020_OD'] = 5
flags['NO2_Extrap'] = 6
flags['Water_vapor_ratio'] = [7, 8, 9, 10]
flags['Cloud_Bit_1'] = 11
flags['Cloud_Bit_2'] = 12
flags['No_H2O_Corr'] = 13
flags['In_Troposphere'] = 14
separation_method = dict()
separation_method['no_aerosol_method'] = 0
separation_method['trans_no_aero_to_five_chan'] = 1
separation_method['standard_method'] = 2
separation_method['trans_five_chan_to_low'] = 3
separation_method['four_chan_method'] = 4
separation_method['trans_four_chan_to_three_chan'] = 5
separation_method['three_chan_method'] = 6
separation_method['extension_method'] = 7
f = dict()
        for key in flags.keys():
            if hasattr(flags[key], '__len__'):
                # build a mask covering all bits of the multi-bit field
                mask = np.sum([2 ** b for b in flags[key]])
                temp = data['ProfileInfVec'] & mask
                if key == 'separation_method':
                    for method in separation_method.keys():
                        f[method] = temp == separation_method[method]
                else:
                    f[key] = temp >> flags[key][0]  # shift the field down to keep only its significant bits
            else:
                f[key] = (data['ProfileInfVec'] & 2 ** flags[key]) > 0
xr_data = []
time = pd.to_timedelta(data['mjd'], 'D') + pd.Timestamp('1858-11-17')
for key in f.keys():
xr_data.append(xr.DataArray(f[key], coords=[time, data['Alt_Grid'][0:140]], dims=['time', 'Alt_Grid'],
name=key))
return xr.merge(xr_data)