Source code for jsmetrics.metrics.jet_statistics_components

# -*- coding: utf-8 -*-

"""
Algorithms and calculations for the metrics used to
identify or classify jet-stream in the literature.

This file is in order of publish year of the metrics
(see details_for_all_metrics.py)

Functions and Classes ordered by the year of the paper that uses each metric component first
"""

# imports
import numpy as np
import xarray as xr
import scipy.fftpack
import scipy.interpolate
import scipy.optimize
from jsmetrics.utils import data_utils

# docs
__author__ = "Thomas Keel"
__email__ = "thomasjames.keel@gmail.com"
__status__ = "Development"


[docs] def calc_atmospheric_mass_at_kPa( pressure, gravity=9.81, atmospheric_area=5.1e8 ): """ Will calculate the atmospheric mass at a given pressure level. Radius of earth (R) = 6.372E3 km; Surface area of earth = 4 Pi R^2 = 5.1E8 km^2 Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters --------------- pressure : float in kPa gravity : float m/s^2 """ return (pressure / gravity) * atmospheric_area
[docs] def get_atm_mass_at_one_hPa(hPa): """ Get atmospheric mass at one pressure level. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- hPa : int or float One pressure level in hPa Returns ---------- atm_mass : int or float Atmospheric mass at given hPa pressure level """ kPa = hPa / 10 atm_mass = calc_atmospheric_mass_at_kPa(kPa) return atm_mass
[docs] def get_weighted_average_at_one_pressure_level( data, pressure_level, atm_mass, ws_col ): """ Get weighted average wind speed at one pressure level. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- data : xarray.Dataset Data containing windspeed ('ws') or u-, v-component wind pressure_level : int or float Pressure level to subset atm_mass : int or float Atmospheric mass at given hPa pressure level ws_col : string Name of column to calculate weighted average from (e.g. 'ws', 'ua', 'va') Returns ---------- output : xarray.Dataset Data with weighted average at a single pressure level """ return atm_mass * (data[ws_col].sel(plev=pressure_level))
[docs] def get_mass_weighted_average_wind(data, ws_col, plev_flux=False): """ Get mass-weighted average wind-speed from 'u', 'v' component wind or 'wind vector'. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- data : xarray.Dataset Data containing u- and v-component wind ws_col : string Name of column to calculate weighted average from (e.g. 'ws', 'ua', 'va') Returns ---------- output : xarray.Dataset Data containing mass weighted average ws Usage ---------- mon_mean = data.groupby("time.month").mean() mass_weighted_average = jetstream_metrics_utils.get_mass_weighted_average_ws(mon_mean) """ data_utils.check_at_least_n_plevs_in_data(data, n_plevs=2) sum_weighted_ws = None ( hPa_multiplying_factor, _, ) = get_hPa_or_mbar_and_Pa_or_bar_multiplying_factors_for_plev(data) for plev_step in data["plev"].data: plev_hPa = plev_step * hPa_multiplying_factor atm_mass = get_atm_mass_at_one_hPa(plev_hPa) weighted_average = get_weighted_average_at_one_pressure_level( data, plev_step, atm_mass, ws_col ) if sum_weighted_ws is None: if plev_flux: sum_weighted_ws = weighted_average * plev_hPa else: sum_weighted_ws = weighted_average else: if plev_flux: sum_weighted_ws += weighted_average * plev_hPa else: sum_weighted_ws += weighted_average return sum_weighted_ws
[docs] def get_sum_atm_mass(data): """ Sum atmospheric mass for given pressure levels. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- data : xarray.Dataset Data containing plev Returns ---------- sum_atm_mass : int or float Sum of atmospheric mass """ sum_atm_mass = 0 data_utils.check_at_least_n_plevs_in_data(data, n_plevs=2) ( hPa_multiplying_factor, _, ) = get_hPa_or_mbar_and_Pa_or_bar_multiplying_factors_for_plev(data) for plev_level in data["plev"].data: plev_hPa = plev_level * hPa_multiplying_factor atm_mass = get_atm_mass_at_one_hPa(plev_hPa) sum_atm_mass += atm_mass return sum_atm_mass
[docs] def get_hPa_or_mbar_and_Pa_or_bar_multiplying_factors_for_plev(data): """ Will get the multiplying factor for plev units. E.g. if data contains plev with the unit: Pa the two multiplying factors will be 0.01 and 1 E.g. if data contains plev with the unit: hPa the two multiplying factors will be 1 and 100 Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- data : xr.DataArray or xr.Dataset Data containing plev coord and units for plev Returns ---------- hPa_multiplying_factor : int Factor to multiply plev units to convert them to hPa Pa_multiplying_factor : int Factor to multiply plev units to convert them to Pa """ hPa_multiplying_factor = 1 Pa_multiplying_factor = 1 plev_units = data_utils.check_plev_units( data, ["Pa", "hPa", "mbar", "millibars", "bars"] ) if plev_units in ["hPa", "mbar", "millibars"]: Pa_multiplying_factor *= 100 else: hPa_multiplying_factor /= 100 return hPa_multiplying_factor, Pa_multiplying_factor
[docs] def calc_mass_weighted_average(data, ws_col): """ Calculate mass weighted average wind-speed. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- data : xarray.Dataset Data containing plev ws_col : string Name of column to calculate weighted average from (e.g. 'ws', 'ua', 'va') Returns ---------- weighted_average : xr.DataArray Data with weighted average windspeed based on sum atmospheric mass """ sum_atm_mass = get_sum_atm_mass(data) sum_weighted_ws = get_mass_weighted_average_wind(data, ws_col) weighted_average = sum_weighted_ws / sum_atm_mass return weighted_average
[docs] def calc_mass_flux_weighted_pressure(data, ws_col): """ Calculate mass-flux weighted pressure. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 Parameters ---------- data : xarray.Dataset Data containing windspeed and plev ws_col : string Name of column to calculate weighted average from (e.g. 'ws', 'ua', 'va') Returns ---------- mass_flux_weighted_pressure : xr.DataArray Data with mass flux weighted pressure """ sum_weighted_ws = get_mass_weighted_average_wind(data, ws_col=ws_col) sum_weighted_ws_plev_flux = get_mass_weighted_average_wind( data, ws_col=ws_col, plev_flux=True ) mass_flux_weighted_pressure = sum_weighted_ws_plev_flux / sum_weighted_ws return mass_flux_weighted_pressure
[docs] def calc_mass_flux_weighted_latitude(data, lat_min, lat_max, ws_col): """ Calculate mass-flux weighted latitude. Component of method from Archer & Caldiera (2008) https://doi.org/10.1029/2008GL033614 NOTE: Problem with including 1000 hPa Parameters ---------- data : xarray.Dataset Data containing windspeed and lat lat_min : int or float Minimum latitude to consider for weighted latitude lat_max : int or float Maximum latitude to consider for weighted latitude ws_col : string Name of column to calculate weighted average from (e.g. 'ws', 'ua', 'va') Returns ---------- mass_flux_weighted_latitude : xr.DataArray Data with mass flux weighted latitude """ assert "lat" in data.coords, "'lat' needs to be in data.coords" sub_data = data.sel(lat=slice(lat_min, lat_max)) sum_weighted_lat_flux = None sum_weighted_ws_by_lat = None for lat in sub_data["lat"].data: lat_data = sub_data.sel(lat=lat, method="nearest") lat_sum_weighted_ws = get_mass_weighted_average_wind(lat_data, ws_col) if sum_weighted_lat_flux is None: sum_weighted_ws_by_lat = lat_sum_weighted_ws sum_weighted_lat_flux = lat_sum_weighted_ws * lat else: sum_weighted_ws_by_lat += lat_sum_weighted_ws sum_weighted_lat_flux += lat_sum_weighted_ws * lat mass_flux_weighted_latitude = ( sum_weighted_lat_flux / sum_weighted_ws_by_lat ) return mass_flux_weighted_latitude
[docs] def get_climatology(data, freq): """ Makes a climatology at given interval (i.e. days, months, season) Parameters ---------- data : xarray.Dataset data with regular time stamp freq : str 'day', 'month' or 'season' Returns ---------- climatology : xarray.Dataset Climatology of a given frequency Usage ---------- climatology = get_climatology(data, 'month') """ climatology = data.groupby("time.%s" % (freq)).mean("time") return climatology
[docs] def calc_low_pass_weights(window, cutoff): """ Calculate weights for a low pass Lanczos filter. A low-pass filter removes short-term random fluctations in a time series Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 TAKEN FROM: https://scitools.org.uk/iris/docs/v1.2/examples/graphics/SOI_filtering.html Parameters ---------- window : int The length of the filter window. cutoff : float The cutoff frequency in inverse time steps. Returns ---------- output : numpy.array filtered weights """ order = ((window - 1) // 2) + 1 nwts = 2 * order + 1 w = np.zeros([nwts]) n = nwts // 2 w[n] = 2 * cutoff k = np.arange(1.0, n) sigma = np.sin(np.pi * k / n) * n / (np.pi * k) firstfactor = np.sin(2.0 * np.pi * cutoff * k) / (np.pi * k) w[n - 1 : 0 : -1] = firstfactor * sigma w[n + 1 : -1] = firstfactor * sigma return w[0 + (window % 2) : -1] # edited from w[1:-1]
[docs] def apply_lanczos_filter(dataarray, filter_freq, window_size): """ Will carry out Lanczos low-pass filter Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- datarray : xarray.DataArray Data to apply filter to containing zonal mean (u-component) wind filter_freq : int number of days in filter window_size : int number of days in window for Lancoz filter Returns ---------- window_cons : xarray.DataArray Filtered zonal mean data """ if window_size <= filter_freq or filter_freq <= 0 or window_size <= 0: raise ValueError( "Lanczos filter frequency cannot be larger than window size and both need to be more than 0" ) if not xr.infer_freq(dataarray["time"]) == "D": print( f"Warning: The 'apply_lanczos_filter' function was built to work on a datetime index of freq 'D'. Frequency in data is '{xr.infer_freq(dataarray['time'])}'" ) assert isinstance( dataarray, xr.DataArray ), "Input data needs to be a data array" try: start_date = dataarray["time"].astype(np.datetime64)[0] filter_end_date = start_date + np.timedelta64( filter_freq, "D" ) # add filter end date for check if data is outside the filter size window_end_date = start_date + np.timedelta64( window_size, "D" ) # add window end date for check if data is outside the filter size end_date = dataarray["time"].astype(np.datetime64)[-1] except (ValueError, TypeError): # makes assumption that if times not coerced into numpy datetime then is 360 start_date = dataarray["time"].values[0] end_date = dataarray["time"].values[-1] if not hasattr(start_date, "calendar"): raise ValueError( f"Error: datetime type inputted ({start_date}) cannot be coerced into numpy.datetime64 and is not in cftime format." ) if start_date.calendar == "360_day": datetime_added_func_to_use = ( data_utils.add_num_of_days_to_360Datetime ) elif start_date.calendar == "noleap": datetime_added_func_to_use = ( data_utils.add_num_of_days_to_NoLeapDatetime ) else: raise ValueError( f"Error: datetime type inputted ({start_date}) cannot be coerced into numpy.datetime64 and is not in cftime format." ) filter_end_date = datetime_added_func_to_use( start_date, num_of_days_to_add=filter_freq ) window_end_date = datetime_added_func_to_use( start_date, num_of_days_to_add=window_size ) # add filter day for check if data is outside the filter size if window_end_date > end_date or filter_end_date > end_date: raise ValueError( f"Time series is too short to apply {window_size} day window for Lanczos filter frequency of {filter_freq} days" ) # As the data may not actually be in day format, it will return the number of values within window in data i.e. if in monthly format but filter window is 60, then actual filter size will be 2 actual_filter_freq = dataarray.sel( time=slice(start_date, filter_end_date) ).time.size actual_window_size = dataarray.sel( time=slice(start_date, window_end_date) ).time.size lanczos_weights = calc_low_pass_weights( actual_window_size, 1 / actual_filter_freq ) lanczos_weights_arr = xr.DataArray(lanczos_weights, dims=["window"]) window_cons = ( dataarray.rolling(time=len(lanczos_weights_arr), center=True) .construct("window") .dot(lanczos_weights_arr) ) return window_cons
[docs] def get_latitude_and_speed_where_max_ws(data_row): """ Will return the latitude and windspeed at the index of maximum wind speed from a row of data TODO: expects only one cell to be max Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Barnes & Simpson (2017) https://doi.org/10.1175/JCLI-D-17-0299.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- data_row : xarray.DataArray Data of single time unit containing u-component wind Returns ---------- lat_at_max : int or float Latitude of maximum windspeed speed_at_max : int or float Speed at latitude of maximum windspeed """ try: assert hasattr(data_row, "isnull") except Exception as e: raise AttributeError("input needs to have isnull method.") from e if not data_row.isnull().all(): data_row = data_row.fillna(0.0) max_ws = data_row.where( np.abs(data_row) == np.abs(data_row).max(), drop=True ).squeeze() if max_ws.size > 1: print( "'get_latitude_and_speed_where_max_ws method': Warning: more than one max value found, picking the first occurence!" ) max_ws = max_ws[0].max() lat_at_max = float(max_ws["lat"]) speed_at_max = float(max_ws.data) return lat_at_max, speed_at_max else: return np.nan, np.nan
[docs] def calc_latitude_and_speed_where_max_ws(data, var_col="ua"): """ Will return the latitude and windspeed at the index of maximum wind speed from a row of data TODO: expects only one cell to be max Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 & Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Barnes & Simpson (2017) https://doi.org/10.1175/JCLI-D-17-0299.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- dataarray : xarray.dataset Dataset containing u-component wind Returns ---------- jet_lats : Latitude of maximum windspeed jet_speeds : int or float Speed at latitude of maximum windspeed """ inds = np.nanargmax(data[var_col], axis=data[var_col].get_axis_num("lat")) jet_lats = data[var_col]["lat"][inds] data["jet_lat"] = (("time",), jet_lats.data) data["jet_speed"] = data[var_col].max(dim="lat") # cleans up missing values data = data.dropna("time", subset=["jet_lat", "jet_speed"]) return data
[docs] def assign_jet_lat_and_speed_to_data( data, max_lat_ws, max_lats_col="jet_lat", max_ws_col="jet_speed" ): """ Will return a data array with the maximum windspeed and latitude of that maximum wind speed Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 & Barnes & Simpson 2017 https://doi.org/10.1175/JCLI-D-17-0299.1 Parameters ---------- data : xarray.DataArray Data of single time unit containing u-component wind max_lat_ws : np.array Array of maximum latitude and windspeed at those maximums for each timeunit Returns ---------- data_with_max_lats_ws : xarray.Dataset Data with maximum latitude and windspeed attached """ max_lats = max_lat_ws[:, 0] max_ws = max_lat_ws[:, 1] data_with_max_lats_ws = data.assign( {max_lats_col: (("time"), max_lats), max_ws_col: (("time"), max_ws)} ) data_with_max_lats_ws[max_lats_col] = data_with_max_lats_ws[ max_lats_col ].astype(float) data_with_max_lats_ws[max_ws_col] = data_with_max_lats_ws[ max_ws_col ].astype(float) return data_with_max_lats_ws
[docs] def apply_low_freq_fourier_filter(data, highest_freq_to_keep): """ Carries out a Fourier transform for filtering keeping only low frequencies. Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 ADAPTED FROM: https://scipy-lectures.org/intro/scipy/auto_examples/plot_fftpack.html Parameters ---------- data : np.array - 1-d time series data at regular intervals highest_freq_to_keep : int highest frequency to keep in the fourier transform expression NOTE: starts at 0, so highest_freq_to_keep=1 will only keep the constant and first expresion Returns ---------- filtered_sig : np.array Fourier-filtered signal Usage ---------- # Apply filter of the two lowest frequencies apply_low_freq_fourier_filter(data, highest_freq_to_keep=2) """ # Fast Fourier Transform on the time series data fourier_transform = scipy.fftpack.fft(data) # Remove low frequencies fourier_transform[highest_freq_to_keep + 1 :] = 0 # Inverse Fast Fourier Transform the time series data back filtered_sig = scipy.fftpack.ifft(fourier_transform) return filtered_sig
[docs] def assign_filtered_lats_and_ws_to_data( data, filtered_max_lats, filtered_max_ws, dim ): """ Assigns the filtered data back to the returned dataset Component of method from Woollings et al (2010) http://dx.doi.org/10.1002/qj.625 Parameters ---------- data : xarray.Dataset Data to have fourier filtered data assigned to filtered_max_lats : numpy.array Fourier-filtered maximum latitude by given timeunit filtered_max_lats : numpy.array Fourier-filtered maximum speed at maximum latitude by given timeunit Returns ---------- filtered_data : xarray.Dataset Data with fourier-filtered maximum lat and windspeed attached to it """ filtered_data = data.assign( { "ff_jet_lat": ((dim), filtered_max_lats), "ff_jet_speed": ((dim), filtered_max_ws), } ) filtered_data["ff_jet_lat"] = filtered_data["ff_jet_lat"].astype(float) filtered_data["ff_jet_speed"] = filtered_data["ff_jet_speed"].astype(float) return filtered_data
[docs] def calc_jet_width_for_one_day(data_row, jet_lat, jet_speed): """ Calculates jet width on a given data array using the calculated jet lat and speed at that lat Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- data_row : xarray.DataArray Data of single time unit containing u-component wind jet_lat : int or float Latitude of jet jet_speed : int or float Speed of jet at jet_lat Returns ---------- jet_widths : array-like Width of jet-stream in latitude. Units for latitude are the same as the input units i.e. likely to be 'degrees_north' """ lat_resolution = float(data_row["lat"][1] - data_row["lat"][0]) if not jet_speed: return np.nan possible_surrounding_jet_vals = ( get_possible_surrounding_jet_lat_vals_for_one_day(data_row, jet_speed) ) if possible_surrounding_jet_vals.size == data_row["lat"].size: print("No jet-width determined") return np.nan possible_surrounding_jet_lat_vals = possible_surrounding_jet_vals["lat"] jet_width = get_width_of_jet_from_surrounding_lat_vals( jet_lat, possible_surrounding_jet_lat_vals, lat_resolution ) return jet_width
[docs] def get_possible_surrounding_jet_lat_vals_for_one_day( one_day_wind_data, jet_speed ): """ Returns array of windspeed values that are within half speed of the jet_speed given. Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- one_day_wind_data : xarray.DataArray Data of single time unit containing u-component wind jet_speed : int or float Speed of jet at jet_lat Returns ---------- possible_surrounding_jet_vals : array-like DataArray of True or False as to whether the windspeed value is within half speed of the jet_speed """ half_jet_speed = jet_speed / 2 possible_surrounding_jet_vals = one_day_wind_data[ one_day_wind_data > half_jet_speed ] return possible_surrounding_jet_vals
[docs] def get_width_of_jet_from_surrounding_lat_vals( jet_lat, possible_surrounding_jet_lat_vals, lat_resolution ): """ Get latitude width of jet around the given jet latitude value. Width given in units of latitude Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- jet_lat : int or float Latitude of jet possible_surrounding_jet_lat_vals : array-like DataArray of lats where the windspeed value is within half speed of the jet_speed lat_resolution : int or float Latitude resolution in degrees Returns ---------- jet_width : array-like Width of jet-stream in latitude. Unit for latitude are the same as the input units i.e. likely to be 'degrees_north' """ lats_within_jet = get_all_surrounding_jet_lats_around_max_lat( jet_lat, possible_surrounding_jet_lat_vals, lat_resolution ) if not lats_within_jet: return np.nan jet_width = max(lats_within_jet) - min(lats_within_jet) return jet_width
[docs] def get_all_surrounding_jet_lats_around_max_lat( jet_lat, possible_surrounding_jet_lat_vals, lat_resolution ): """ Returns all the latitudes in a continuous group (i.e. within one unit of lat resolution) around selected jet (jet_lat). Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- jet_lat : int or float Latitude of jet possible_surrounding_jet_lat_vals : array-like DataArray of lats where the windspeed value is within half speed of the jet_speed lat_resolution : int or float Latitude resolution in degrees Returns ---------- lats_within_jet : list All latitudes in continous group (i.e. within lat_resolution) around jet_lat """ lats_within_jet = [] lats_at_higher_lats = check_lats_within_jet_in_one_direction( jet_lat, possible_surrounding_jet_lat_vals, lat_resolution ) lats_at_lower_lats = check_lats_within_jet_in_one_direction( jet_lat, possible_surrounding_jet_lat_vals, -lat_resolution ) lats_within_jet.extend(lats_at_higher_lats) lats_within_jet.extend(lats_at_lower_lats) return lats_within_jet
[docs] def check_lats_within_jet_in_one_direction( jet_lat, possible_surrounding_jet_lat_vals, amount_to_add_to_lat_val ): """ Creates a list of latitudes that are within a given distance from the jet_lat and only in one direction i.e. increasing or decreasing Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 Parameters ---------- jet_lat : int or float Latitude of jet possible_surrounding_jet_lat_vals : array-like DataArray of lats where the windspeed value is within half speed of the jet_speed amount_to_add_to_lat_val : int or float Check if lat within this amount Returns ---------- lats_within_jet : list All latitudes in continous group (i.e. within lat_resolution) around jet_lat """ lats_within_jet = [] current_lat = jet_lat current_lat_is_within_range = True while current_lat_is_within_range: if current_lat in possible_surrounding_jet_lat_vals: lats_within_jet.append(current_lat) else: current_lat_is_within_range = False current_lat += amount_to_add_to_lat_val return lats_within_jet
[docs] def scale_lat_vals_with_quadratic_func(lats, speeds, scaled_lats): """ Will downscale or upscale the resolution of latitude using a quadratic func. Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- lats : xr.DataArray or array-like Array of latitude values speeds : xr.DataArray or array-like Array of wind-speeds scaled_lats : array-like Array of scaled latitude values of a given resolution (see rescale_lat_resolution function) Returns ---------- scaled_lat_vals : xr.DataArray or array-like Array of rescaled latitude values based scaled_lats """ scaled_lat_vals = apply_quadratic_func(lats, speeds, scaled_lats) return scaled_lat_vals
[docs] def get_latitude_and_speed_where_max_ws_at_reduced_resolution( lats_and_ws, lat_resolution ): """ Get latitude and speed where max windspeed at a reduced resolution. Makes use of the quadratic func to scale latitude values. Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- lats_and_ws : xr.DataArray or array-like Array of latitudes and windspeeds lat_resolution : int or float Latitude resolution in degrees Returns ---------- output : numpy.array latitude and wind-speed value scaled by quadratic func """ lats, ws = lats_and_ws # Remove numpy.nan from list lats = [lat for lat in lats if not np.isnan(lat)] ws = [s for s in ws if not np.isnan(s)] # Scale lats scaled_lats = data_utils.rescale_lat_resolution(lats, lat_resolution) scaled_lat_vals = scale_lat_vals_with_quadratic_func(lats, ws, scaled_lats) decimal_places = data_utils.get_num_of_decimal_places(lat_resolution) max_speed_at_scaled_lat = np.max(scaled_lat_vals) return ( round(scaled_lats[np.argmax(scaled_lat_vals)], decimal_places), max_speed_at_scaled_lat, )
[docs] def get_3_latitudes_and_speed_around_max_ws(row): """ Will get the latitudes neighbouring to east, at and to west where the max windspeed is found Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters -------------- row : xr.DataArray or array-like Array containing latitude and wind-speed values Returns ---------- neighbouring_lats : array-like array of 3 neighbouring latitude coordinates around where maximum windspeed is found in input (row) neighbouring_speeds : array-like array of 3 neighbouring winspeeds values around where maximum windspeed is found in input (row) """ assert "lat" in row.coords, "'lat' needs to be in data.coords" max_lat, _ = get_latitude_and_speed_where_max_ws(row) if np.isnan(max_lat): # occurs when no data in slice return ( np.array([np.nan, np.nan, np.nan], dtype="float64"), np.array([np.nan, np.nan, np.nan], dtype="float64"), ) max_lat_argwhere = np.argwhere(row["lat"].data == max_lat)[0][0] if max_lat_argwhere == 0: neighbouring_lats_ind = [max_lat_argwhere, max_lat_argwhere + 1] elif max_lat_argwhere == row["lat"].data.size - 1: neighbouring_lats_ind = [max_lat_argwhere - 1, max_lat_argwhere] else: neighbouring_lats_ind = [ max_lat_argwhere - 1, max_lat_argwhere, max_lat_argwhere + 1, ] neighbouring_lats = row["lat"].isel(lat=neighbouring_lats_ind).data neighbouring_speeds = row.isel(lat=neighbouring_lats_ind).data # add nan value to edges if len(neighbouring_lats) < 3: neighbouring_lats = add_nan_value_to_arr_if_not_len_enough( neighbouring_lats, len_required=3 ) neighbouring_speeds = add_nan_value_to_arr_if_not_len_enough( neighbouring_speeds, len_required=3 ) return (neighbouring_lats, neighbouring_speeds)
[docs] def add_nan_value_to_arr_if_not_len_enough(arr, len_required): """ Add NaN value to array if any values in the array so it is a certain length Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- arr : array-like Array to check. len_required : int length of array required Returns ---------- arr : array-like new array """ assert ( len(arr) <= len_required ), "length of input array needs to be less or same as len_required" if len(arr) != len_required: arr = np.append(arr, np.nan) return add_nan_value_to_arr_if_not_len_enough(arr, len_required) return arr
[docs] def get_3_neighbouring_coord_values(coord_val, coord_resolution): """ Get the three neighbouring values to a given input coord (coord_val) Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- coord_val : int or float Central coord value to get neighbours from coord_resolution : int or float in degrees Returns ---------- output : array-like array of 3 neighbouring latitude coordinates Usage ---------- get_3_neighbouring_coord_values(45.0, 1.25) >>> np.array([43.75, 45.0, 46.25]) """ if not isinstance(coord_val, float) or not isinstance( coord_resolution, float ): coord_val = float(coord_val) coord_resolution = float(coord_resolution) return np.array( [coord_val - coord_resolution, coord_val, coord_val + coord_resolution] )
[docs] def quadratic_func(x, y): """ Quadratic function. Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- x : xr.DataArray or array-like Array 1 y : xr.DataArray or array-like Array 2 """ p = np.polyfit(x, y, deg=2) return p
[docs] def apply_quadratic_func(x, y, vals): """ Apply quadratic function to an array of values (vals). Component of method from Barnes & Polvani (2013) https://doi.org/10.1175/JCLI-D-12-00536.1 & Grise & Polvani (2016) https://doi.org/10.1002/2015JD024687 Parameters ---------- x : xr.DataArray or array-like Array 1 y : xr.DataArray or array-like Array 2 vals : array-like Values to apply function to Returns ---------- output : array-like quadratic function output """ a, b, c = quadratic_func(x, y) return (a * vals**2) + (b * vals) + c
[docs] def get_jet_lat_and_speed_using_parabola_by_day(data_row): """ Will get jet latitude and speed by fitting a parabola and taking maximum Component of method from Barnes & Polvani (2015) http://journals.ametsoc.org/doi/10.1175/JCLI-D-14-00589.1 Parameters ---------- data_row : xarray.DataArray Data of single time unit containing u-component wind Returns ---------- data_row : xarray.DataArray Data of single time unit containing jet_lat and jet_speed variables """ data_ua = abs(data_row["ua"].dropna(dim="lat")) try: # Try and fit the parabola _, coeff = fit_parabola(data_ua["lat"].data, data_ua.data) except (RuntimeError, ValueError): coeff = [np.nan, np.nan] # Check that the max lat is not above the min or max (problem with using a parabola) if coeff[0] < data_row["lat"].min() or coeff[0] > data_row["lat"].max(): coeff = [np.nan, np.nan] data_row["jet_lat"] = float(coeff[0]) data_row["jet_speed"] = float(coeff[1]) return data_row
[docs] def fit_parabola(x, y): """ Fits a parabola. Component of method from Barnes & Polvani (2015) http://journals.ametsoc.org/doi/10.1175/JCLI-D-14-00589.1 """ coeff, _ = scipy.optimize.curve_fit(parabola, x.flatten(), y.flatten()) fitted_parabola = parabola(x, coeff[0], coeff[1], coeff[2]) return fitted_parabola, coeff
[docs] def parabola(x, lat, speed, a): """ Parabola. Component of method from Barnes & Polvani (2015) http://journals.ametsoc.org/doi/10.1175/JCLI-D-14-00589.1 """ return -a * (x - lat) ** 2 + speed
[docs] def assign_jet_lat_speed_to_ten_day_mean_data(ten_day_mean): """ Joins the values for latitude and windspeed of the point of maximum windspeed for a given sector to the ten_day_mean data. Component of method from Barnes & Simpson 2017 https://doi.org/10.1175/JCLI-D-17-0299.1 Parameters ---------- ten_day_mean : xarray.Dataset Data containing u-component wind and resampled by 10 days Returns ---------- ten_day_mean : xarray.Dataset Data with jet latitude and windspeed """ max_lats_ws = np.array( list(map(get_latitude_and_speed_where_max_ws, ten_day_mean["ua"])) ) ten_day_mean = assign_jet_lat_and_speed_to_data(ten_day_mean, max_lats_ws) return ten_day_mean
[docs] def assign_ten_day_average_jet_lat_speed_to_data(data, ten_day_mean): """ Joins the values for latitude and windspeed of the point of maximum windspeed to the main data Component of method from Barnes & Simpson 2017 https://doi.org/10.1175/JCLI-D-17-0299.1 Parameters ---------- data : xarray.Dataset Data containing u-component wind Returns ---------- output : xarray.Dataset Data with jet latitude and windspeed """ return data.assign( { "jet_lat": (("10_day_average"), ten_day_mean["jet_lat"].data), "jet_speed": (("10_day_average"), ten_day_mean["jet_speed"].data), } )
[docs] def cubic_spline_interpolation(x, y): """ Runs a cubic spline interpolation using 2 equal-length arrays. Component of method from Bracegirdle et al (2018) https://doi.org/10.1175/JCLI-D-17-0320.1 Parameters ---------- x : xarray.DataArray or array-like array 1 to run cubic spline interpolation y : xarray.DataArray or array-like array 2 to run cubic spline interpolation Returns ---------- cubic_interpolation : np.array Cubic spline interpolation """ return scipy.interpolate.interp1d( x, y, kind="cubic", fill_value="extrapolate" )
[docs] def run_cubic_spline_interpolation_to_get_max_lat_and_ws( data, lat_resolution, ua_col="ua" ): """ Runs a cubic spline interpolation to find maximum latitude and maximum windspeed at a given resolution of latitude. Component of method from Bracegirdle et al (2018) https://doi.org/10.1175/JCLI-D-17-0320.1 Parameters ---------- data : xr.Dataset must contain coords lat lat_resolution : int or float Latitude resolution in degrees for cubic spline interpolation ua_col : str u-component windspeed column (default='ua') Returns ---------- max_lat : float latitude with the maximum wind-speed for time period as deterimined by cubic spline interpolation to a given lat resolution max_ws : float maximum wind-speed at the given latitude for time period """ scaled_lats = data_utils.rescale_lat_resolution( data["lat"], lat_resolution ) csi = cubic_spline_interpolation(data["lat"], data[ua_col]) interpolated_ws = csi(scaled_lats) max_lat = scaled_lats[np.argmax(interpolated_ws)] max_ws = max(interpolated_ws) return max_lat, max_ws
[docs] def run_cubic_spline_interpolation_for_each_unit_of_climatology_to_get_max_lat_and_ws( data, lat_resolution, time_col ): """ Runs a cubic spline interpolation to find maximum latitude and maximum windspeed at a given resolution of latitude for each. Component of method from Bracegirdle et al (2018) https://doi.org/10.1175/JCLI-D-17-0320.1 Parameters ---------- data : xarray.Dataset Data containing u-component wind-speed. In the case of Bracegirdle et al. 2018, uses a seasonal and annual mean/climatology resolution : int or float Latitude resolution in degrees for cubic spline interpolation time_col : str Column name containing period (i.e. season, year, month, etc.) Returns ---------- max_lats : list list of latitudes with the maximum wind-speed max_ws : list list of maximum wind-speed at the given latitudes Usage ---------- seasonal_climatology = jetstream_metrics_components.get_climatology(data, "season") seasonal_zonal_mean = seasonal_climatology.mean("lon") ( seasonal_max_lats, seasonal_max_ws, ) = jetstream_metrics_utils.run_cubic_spline_interpolation_for_each_unit_of_climatology_to_get_max_lat_and_ws( seasonal_zonal_mean, resolution=0.075, time_col="season" ) """ max_lats = [] max_ws = [] for period in data[time_col].data: period_data = data.sel({time_col: period}) lat, ws = run_cubic_spline_interpolation_to_get_max_lat_and_ws( period_data, lat_resolution=lat_resolution ) max_lats.append(lat) max_ws.append(ws) return max_lats, max_ws
[docs] def calc_centroid_jet_lat_from_zonal_mean(zonal_mean, area_by_lat): """ Will get the centroid latitude of the u-component wind using: jet_lat = integral(60deg, 30deg)(zonal_u**2*lat) dlat / integral(60deg, 30deg)(zonal_u**2) dlat Component of method from Ceppi et al (2018) https://doi.org/10.1175/JCLI-D-17-0323.1 & Zappa et al. 2018 https://doi.org/10.1029/2019GL083653 Parameters ---------- zonal_mean : xarray.Dataset zonally-average data containing u-component wind data area_by_lat : xarray.DataArray Information on area in each latitude (i.e. m2 per latitude) """ u_hat_by_lat = zonal_mean["ua"] ** 2 * zonal_mean["lat"] * area_by_lat u_hat = zonal_mean["ua"] ** 2 * area_by_lat return u_hat_by_lat.sum("lat") / u_hat.sum("lat")
[docs] def get_latitude_value_in_data_row(data_row, lat_val): """ Will return the latitude value of a given data row. Needed to loop through data and external array Component of method from Ceppi et al (2018) https://doi.org/10.1175/JCLI-D-17-0323.1 & Zappa et al. 2018 https://doi.org/10.1029/2019GL083653 Parameters ---------- data_row : xarray.DataArray Data row to extract variable from using given latitude lat_val : float or int latitude value to extract from data row """ return float(data_row.sel(lat=lat_val, method="nearest"))
[docs] def get_moving_averaged_smoothed_jet_lats_for_one_day( data_row, width_of_pulse=10 ): """ Get moving average of smoothed_jet_lats for one day. Used in combination with a groupby mapping. Component of method from Kerr et al. (2020) https://onlinelibrary.wiley.com/doi/10.1029/2020JD032735 Parameters ---------- data_row : xarray.DataArray Data containing u-component windspeed of one time unit Returns ---------- smoothed_jet_lat : xarray.Dataset Data containing jet-stream position """ data_row["jet_lat"] = get_jet_lat_by_lon(data_row["ua"]) data_row["smoothed_jet_lat"] = ( smooth_jet_lat_across_lon_with_rectangular_pulse( data_row["jet_lat"], width_of_pulse=width_of_pulse ) ) return data_row
[docs] def smooth_jet_lat_across_lon_with_rectangular_pulse( jet_lat_data, width_of_pulse ): """ Smooth jet position (jet latitude) by carrying out a convolution with a rectangular pulse. Component of method from Kerr et al. (2020) https://onlinelibrary.wiley.com/doi/10.1029/2020JD032735 Parameters ---------- jet_lat_data : xarray.DataArray Data detailing the latitude of jet-stream at each longitude of one time unit width_of_pulse : float or int Width to make rectangular pulse (likely to be in 'degrees_east') Returns ---------- filtered_data : xarray.DataArray Data detailing the latitude of jet-stream that has been smoothed using rectangular pulse """ sig = jet_lat_data["lon"] % jet_lat_data < width_of_pulse filtered_data = jet_lat_data[sig] return filtered_data
[docs] def get_jet_lat_by_lon(data_row): """ Gets all the latitudes of maximum wind-speeds by longitude. Component of method from Kerr et al. (2020) https://onlinelibrary.wiley.com/doi/10.1029/2020JD032735 Parameters ---------- data_row : xarray.DataArray Data containing u-component windspeed of one time unit Returns ---------- output : xarray.DataArray Data detailing the latitude of jet-stream at each longitude of one time unit """ if "time" in data_row.dims: data_row = data_row.squeeze("time") argmax_lat_indices = xr.DataArray( dims=("lon",), data=np.nanargmax(data_row, axis=data_row.get_axis_num("lat")), ) return data_row["lat"][argmax_lat_indices]