#!/usr/bin/env python
u"""
smooth.py
Written by Tyler Sutterley (01/2023)
Computes a moving average of a time-series using three possible routines:
1) centered moving average
2) 13-month Loess filter (default)
3) 13-month Loess filter weighted and outputs for all dates
Note: due to the missing months in the GRACE/GRACE-FO time series,
a standard moving average will have problems if the
missing months are not interpolated.
CALLING SEQUENCE:
smth = gravity_toolkit.time_series.smooth(t_in, d_in, HFWTH=6)
INPUTS:
t_in: input time array
d_in: input data array
OUTPUTS:
time: time after removing start and end half-windows
data: smoothed time-series
season: seasonal component calculated by the Loess filter
annual: annual component calculated by the Loess filter
semiann: semi-annual component calculated by the Loess filter
trend: instantaneous trend calculated by the Loess filter
error: estimated error of the instantaneous trend
noise: noise component after removing the Loess trend and seasonal components
reduce: original time series after removing start and end half-windows
OPTIONS:
MOVING: calculates centered moving average using mean of window
mean of: (January up to December) and (February up to January)
WEIGHT: smoothing algorithm that backward models dates before
half-width and forward models dates after half-width
0: use unweighted Loess filter
1: use linear weights with Loess filter
2: use gaussian weights with Loess filter
HFWTH: half-width of the moving average (default = 6 for 13-month Loess)
DATA_ERR: input error for known and equal errors (single value)
STDEV: standard deviation of output error
CONF: confidence interval of output error (default is for 95%)
PYTHON DEPENDENCIES:
numpy: Scientific Computing Tools For Python (https://numpy.org)
scipy: Scientific Tools for Python (https://docs.scipy.org/doc/)
UPDATE HISTORY:
Updated 01/2023: refactored time series analysis functions
Updated 04/2022: updated docstrings to numpy documentation format
Updated 05/2021: define int/float precision to prevent deprecation warning
Updated 07/2020: added function docstrings
Updated 10/2019: changing Y/N flags to True/False. output amplitudes
Updated 09/2019: calculate and output annual and semi-annual phase
Updated 08/2018: use implicit import of scipy stats and special
Updated 09/2017: using rcond=-1 in numpy least-squares algorithms
Updated 09/2014: added output for the reduced time-series
Updated 06/2014: added parameter DATA_ERR for known and equal errors
Updated 03/2014: created a new smoothing algorithm
similar to Loess-type least-squares algorithm but has
backward models dates before HFWTH and forward models dates after
if all dates are available will be centrally weighted
need to update to include error and should find a reference
as i came up with this algorithm at 4:30am
Updated 02/2014: minor update to if statements
Updated 09/2013: switched MOVING flag to Y/N
Minor change: P_cons, and P_lin changed to P_x0, and P_x1
Updated 06/2013: added error for instantaneous trend
Updated 04/2013: converted to python and added more outputs
Updated 02/2013: using a centered moving average
added seasonal option to compute the smooth seasonal variation
calculated by the loess filter program.
added option to compute the noise component after removing the
smoothed trend and the seasonal component
Updated 03/2012: added Loess smoothing following Velicogna (2009)
Written 12/2011
"""
import numpy as np
import scipy.stats
import scipy.special
[docs]
def smooth(t_in, d_in, HFWTH=6, MOVING=False, DATA_ERR=0, WEIGHT=0,
STDEV=0, CONF=0):
"""
Computes the moving average of a time-series
1) centered moving average
2) 13-month Loess filter :cite:p:`Velicogna:2009ft`
3) 13-month Loess filter weighted and outputs for all dates
Parameters
----------
t_in: float
input time array
d_in: float
input data array
HFWTH: int
half-width of the moving average
MOVING: bool, default False
calculates centered moving average using mean of window
WEIGHT: smoothing algorithm that backward models dates before
half-width and forward models dates after half-width
- ``0``: use unweighted Loess filter
- ``1``: use linear weights with Loess filter
- ``2``: use gaussian weights with Loess filter
DATA_ERR: float or list
input error for known and equal errors
STDEV: float, default 0
Standard deviation of output error
CONF: float, default 0
Confidence interval of output error
Returns
-------
time: float
time after removing start and end half-windows
data: float
smoothed time-series
season: float
seasonal component calculated by the Loess filter
annual: float
annual component calculated by the Loess filter
semiann: float
semi-annual component calculated by the Loess filter
trend: float
instantaneous trend calculated by the Loess filter
error: float
estimated error of the instantaneous trend
noise: float
noise component after removing the Loess trend and seasonal components
reduce: float
original time series after removing start and end half-windows
"""
# remove singleton dimensions
t_in = np.squeeze(t_in)
d_in = np.squeeze(d_in)
nmax = len(t_in)
# Indice with start of seasonal terms:
SEAS = 2
# set either the standard deviation or the confidence interval
if (STDEV != 0):
# Setting the standard deviation of the output error
alpha = 1.0 - scipy.special.erf(STDEV/np.sqrt(2.0))
elif (CONF != 0):
# Setting the confidence interval of the output error
alpha = 1.0 - CONF
else:
# Default is 95% confidence interval
alpha = 1.0 - (0.95)
# moving average algorithm
if MOVING:
# Centered moving average using the mean of each window
# equal to mean of Jan:Dec and Feb:Jan+1 for HFWTH 6
# problematic with GRACE due to missing months within time-series
# output time
tout = t_in[HFWTH:nmax-HFWTH]
smth = np.zeros((nmax-2*HFWTH))
for k in range(0, (nmax-(2*HFWTH))):
# centered moving average sum[2:i-1] + 0.5[1] + 0.5[i]
smth[k] = np.sum(d_in[k+1:k+2*HFWTH]) + 0.5*(d_in[k]+d_in[k+2*HFWTH])
dsmth = smth/(2*HFWTH)
return {'data':dsmth, 'time':tout}
elif WEIGHT in (1,2):
# weighted moving average calculated from the least-squares of window
# and removing An/SAn signal. models entire range of dates
# for a HFWTH of 6 (remove annual)
# will fit linear model to data for 13 months
# creates a weight array ranging from 1:HFWTH+1:-1 for linear
# or a gaussian function centered on HFWTH
# which favors the regression with the date centered
# smoothed time-series = sum(smth*weights)/sum(weights)the weight array
# output time = input time
tout = np.copy(t_in)
if (WEIGHT == 1):
# linear weights (range from 1:HFWTH+1:-1)
wi = np.concatenate((np.arange(1,HFWTH+2,dtype=np.float64),
np.arange(HFWTH,0,-1,dtype=np.float64)),axis=0)
elif (WEIGHT == 2):
# gaussian weights
# default standard deviation of 2
stdev = 2.0
# gaussian function over range 2*HFWTH
# centered on HFWTH
xi=np.arange(0, 2*HFWTH+1)
wi=np.exp(-(xi-HFWTH)**2/(2.0*stdev**2))/(stdev*np.sqrt(2.0*np.pi))
dsmth = np.zeros((nmax))
dseason = np.zeros((nmax))
dannual = np.zeros((nmax))
annamp = np.zeros((nmax))
annphase = np.zeros((nmax))
dsemian = np.zeros((nmax))
semiamp = np.zeros((nmax))
semiphase = np.zeros((nmax))
weight = np.zeros((nmax))
for i in range(0, (nmax-(2*HFWTH))):
ran = i + np.arange(0, 2*HFWTH+1)
P_x0 = np.ones((2*HFWTH+1))# Constant Term
P_x1 = t_in[ran]# Linear Term
# Annual term = 2*pi*t*harmonic
P_asin = np.sin(2*np.pi*t_in[ran])
P_acos = np.cos(2*np.pi*t_in[ran])
#Semi-Annual = 4*pi*t*harmonic
P_ssin = np.sin(4*np.pi*t_in[ran])
P_scos = np.cos(4*np.pi*t_in[ran])
# x0,x1,AS,AC,SS,SC
TMAT = np.array([P_x0, P_x1, P_asin, P_acos, P_ssin, P_scos])
TMAT = np.transpose(TMAT)
# Least-Squares fitting
# (the [0] denotes coefficients output)standard
beta_mat = np.linalg.lstsq(TMAT,d_in[ran],rcond=-1)[0]
# Calculating the output components
# add weighted smoothed time series
dsmth[ran] += wi*np.dot(TMAT[:,0:SEAS],beta_mat[0:SEAS])
# seasonal component
dseason[ran] += wi*np.dot(TMAT[:,SEAS:],beta_mat[SEAS:])
# annual component
AS,AC = beta_mat[SEAS:SEAS+2]
dannual[ran] += wi*np.dot(TMAT[:,SEAS:SEAS+2],[AS,AC])
annamp[ran] += wi*np.sqrt(AS**2 + AC**2)
annphase[ran] += wi*np.arctan2(AC,AS)*180.0/np.pi
# semi-annual component
SS,SC = beta_mat[SEAS+2:SEAS+4]
dsemian[ran] += wi*np.dot(TMAT[:,SEAS+2:SEAS+4],[SS,SC])
semiamp[ran] += wi*np.sqrt(SS**2 + SC**2)
semiphase[ran] += wi*np.arctan2(SC,SS)*180.0/np.pi
# add weights
weight[ran] += wi
# divide weighted smoothed time-series by weights
# to get output smoothed time-series
dsmth /= weight
dseason /= weight
dannual /= weight
annamp /= weight
annphase /= weight
dsemian /= weight
semiamp /= weight
semiphase /= weight
# noise = data - smoothed - seasonal
dnoise = d_in - dsmth - dseason
return {'data':dsmth, 'seasonal':dseason, 'annual':dannual,
'annamp':annamp, 'annphase':annphase, 'semiann':dsemian,
'semiamp':semiamp, 'semiphase':semiphase, 'noise':dnoise,
'time':tout, 'weight':weight}
else:
# Moving average calculated from least-squares of window
# and removing An/SAn signal
# output time
tout = t_in[HFWTH:nmax-HFWTH]
dsmth = np.zeros((nmax-2*HFWTH))
dtrend = np.zeros((nmax-2*HFWTH))
derror = np.zeros((nmax-2*HFWTH))
dseason = np.zeros((nmax-2*HFWTH))
dannual = np.zeros((nmax-2*HFWTH))
annamp = np.zeros((nmax-2*HFWTH))
annphase = np.zeros((nmax-2*HFWTH))
dsemian = np.zeros((nmax-2*HFWTH))
semiamp = np.zeros((nmax-2*HFWTH))
semiphase = np.zeros((nmax-2*HFWTH))
dnoise = np.zeros((nmax-2*HFWTH))
dreduce = np.zeros((nmax-2*HFWTH))
for i in range(0, (nmax-(2*HFWTH))):
ran = i + np.arange(0, 2*HFWTH+1)
P_x0 = np.ones((2*HFWTH+1))# Constant Term
P_x1 = t_in[ran]# Linear Term
# Annual term = 2*pi*t*harmonic
P_asin = np.sin(2*np.pi*t_in[ran])
P_acos = np.cos(2*np.pi*t_in[ran])
#Semi-Annual = 4*pi*t*harmonic
P_ssin = np.sin(4*np.pi*t_in[ran])
P_scos = np.cos(4*np.pi*t_in[ran])
# x0,x1,AS,AC,SS,SC
TMAT = np.array([P_x0, P_x1, P_asin, P_acos, P_ssin, P_scos])
TMAT = np.transpose(TMAT)
# Least-Squares fitting
# (the [0] denotes coefficients output)
beta_mat = np.linalg.lstsq(TMAT,d_in[ran],rcond=-1)[0]
n_terms = len(beta_mat)
if (DATA_ERR != 0):
# LEAST-SQUARES CASE WITH KNOWN AND EQUAL ERROR
P_err = DATA_ERR*np.ones((2*HFWTH+1))
Hinv = np.linalg.inv(np.dot(np.transpose(TMAT),TMAT))
# Normal Equations
NORMEQ = np.dot(Hinv,np.transpose(TMAT))
beta_err = np.zeros((n_terms))
for n in range(0,n_terms):
beta_err[n] = np.sqrt(np.sum((NORMEQ[n,:]*P_err)**2))
else:
# Error Analysis
# Degrees of Freedom
nu = (2*HFWTH+1) - n_terms
# Mean square error
MSE = np.dot(np.transpose(d_in[ran] - np.dot(TMAT,beta_mat)),
(d_in[ran] - np.dot(TMAT,beta_mat)))/nu
# Covariance Matrix
# Multiplying the design matrix by itself
Hinv = np.linalg.inv(np.dot(np.transpose(TMAT),TMAT))
# Taking the diagonal components of the cov matrix
hdiag = np.diag(Hinv)
# STANDARD LEAST-SQUARES CASE
# Regression with Errors with Unknown Standard Deviations
# Student T-Distribution with D.O.F. nu
# t.ppf parallels tinv in matlab
tstar = scipy.stats.t.ppf(1.0-(alpha/2.0),nu)
# beta_err is the error for each coefficient
# beta_err = t(nu,1-alpha/2)*standard error
st_err = np.sqrt(MSE*hdiag)
beta_err = tstar*st_err
# Calculating the output components
# smoothed time series
dsmth[i] = np.dot(TMAT[HFWTH,0:SEAS],beta_mat[0:SEAS])
dtrend[i] = np.copy(beta_mat[1])# Instantaneous data trend
derror[i] = np.copy(beta_err[1])# Error in trend
# seasonal component
dseason[i] = np.dot(TMAT[HFWTH,SEAS:],beta_mat[SEAS:])
# annual component
AS,AC = beta_mat[SEAS:SEAS+2]
dannual[i] = np.dot(TMAT[HFWTH,SEAS:SEAS+2],[AS,AC])
annphase[i] = np.arctan2(AC,AS)*180.0/np.pi
annamp[i] = np.sqrt(AS**2 + AC**2)
# semi-annual component
SS,SC = beta_mat[SEAS+2:SEAS+4]
dsemian[i] = np.dot(TMAT[HFWTH,SEAS+2:SEAS+4],[SS,SC])
semiamp[i] = np.sqrt(SS**2 + SC**2)
semiphase[i] = np.arctan2(SC,SS)*180.0/np.pi
# noise component
dnoise[i] = d_in[i+HFWTH] - dsmth[i] - dseason[i]
# reduced time-series
dreduce[i] = d_in[i+HFWTH]
return {'data':dsmth, 'trend':dtrend, 'error':derror,
'seasonal':dseason, 'annual':dannual, 'annphase':annphase,
'annamp':annamp, 'semiann':dsemian, 'semiamp':semiamp,
'semiphase':semiphase, 'noise':dnoise, 'time':tout, 'reduce':dreduce}