Source code for toto.filters.despike_phasespace3d
""" Removes spike noise from Acoustic Doppler
Velocimetry (ADV) data using phase-space method, using modified Goring and Nikora (2002) method by Nobuhito Mori (2005).
Further modified by Joseph Ulanowski to remove offset in output (2014).
Parameters
~~~~~~~~~~
input_array : Panda Obj
The Panda dataframe.
Examples:
~~~~~~~~~
>>> df['phasespace3d']=despike_phasespace3d.despike_phasespace3d(df['signal'].copy())
>>>
"""
import numpy as np
[docs]def despike_phasespace3d( input_array,args={}) :
# --- initial setup
# number of maximum iternation
f=input_array.to_numpy(copy=True)
n_iter = 20
n_out = 999
n = f.shape[0]
f_mean = 0# % do not calculate f_mean here, as it will be affected by spikes (was: f_mean = nanmean(fi);)
lamba = np.sqrt(2*np.log(n));
# --- loop
n_loop = 1;
while (n_out!=0) & (n_loop <= n_iter):
# step 0
f_mean=f_mean+np.nanmean(f) # accumulate offset value at each step [J.U.]
f = f - np.nanmean(f)
# step 1: first and second derivatives
f_t = np.gradient(f,axis=0)
f_tt = np.gradient(f_t,axis=0)
# step 2: estimate angle between f and f_tt axis
if n_loop==1:
theta = np.arctan2( np.nansum(f*f_tt), np.nansum(f**2) )
#step 3: checking outlier in the 3D phase space
xp,yp,zp,ip,coef = func_excludeoutlier_ellipsoid3d(f,f_t,f_tt,theta)
# --- excluding data
n_nan_1 = np.nonzero(np.isnan(f))[0].shape[0]
f[ip] = np.nan
n_nan_2 = np.nonzero(np.isnan(f))[0].shape[0]
n_out = n_nan_2 - n_nan_1;
# --- end of loop
n_loop += 1
# --- post process
go = f + f_mean; # add offset back
input_array.values[:]=go
return input_array
[docs]def func_excludeoutlier_ellipsoid3d(xi,yi,zi,theta):
""" %
% This program excludes the points outside of ellipsoid in two-
% dimensional domain
"""
n = np.max(xi.shape)
lamba = np.sqrt(2*np.log(n))
xp = [];
yp = [];
zp = [];
ip = [];
# --- rotate data
if theta == 0:
X = xi
Y = yi
Z = zi
else:
R = np.array([ [np.cos(theta), 0, np.sin(theta)],[ 0, 1, 0],[ -np.sin(theta), 0, np.cos(theta)]])
X = xi*R[0,0] + yi*R[0,1] + zi*R[0,2]
Y = xi*R[1,0] + yi*R[1,1] + zi*R[1,2]
Z = xi*R[2,0] + yi*R[2,1] + zi*R[2,2]
# --- preprocess
a = lamba*np.nanstd(X)
b = lamba*np.nanstd(Y)
c = lamba*np.nanstd(Z)
# --- main
m = -1
for i in range(0,n):
x1 = X[i]
y1 = Y[i]
z1 = Z[i]
# point on the ellipsoid
x2 = a*b*c*x1/np.sqrt((a*c*y1)**2+b**2*(c**2*x1**2+a**2*z1**2))
y2 = a*b*c*y1/np.sqrt((a*c*y1)**2+b**2*(c**2*x1**2+a**2*z1**2))
zt = c**2* ( 1 - (x2/a)**2 - (y2/b)**2 )
if z1 < 0:
z2 = -np.sqrt(zt)
elif z1 > 0:
z2 = np.sqrt(zt)
else:
z2 = 0
# check outlier from ellipsoid
dis = (x2**2+y2**2+z2**2) - (x1**2+y1**2+z1**2)
if dis < 0 :
m = m + 1
ip.append( i)
xp.append(xi[i])
yp.append(yi[i])
zp.append(zi[i])
coef=np.ndarray((3,))
coef[0] = a
coef[1] = b
coef[2] = c
return xp,yp,zp,ip,coef
if __name__ == '__main__':
import pandas as pd
import matplotlib.pyplot as plt
x = np.arange(0,10*np.pi,0.1)
y= np.sin(x)
y[99]=100
y[150]=-44
df = pd.DataFrame(y, columns=list('e'))
plt.plot(df['e'],'b')
go=func_despike_phasespace3d(df)
plt.plot(go['e'],'r')
plt.show()