Linz post-processing examples

import os
import pandas as pd
import toto
from toto.inputs.linz import LINZfile
from toto.core.totoframe import TotoFrame
from toto.filters.despike_phasespace3d import despike_phasespace3d
from toto.filters.lanczos_filter import lanczos_filter
from toto.filters.detrend import detrend
import numpy as np
import matplotlib.pyplot as plt
import requests
import zipfile
import datetime
import copy

Link to lINZ files

BASEURL='https://sealevel-data.linz.govt.nz/tidegauge/%s/%i/%i/%s_%i_%s.zip'
#BASEURL='https://sealevel-data.linz.govt.nz/tidegauge/AUCT/2009/40/AUCT_40_2009085.zip

Station to download

tstart=datetime.datetime(2019,1,1)
tend=datetime.datetime(2020,1,1)
station='AUCT'
sensor=40
if not os.path.isfile('AUCT_40_2019001.csv'):
    # Download Linz elevation file from `tstart` to `tend` at `station` tidal gauge
    dt=copy.deepcopy(tstart)
    files=[]
    while dt<tend:
        fileout='%s_%03i.zip' % (station,dt.timetuple().tm_yday)
        linzurl=BASEURL % (station,dt.year,sensor,station,sensor,str(dt.year)+'%03i'%dt.timetuple().tm_yday)
        linzfile = requests.get(linzurl, allow_redirects=True)
        if linzfile.status_code != 404:
            files.append(fileout)
            with open(fileout, 'wb') as fd:
                for chunk in linzfile.iter_content(chunk_size=128):
                    fd.write(chunk)
        dt+=datetime.timedelta(days=1)

    #%%
    # Download AUCKLAND station README
    fileout='%s_readme.txt' % station
    linzurl='https://sealevel-data.linz.govt.nz/tidegauge/%s/%s_readme.txt' % (station,station)
    linzfile = requests.get(linzurl, allow_redirects=True)
    with open(fileout, 'wb') as fd:
        fd.write(linzfile.content)

    #%%
    # Unzip the all files and save to file
    filenames=[]
    for file in files:
        with zipfile.ZipFile(file) as z:
            filenames.append(z.namelist()[0])
            z.extractall()

    #%%
    # Merge all timeseries into 1
    with open(filenames[0], 'w') as outfile:
        for fname in filenames[1:]:
            with open(fname) as infile:
                outfile.write(infile.read())

Reading the files into a dataframe df=LINZfile(filenames[0])._toDataFrame()[0]

df=LINZfile('AUCT_40_2019001.csv')._toDataFrame()[0]

Out:

Line SUMMARY OF TIDE GAUGE ZERO not found

plot the raw timeseries

df.rename(columns={'elev'+str(sensor):'elev'},inplace=True)
plt.plot(df.index,df['elev'])
plt.show(block=False)
plot linz exemple

Add the Panda Dataframe to a Totoframe. The reason is so if anyhting changes to the dataframe, the metadata get saved in a sperate dictionary. Also the dataframe gets clean and any gaps in the data get filled with NaN. The timeserie is now with a uniform time interval

tf=TotoFrame()
tf.add_dataframe([df],[station])
df=tf[list(tf.keys())[0]]['dataframe']

Resample to hourly otherwise the next steps might crash

df = df.resample('1H').nearest()

Apply a phase-space method filter to remove most of the spike

df['filtered']=despike_phasespace3d(df['elev'])
plt.plot(df.index,df['filtered'])
plt.show(block=False)
plot linz exemple

Remove the rest of the spike if needed

Now the timeseries is clean will start extracting the component

del df['elev']
df.rename(columns={'filtered':'elev'},inplace=True)

Detrending but don’t think there is much to detrend Before detrending we store the position of all the gaps

f = np.where(np.isnan(df.elev.values) == 1)
# We fill gaps using the mean
df.fillna(df.elev.mean(), inplace=True)
# Get the detrended time series
df['et'] = detrend(df['elev'],args={'Type':'linear'})
# Strore the trend
df['trend'] = df['elev']-df['et']

the tidal analysis

lat=tf[list(tf.keys())[0]]['latitude']
tmp=df.TideAnalysis.detide(mag='et',\
                                args={'minimum SNR':2,\
                                      'latitude':lat,
                                      'constit': 'auto'
                                     })

df['tide']=tmp['ett'].copy()
# Replace the gap filled data with tidal elevation
df['et'].values[f] = df['tide'].values[f]

Out:

solve: matrix prep ... solution ... diagnostics ... done.
prep/calcs ... done.

Monthly sea level analysis using lanczos filter

df['msea'] = lanczos_filter(df['et'], args={'window':24*30,'Type':'lanczos lowpas 2nd order'})

# We subtract that component to what is left of the signal
df['et'] = df['et'] - df['msea']

Storm surgeanalysis using lanczos filter

df['ss'] = lanczos_filter(df['et'], args={'window':40,
    'Type':'lanczos lowpas 2nd order'})
# We subtract that component to what is left of the signal and get the residual
df['et'] = df['et'] - df['ss']

Finally we subtract the tide to get the residual

df['res'] = df['et'] = df['et'] - df['tide']
for key in df.keys():
    if key!='time':
        df[key].values[f] = np.nan

Plot the results

fig = plt.figure()
ax=plt.subplot(111)
plt.title(station)
variables_to_plot=['elev','trend','tide','msea','ss','res']
for v in variables_to_plot:
    plt.plot(df.index,df[v],label=v)


plt.legend()
fig.autofmt_xdate()
plt.show(block=False)
AUCT

Water elevation fit the distribution

df.Extreme.distribution_shape(mag='ss',\
        args={'Fitting distribution':'Weibull',#'Weibull','Gumbel','GPD','GEV'
         'method':'ml',#'pkd','pwm','mom', 'ml',
         'threshold type':'percentile', # 'percentile' or 'value'
         'threshold value':95.0,
         'minimum number of peaks over threshold': 4,
         'minimum time interval between peaks (h)':2.0,
         'time blocking':'Annual',#'Annual',Seasonal (South hemisphere)' ,'Seasonal (North hemisphere)','Monthly'
         'Display peaks':'Off',#'On' or 'Off'
         'Display CDFs':'On',#'On' or 'Off'
         })
  • Annual
  • Annual: Omni

Out:

FitDistribution:
alpha = 0.05
method = ml
LLmax = 23.15105550486007
LPSmax = -51.51073426408082
pvalue = 0.11421246906897149
par = [1.14123187 0.09934601 0.07528642]
par_lower = [0.69650123 0.09934601 0.03863236]
par_upper = [1.58596251 0.09934601 0.11194048]
par_fix = [nan, 0.09934601253759345, nan]
par_cov = [[0.05148704 0.         0.00141487]
 [0.         0.         0.        ]
 [0.00141487 0.         0.00034974]]

Total running time of the script: ( 0 minutes 27.995 seconds)

Gallery generated by Sphinx-Gallery