{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Linz post-processing examples\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import os\nimport pandas as pd\nimport toto\nfrom toto.inputs.linz import LINZfile\nfrom toto.core.totoframe import TotoFrame\nfrom toto.filters.despike_phasespace3d import despike_phasespace3d\nfrom toto.filters.lanczos_filter import lanczos_filter\nfrom toto.filters.detrend import detrend\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport requests\nimport zipfile\nimport datetime\nimport copy"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Link to lINZ files\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "BASEURL='https://sealevel-data.linz.govt.nz/tidegauge/%s/%i/%i/%s_%i_%s.zip'\n#BASEURL='https://sealevel-data.linz.govt.nz/tidegauge/AUCT/2009/40/AUCT_40_2009085.zip"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Station to download\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "tstart=datetime.datetime(2019,1,1)\ntend=datetime.datetime(2020,1,1)\nstation='AUCT'\nsensor=40"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "if not os.path.isfile('AUCT_40_2019001.csv'):\n    # Download Linz elevation file from `tstart` to `tend` at `station` tidal gauge\n    dt=copy.deepcopy(tstart)\n    files=[]\n    while dt<tend:\n        fileout='%s_%03i.zip' % (station,dt.timetuple().tm_yday)\n        linzurl=BASEURL % (station,dt.year,sensor,station,sensor,str(dt.year)+'%03i'%dt.timetuple().tm_yday)\n        linzfile = requests.get(linzurl, allow_redirects=True)\n        if linzfile.status_code != 404:\n            files.append(fileout)\n            with open(fileout, 'wb') as fd:\n                for chunk in linzfile.iter_content(chunk_size=128):\n                    fd.write(chunk)\n        dt+=datetime.timedelta(days=1)\n\n    #%%\n    # Download AUCKLAND station README\n    fileout='%s_readme.txt' % station\n    linzurl='https://sealevel-data.linz.govt.nz/tidegauge/%s/%s_readme.txt' % (station,station)\n    linzfile = requests.get(linzurl, allow_redirects=True)\n    with open(fileout, 'wb') as fd:\n        fd.write(linzfile.content)\n\n    #%%\n    # Unzip the all files and save to file\n    filenames=[]\n    for file in files:\n        with zipfile.ZipFile(file) as z:\n            filenames.append(z.namelist()[0])\n            z.extractall()\n\n    #%%\n    # Merge all timeseries into 1\n    with open(filenames[0], 'w') as outfile:\n        for fname in filenames[1:]:\n            with open(fname) as infile:\n                outfile.write(infile.read())"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Reading the files into a dataframe\ndf=LINZfile(filenames[0])._toDataFrame()[0]\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df=LINZfile('AUCT_40_2019001.csv')._toDataFrame()[0]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "plot the raw timeseries\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df.rename(columns={'elev'+str(sensor):'elev'},inplace=True)\nplt.plot(df.index,df['elev'])\nplt.show(block=False)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Add the Panda Dataframe to a Totoframe.\nThe reason is so if anyhting changes to the dataframe,\nthe metadata get saved in a sperate dictionary.\nAlso the dataframe gets clean and any gaps in the data get filled with NaN.\nThe timeserie is now with a uniform time interval\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "tf=TotoFrame()\ntf.add_dataframe([df],[station])\ndf=tf[list(tf.keys())[0]]['dataframe']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Resample to hourly otherwise the next steps might crash\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df = df.resample('1H').nearest()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Apply a phase-space method filter to remove most of the spike \n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df['filtered']=despike_phasespace3d(df['elev'])\nplt.plot(df.index,df['filtered'])\nplt.show(block=False)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Remove the rest of the spike if needed\n\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Now the timeseries is clean will start extracting the component\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "del df['elev']\ndf.rename(columns={'filtered':'elev'},inplace=True)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Detrending but don't think there is much to detrend\nBefore detrending we store the position of all the gaps\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "f = np.where(np.isnan(df.elev.values) == 1)\n# We fill gaps using the mean\ndf.fillna(df.elev.mean(), inplace=True)\n# Get the detrended time series\ndf['et'] = detrend(df['elev'],args={'Type':'linear'})\n# Strore the trend\ndf['trend'] = df['elev']-df['et']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "the tidal analysis\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "lat=tf[list(tf.keys())[0]]['latitude']\ntmp=df.TideAnalysis.detide(mag='et',\\\n                                args={'minimum SNR':2,\\\n                                      'latitude':lat,\n                                      'constit': 'auto'\n                                     })\n\ndf['tide']=tmp['ett'].copy()\n# Replace the gap filled data with tidal elevation\ndf['et'].values[f] = df['tide'].values[f]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Monthly sea level analysis using lanczos filter\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df['msea'] = lanczos_filter(df['et'], args={'window':24*30,'Type':'lanczos lowpas 2nd order'})\n\n# We subtract that component to what is left of the signal\ndf['et'] = df['et'] - df['msea']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Storm surgeanalysis using lanczos filter\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df['ss'] = lanczos_filter(df['et'], args={'window':40,\n    'Type':'lanczos lowpas 2nd order'})\n# We subtract that component to what is left of the signal and get the residual\ndf['et'] = df['et'] - df['ss']"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Finally we subtract the tide to get the residual\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df['res'] = df['et'] = df['et'] - df['tide']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "for key in df.keys():\n    if key!='time':\n        df[key].values[f] = np.nan"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Plot the results\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "fig = plt.figure()\nax=plt.subplot(111)\nplt.title(station)\nvariables_to_plot=['elev','trend','tide','msea','ss','res']\nfor v in variables_to_plot:\n    plt.plot(df.index,df[v],label=v)\n\n\nplt.legend()\nfig.autofmt_xdate()\nplt.show(block=False)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Water elevation fit the distribution\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "df.Extreme.distribution_shape(mag='ss',\\\n        args={'Fitting distribution':'Weibull',#'Weibull','Gumbel','GPD','GEV'\n         'method':'ml',#'pkd','pwm','mom', 'ml',\n         'threshold type':'percentile', # 'percentile' or 'value'\n         'threshold value':95.0,\n         'minimum number of peaks over threshold': 4,\n         'minimum time interval between peaks (h)':2.0,\n         'time blocking':'Annual',#'Annual',Seasonal (South hemisphere)' ,'Seasonal (North hemisphere)','Monthly'\n         'Display peaks':'Off',#'On' or 'Off'\n         'Display CDFs':'On',#'On' or 'Off'\n         })"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.10"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}