Source code for toto.plugins.plots.plot_roses

import pandas as pd
import os
from ._do_roses import do_roses
from ._do_bias_hist import do_bias_hist
from ._do_density_diagramm import do_density_diagramm
from ._do_perc_of_occurence import do_perc_of_occurence
from ._do_qq_plot import qq_plot
from ._thermocline import thermocline
from toto.plugins.statistics._do_joint_prob import _do_joint_prob_plot
from ...core.toolbox import get_opt,dir_interval,get_increment
import numpy as np


[docs]@pd.api.extensions.register_dataframe_accessor("StatPlots") class StatPlots: def __init__(self, pandas_obj): # self._validate(pandas_obj) self.data = pandas_obj self.dfout = pd.DataFrame(index=self.data.index.copy())
[docs] def plot_roses(self,mag='mag',drr='drr',\ args={'title':'Current speed',\ 'units':'m/s',\ 'speed bins (optional)':[], '% quadran (optional)':[], 'time blocking':{'Annual':True,'Seasonal (South hemisphere)':False,'Seasonal (North hemisphere)':False,'Monthly':False}, 'folder out':os.getcwd(), 'display':True }): """ This function provides annual, seasonal or monthly rose plots for wind, wave, current or any direcional variable. This function is using https://github.com/python-windrose/windrose Parameters ~~~~~~~~~~ mag : str Name of the column from which to get stats. drr : str Column name representing the directions. args: dict Dictionnary with the folowing keys: title: str Graph title speed bins (optional): list Speed to plot % quadran (optional): list Percentage of each occurence to plot display: str `On` or `Off` to display image folder out: str Path to save the output time blocking: str if ``Time blocking=='Annual'``, Statistics will be calculated for the whole timeserie if ``Time blocking=='Seasonal (South hemisphere)'``, Statistics will be calculated for South hemisphere seasons if ``Time blocking=='Seasonal (North hemisphere)'``, Statistics will be calculated for North hemisphere seasons if ``Time blocking=='Monthly'``, Statistics will be calculated for each month. Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.plot_roses(mag='U',drr='drr',args={'time blocking':'Yearly'}) >>> """ display=args.get('display',True) # if args.get('display','Off')=='Off': # display=False unit=get_opt(self.data[mag],'units',args['units']) if not hasattr(self.data,'filename'): self.data.filename='' filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_rose_'+mag+'.png') do_roses(self.data.index,self.data[mag],self.data[drr],unit,args['title'], args['speed bins (optional)'],args['% quadran (optional)'],args['time blocking'],filename,display)
[docs] def BIAS_histogramm(self,measured='measured',modelled='modelled', args={'Nb of bins':30,'Xlabel':'','units':'','display':{'On':True,'Off':False},'folder out':os.getcwd()}): """ This function provides a bias histogramm between measured and predicted data for any variables Parameters ~~~~~~~~~~ measured : str Name of the column to contain the measured data. modelled : str Name of the column to contain the modelled data. args: dict Dictionnary with the folowing keys: Nb of bins: int Number of bins to use Xlabel: str X axis label Units: str Units of the data display: str `On` or `Off` to display image folder out: str Path to save the output Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.BIAS_histogramm(measured='U',modelled='U_m') >>> """ display=True if args.get('display','Off')=='Off': display=False if not hasattr(self.data,'filename'): self.data.filename='' filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_biasHist.png') unit=get_opt(self.data[measured],'units',args.get('units','')) short_name=get_opt(self.data[measured],'short_name',args.get('Xlabel','')) self.data=self.data.dropna() do_bias_hist(self.data[measured].values,self.data[modelled].values,unit,short_name,args.get('Nb of bins',30),filename,display)
[docs] def density_diagramm(self,X='X',Y='Y',args={ 'Y name':'', 'X name':'', 'Y unit':'', 'X unit':'', 'Y limits':[0,np.inf], 'X limits':[0,np.inf], 'display':{'On':True,'Off':False}, 'minimum': 0.001, 'folder out':os.getcwd()}): """ This function provides density diagrams of parameter Y vs parameter X, i.e. similar to scatter plot but emphasing on region withg large number of data Parameters ~~~~~~~~~~ X : str Name of the column to plot on the X axis. Y : str Name of the column to plot on the Y axis. args: dict Dictionnary with the folowing keys: Y name: str Name of the Y axis X name: str Name of the X axis Y unit: str Unit of the Y axis X unit: str Unit of the X axis Y limits: list 2 value list with Y axis limit X limits: list 2 value list with X axis limits display: str `On` or `Off` to display image folder out: str Path to save the output minimum: float Minimum value to plot Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.density_diagramm(X='U',Y='U_m') >>> """ display=True if args.get('display','Off')=='Off': display=False X_short_name=get_opt(self.data[X],'short_name',args.get('X name','')) Y_short_name=get_opt(self.data[Y],'short_name',args.get('Y name','')) X_unit=get_opt(self.data[X],'units',args.get('X unit','')) Y_unit=get_opt(self.data[Y],'units',args.get('Y unit','')) Xlim=args.get('X limits',[0,np.inf]) Ylim=args.get('Y limits',[0,np.inf]) if hasattr(self.data,'filename'): filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_'+X+'_'+Y+'_density_diagramm.png') else: filename=os.path.join(args.get('folder out',os.getcwd()),'density_diagramm.png') do_density_diagramm(self.data[X].values,self.data[Y].values,X_short_name,Y_short_name,X_unit,Y_unit,Xlim,Ylim,filename,display,args.get('minimum',0.001))
[docs] def QQ_plot(self,measured='measured',modelled='modelled',args={ 'measured name':'', 'modelled name':'', 'measured unit':'', 'modelled unit':'', 'Quantile increment step (%)':1.0, 'display':{'On':True,'Off':False}, 'folder out':os.getcwd()}): """ This function provides Quantile-Quantile plots (Q-Q plots) for comparison statistics. Parameters ~~~~~~~~~~ measured : str Name of the column to contain the measured data. modelled : str Name of the column to contain the modelled data. args: dict Dictionnary with the folowing keys: measured name: str Name of the Y axis modelled name: str Name of the X axis measured unit: str Unit of the Y axis modelled unit: str Unit of the X axis Quantile increment step (%): float Quantile increment step in percentage display: str `On` or `Off` to display image folder out: str Path to save the output Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.QQ_plot(measured='U',modelled='U_m') >>> """ display=True if args.get('display','Off')=='Off': display=False X_short_name=get_opt(self.data[measured],'short_name',args.get('measured name','')) Y_short_name=get_opt(self.data[modelled],'short_name',args.get('modelled name','')) X_unit=get_opt(self.data[measured],'units',args.get('measured unit','')) Y_unit=get_opt(self.data[modelled],'units',args.get('modelled unit','')) pvec=np.arange(0,100+args.get('Quantile increment step (%)',1),args.get('Quantile increment step (%)',1)) if not hasattr(self.data,'filename'): self.data.filename='' filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_qqplot.png') qq_plot(self.data[measured],self.data[modelled],pvec,X_short_name,Y_short_name,X_unit,Y_unit,filename,display)
[docs] def percentage_of_occurence(self,mag='mag',drr='drr',args={ 'magnitude interval (optional)':[], 'X label':'Wind speed in [m/s]', 'time blocking':{'Annual':True,'Seasonal (South hemisphere)':False,'Seasonal (North hemisphere)':False,'Monthly':False}, 'direction binning':{'centered':True,'not-centered':False}, 'direction interval': 45., 'display':{'On':True,'Off':False}, 'folder out':os.getcwd()}): """ This function provides percentage of occurnce from any variable. plot are directional if direction is supplied Parameters ~~~~~~~~~~ mag : str Name of the column from which to get stats. drr : str Column name representing the directions. args: dict Dictionnary with the folowing keys: title: str Graph title magnitude interval (optional): list interval to use for the magnitude X label: str Label for the X axis direction binning: str Can be `centered` or `not-centered` depending if the directionnal are centered over 0 direction interval: int Dirctionnal interval for the bins in degrees display: str `On` or `Off` to display image folder out: str Path to save the output time blocking: str if ``Time blocking=='Annual'``, Statistics will be calculated for the whole timeserie if ``Time blocking=='Seasonal (South hemisphere)'``, Statistics will be calculated for South hemisphere seasons if ``Time blocking=='Seasonal (North hemisphere)'``, Statistics will be calculated for North hemisphere seasons if ``Time blocking=='Monthly'``, Statistics will be calculated for each month. Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.Percentage_of_occurence(mag='U',drr='drr',args={'time blocking':'Yearly'}) >>> """ display=True if args.get('display','Off')=='Off': display=False if drr in self.data: drr=self.data[drr].values else: drr=None mag_inteval=args.get('magnitude interval (optional)',[]) X_unit=get_opt(self.data[mag],'units','') X_short_name=get_opt(self.data[mag],'short_name','') if X_short_name=='': xlabel=args.get('X label','') else: xlabel=X_short_name+' ['+X_unit+']' if not hasattr(self.data,'filename'): self.data.filename='' filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_OccurencePlot.png') drr_interval=dir_interval(args['direction interval'],args['direction binning']) do_perc_of_occurence(self.data.index,self.data[mag].values,drr,mag_inteval,xlabel,args['time blocking'],drr_interval,filename,display)
[docs] def plot_thermocline(self,mag=['mag'],args={ 'function':{'Max':True, 'Mean':False, 'Median':False, 'Min':False, 'Percentile':False, 'Prod':False, 'Quantile':False, 'Std':False, 'Sum':False, 'Var':False}, 'percentile or Quantile': 0.1, 'X label':'Water temperature [degC]', 'time blocking':{'Annual':True,'Seasonal (South hemisphere)':False,'Seasonal (North hemisphere)':False,'Monthly':False}, 'display':{'On':True,'Off':False}, 'table':{'On':True,'Off':False}, 'folder out':os.getcwd()}): """ This function provides a plot of parameter versus water depth. This function average the timeseries ( by mean, median ...) Notes ~~~~~ Variables MUST be in the format *_lev_1,*_lev_2 etc.. Parameters ~~~~~~~~~~ mag : list Name of the column from which to get stats. args: dict Dictionnary with the folowing keys: function: str Statistics to use to process each level: can be `Max`,`Mean`,`Median`,`Min`,`Percentile`,`Prod`,`Quantile`,`Std`,`Sum` or `Var` 'percentile or Quantile': float Percetile or quantile to use X label: str Label for the X axis display: str `On` or `Off` to display image table: str `On` or `Off` to print result in a table folder out: str Path to save the output time blocking: str if ``Time blocking=='Annual'``, Statistics will be calculated for the whole timeserie if ``Time blocking=='Seasonal (South hemisphere)'``, Statistics will be calculated for South hemisphere seasons if ``Time blocking=='Seasonal (North hemisphere)'``, Statistics will be calculated for North hemisphere seasons if ``Time blocking=='Monthly'``, Statistics will be calculated for each month. Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.plot_thermocline(mag=['U_lev_1','U_lev_2'],args={'function':'Mean',time blocking':'Yearly'}) >>> """ if isinstance(mag,str): return 'cannot be only one level,select multiple' display=True if args.get('display','Off')=='Off': display=False X_unit=get_opt(self.data[mag[0]],'units','') X_short_name=get_opt(self.data[mag[0]],'short_name','') if X_short_name=='': xlabel=args.get('X label','') else: xlabel=X_short_name+' ['+X_unit+']' funct=getattr(np,'nan'+args['function'].lower()) val=args.get('percentile or Quantile',0.1) if not hasattr(self.data,'filename'): self.data.filename='' table_filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_thermocline.xlsx') figure_filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'_thermocline.png') th=thermocline(self.data[mag],time_blocking=args['time blocking'],funct=funct,val=val) if args.get('table','Off')=='On': th.output_table(table_filename) th.output_fig(figure_filename,xlabel=xlabel,display=display)
[docs] def joint_probability_plot(self,X='X',Y='Y',\ args={ 'X Min Res Max(optional)':[2,1,22], 'Y Min Res Max(optional)':[0,0.5], 'X label':'', 'Y label':'', 'time blocking':{'Annual':True,'Seasonal (South hemisphere)':False,'Seasonal (North hemisphere)':False,'Monthly':False}, 'probablity expressed in':{'percent':False,'per thoushand':True}, 'display':{'On':True,'Off':False}, 'folder out':os.getcwd(), }): """ This function provides joint distribution graph for X and Y, i.e. the probability of events defined in terms of both X and Y (per 1000) Parameters ~~~~~~~~~~ X : str Name of the column to plot on the X axis. Y : str Name of the column to plot on the Y axis. args: dict Dictionnary with the folowing keys: X Min Res Max(optional): list Minimum, resolution and maximum value of X axis use in the join probability Y Min Res Max(optional): list Minimum, resolution and maximum value of Y axis use in the join probability X label: str Label for the X axis Y label: str Label for the Y axis display: str `On` or `Off` to display image folder out: str Path to save the output probablity expressed in: Can be `percent` or `per thoushand` time blocking: str if ``Time blocking=='Annual'``, Statistics will be calculated for the whole timeserie if ``Time blocking=='Seasonal (South hemisphere)'``, Statistics will be calculated for South hemisphere seasons if ``Time blocking=='Seasonal (North hemisphere)'``, Statistics will be calculated for North hemisphere seasons if ``Time blocking=='Monthly'``, Statistics will be calculated for each month. Examples: ~~~~~~~~~ >>> df=tf['test1']['dataframe'].StatPlots.plot_thermocline(mag=['U_lev_1','U_lev_2'],args={'function':'Mean',time blocking':'Yearly'}) >>> """ display=True if args.get('display','Off')=='Off': display=False Ydata=self.data[Y] Xdata=self.data[X] if not hasattr(self.data,'filename'): self.data.filename='' filename=os.path.join(args.get('folder out',os.getcwd()),os.path.splitext(self.data.filename)[0]+'JP.png') if args.get('probablity expressed in','percent')=='percent': multiplier=100. else: multiplier=1000. Y_interval=get_increment(Ydata,args.get('Y Min Res Max(optional)',[2,1])) X_interval=get_increment(Xdata,args.get('X Min Res Max(optional)',[0,0.5])) X_unit=get_opt(self.data[X],'units','') X_short_name=get_opt(self.data[X],'short_name','') if X_short_name=='': xlabel=args.get('X label','') else: xlabel=X_short_name+' ['+X_unit+']' Y_unit=get_opt(self.data[Y],'units','') Y_short_name=get_opt(self.data[Y],'short_name','') if Y_short_name=='': ylabel=args.get('Y label','') else: ylabel=Y_short_name+' ['+Y_unit+']' _do_joint_prob_plot(filename,self.data.index,Xdata,Ydata,X_interval,Y_interval,args['time blocking'],display,xlabel,ylabel,multiplier)