Source code for toto.inputs.nc
"""Read generic netcdf file
This import function works well with NetCDF or Zarr files created by `xarray`.
This class returns a list of pandas DataFrames, with extra attributes such as latitude and longitude attached when the file provides them.
Parameters
~~~~~~~~~~
filename : (files,) str or list_like
A filename or a list of filenames to process.
Examples
~~~~~~~~
>>> from toto.inputs.nc import NCfile
>>> nc=NCfile('filename.nc')._toDataFrame()
"""
import glob,os,sys
import pandas as pd
import xarray as xr
class NCfile():
    """Read NetCDF or Zarr files into time-indexed pandas DataFrames.

    Every file is opened with xarray, converted with ``Dataset.to_dataframe``
    and stored in ``self.data`` as one or more DataFrames indexed by ``time``
    (the ``time`` column is kept as well).

    Parameters
    ~~~~~~~~~~
    filenames : str, os.PathLike or list_like
        A single filename or a list of filenames to process.
    """

    @staticmethod
    def defaultExtensions():
        """Return the list of file extensions associated with this reader."""
        return ['.nc', '.zarr']

    def __init__(self, filenames):
        # A lone path (string or path-like object) is wrapped in a list so the
        # rest of the class can always iterate over ``self.filenames``.
        if isinstance(filenames, (str, os.PathLike)):
            filenames = [filenames]
        self.filenames = filenames
        self.data = []

        # READ
        self._reads_nc()

    def _reads_nc(self):
        """Read every file in ``self.filenames`` in order."""
        for file in self.filenames:
            self._read_nc(file)

    def _read_nc(self, filename):
        """Read one file and append the resulting DataFrame(s) to ``self.data``.

        A dataset whose DataFrame has a single index level produces one frame;
        ``longitude`` / ``latitude`` attributes are attached to it when the
        dataset has ``lon``/``lat`` or ``longitude``/``latitude`` entries.

        A dataset with several index levels produces one frame per
        combination of the non-``time`` level values; column names receive a
        ``_<level><value>`` suffix for every level that was sliced away.

        Raises
        ~~~~~~
        KeyError
            If the resulting frame has no ``time`` index level or column.
        """
        path = os.fspath(filename)
        if '.zarr' in path:
            ds = xr.open_zarr(path)
        else:
            ds = xr.open_dataset(path)

        try:
            df = ds.to_dataframe()

            if len(df.index.names) == 1:
                # Later pairs override earlier ones, so ``longitude``/``latitude``
                # win over ``lon``/``lat`` when a file has both. A pair is only
                # used when both members exist, so a half-defined pair no
                # longer raises KeyError.
                for lon_name, lat_name in (('lon', 'lat'),
                                           ('longitude', 'latitude')):
                    if lon_name in ds and lat_name in ds:
                        setattr(df, 'longitude', ds[lon_name].values)
                        setattr(df, 'latitude', ds[lat_name].values)

                # In-place so the attributes set above stay on the same object.
                df.reset_index(inplace=True)
                df.set_index('time', inplace=True, drop=False)
                self.data.append(df)
            else:
                # Unique values are taken from the full frame, once per level.
                levels = [(name, df.index.get_level_values(name).unique())
                          for name in df.index.names if name != 'time']
                self.data.extend(self._split_levels(df, levels))
        finally:
            # Everything needed has been copied into pandas/numpy objects by
            # now, so the underlying file handle can be released.
            ds.close()

    @staticmethod
    def _split_levels(frame, levels):
        """Slice ``frame`` into one time-indexed frame per level combination.

        Parameters
        ~~~~~~~~~~
        frame : pandas.DataFrame
            Frame whose index still contains every level named in ``levels``.
        levels : list of (str, iterable)
            Remaining index levels to slice away, each with the values to
            iterate over.

        Returns
        ~~~~~~~
        list of pandas.DataFrame
            Frames in depth-first order (the first level varies slowest).
        """
        if not levels:
            out = frame.reset_index()
            out.set_index('time', inplace=True, drop=False)
            return [out]

        name, values = levels[0]
        frames = []
        for value in values:
            mask = frame.index.get_level_values(name) == value
            # Keep the rows for this value and drop the now-constant level.
            piece = frame.iloc[mask].reset_index(name, drop=True)
            piece = piece.add_suffix('_' + name + str(value))
            frames.extend(NCfile._split_levels(piece, levels[1:]))
        return frames

    def _toDataFrame(self):
        """Return the list of DataFrames read from the input files."""
        return self.data
if __name__ == '__main__':
    # Manual check: read the file given as the first command-line argument,
    # falling back to the developer's local sample file when none is given.
    ncfile = sys.argv[1] if len(sys.argv) > 1 else '/home/remy/developpement/sst/OISST.nc'
    nc = NCfile(ncfile)
    df = nc._toDataFrame()