Projects#
Ask your supervisors for the data if not already provided (it is not included in this repository).
Quick presentation.
Final project presentation.
Quality of the dataset is key.
Results on a clean notebook.
Explain which method(s) you used and why.
If a method fails, explain why.
Variable Renewable Energy (VRE) assessment and forecast#
Project objectives#
Assess the onshore wind or solar photovoltaic hourly production over in metropolitan France regions using climate data and capacity factor observations.
Predict the VRE power ahead of time.
Dataset#
Observed monthly VRE capacity factors averaged over metropolitan France regions from 2014 to 2021
Climate variables of your choice from a global reanalysis with an hourly sampling from 2010 to 2019
First steps#
Choose from solar or wind power
Read about solar/wind production assessment and forecast
Estimate the hourly solar/wind production
Reading the data#
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
# Directories where you saved the data
data_dir_energy = Path('../../../data/projects/energy_france')
data_dir_climate = Path('../../../data/projects/climate_france')
# Template filenames
filename_mask = 'mask_datagouv_french_regions_merra2_Nx_France.nc'
filename_climate = 'merra2_area_selection_output_{}_merra2_2010-2019.nc'
filename_energy = 'reseaux_energies_{}.csv'
# Read and plot grid point-region mask
filepath_mask = Path(data_dir_climate, filename_mask)
ds_mask = xr.load_dataset(filepath_mask)
da_mask = ds_mask['mask']
plt.figure()
plt.scatter(da_mask['lon'], da_mask['lat'], c=da_mask, cmap='Set1')
# Read a climate variable and plot its mean over time
variable_name = 'zonal_wind'
filename = filename_climate.format(variable_name)
filepath = Path(data_dir_climate, filename)
da_climate = xr.load_dataset(filepath)[variable_name]
plt.figure()
plt.scatter(da_mask['lon'], da_mask['lat'], c=da_climate.mean('time'))
# Compute regional mean of climate variable and plot time series
da_climate_reg = da_climate.groupby(da_mask).mean().rename(mask='region')
da_climate_reg['region'] = ds_mask['region'].values
plt.figure()
da_climate_reg.plot.line(x='time')
# Read energy variable and plot time series
variable_name = 'capacityfactor_wind-onshore'
filename = filename_energy.format(variable_name)
filepath = Path(data_dir_energy, filename)
df_energy = pd.read_csv(filepath, index_col=0, header=0, parse_dates=True)
plt.figure()
df_energy.plot()
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/file_manager.py:209, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
208 try:
--> 209 file = self._cache[self._key]
210 except KeyError:
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/lru_cache.py:55, in LRUCache.__getitem__(self, key)
54 with self._lock:
---> 55 value = self._cache[key]
56 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('/builds/energy4climate/public/education/data/projects/climate_france/mask_datagouv_french_regions_merra2_Nx_France.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'e4631814-3135-4e78-a71e-754a561fb62d']
During handling of the above exception, another exception occurred:
FileNotFoundError Traceback (most recent call last)
Cell In[1], line 17
15 # Read and plot grid point-region mask
16 filepath_mask = Path(data_dir_climate, filename_mask)
---> 17 ds_mask = xr.load_dataset(filepath_mask)
18 da_mask = ds_mask['mask']
19 plt.figure()
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/api.py:279, in load_dataset(filename_or_obj, **kwargs)
276 if "cache" in kwargs:
277 raise TypeError("cache has no effect in this context")
--> 279 with open_dataset(filename_or_obj, **kwargs) as ds:
280 return ds.load()
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/api.py:541, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, backend_kwargs, **kwargs)
529 decoders = _resolve_decoders_kwargs(
530 decode_cf,
531 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
537 decode_coords=decode_coords,
538 )
540 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 541 backend_ds = backend.open_dataset(
542 filename_or_obj,
543 drop_variables=drop_variables,
544 **decoders,
545 **kwargs,
546 )
547 ds = _dataset_from_backend_dataset(
548 backend_ds,
549 filename_or_obj,
(...)
557 **kwargs,
558 )
559 return ds
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:578, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)
557 def open_dataset(
558 self,
559 filename_or_obj,
(...)
574 autoclose=False,
575 ):
577 filename_or_obj = _normalize_path(filename_or_obj)
--> 578 store = NetCDF4DataStore.open(
579 filename_or_obj,
580 mode=mode,
581 format=format,
582 group=group,
583 clobber=clobber,
584 diskless=diskless,
585 persist=persist,
586 lock=lock,
587 autoclose=autoclose,
588 )
590 store_entrypoint = StoreBackendEntrypoint()
591 with close_on_error(store):
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:382, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
376 kwargs = dict(
377 clobber=clobber, diskless=diskless, persist=persist, format=format
378 )
379 manager = CachingFileManager(
380 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
381 )
--> 382 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:329, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
327 self._group = group
328 self._mode = mode
--> 329 self.format = self.ds.data_model
330 self._filename = self.ds.filepath()
331 self.is_remote = is_remote_uri(self._filename)
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:391, in NetCDF4DataStore.ds(self)
389 @property
390 def ds(self):
--> 391 return self._acquire()
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/netCDF4_.py:385, in NetCDF4DataStore._acquire(self, needs_lock)
384 def _acquire(self, needs_lock=True):
--> 385 with self._manager.acquire_context(needs_lock) as root:
386 ds = _nc4_require_group(root, self._group, self._mode)
387 return ds
File /usr/local/lib/python3.8/contextlib.py:113, in _GeneratorContextManager.__enter__(self)
111 del self.args, self.kwds, self.func
112 try:
--> 113 return next(self.gen)
114 except StopIteration:
115 raise RuntimeError("generator didn't yield") from None
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/file_manager.py:197, in CachingFileManager.acquire_context(self, needs_lock)
194 @contextlib.contextmanager
195 def acquire_context(self, needs_lock=True):
196 """Context manager for acquiring a file."""
--> 197 file, cached = self._acquire_with_cache_info(needs_lock)
198 try:
199 yield file
File /builds/energy4climate/public/education/machine_learning_for_climate_and_energy/venv/lib/python3.8/site-packages/xarray/backends/file_manager.py:215, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
213 kwargs = kwargs.copy()
214 kwargs["mode"] = self._mode
--> 215 file = self._opener(*self._args, **kwargs)
216 if self._mode == "w":
217 # ensure file doesn't get overridden when opened again
218 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2330, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:1948, in netCDF4._netCDF4._ensure_nc_success()
FileNotFoundError: [Errno 2] No such file or directory: b'/builds/energy4climate/public/education/data/projects/climate_france/mask_datagouv_french_regions_merra2_Nx_France.nc'
Project: Forecast of El Nino#
Natural mode of variability of the pacific equatorial ocean
Big impact on the local economy
Question: What is the predictability of El Nino?
Data set#
Variable: Global sea surface temperature (SST)
Temporal resolution: monthly mean
Spatial resolution: 1 degree\(\times\)1 degree
First steps#
Read about El nino
Characterize El nino: i.e. introduce a classifier
What does it mean ``to make a prediction’’?
from pathlib import Path
import matplotlib.pyplot as plt
import xarray as xr
dir0 = Path('../../../data/projects/el_nino/')
file_sst = 'sst.mnmean.nc'
# load the data set with xarray
ds = xr.open_dataset(Path(dir0, file_sst))
# the name of the variable is 'sst'
p = ds["sst"].isel(time=0).plot.contourf(levels=20, center=False)
Project: Weather station#
Suppose there are 5 weather stations that monitor the weather: Paris, Brest, London, Marseille and Berlin.
The weather station in Paris breaks down
Can we use the other stations to infer the weather in Paris
Data set#
Surface variables: skt, u10, v10, t2m, d2m, tcc, sp, tp, ssrd, blh
Temporal resolution: hourly
Spatial resolution: N/A
First steps#
Look at the correlations between variables.
What variable do I want to predict
What time scale am interested in?
Start with the easy predictions and move on to harder ones
Are there events that are more predictable than others?
from pathlib import Path
import pandas as pd
import xarray as xr
dir0 = Path('../../../data/projects/weather/paris/')
file_t2m = 't2m.nc'
ds = xr.open_dataset(Path(dir0, file_t2m))
# convert to pandas dataframe
df = ds.to_dataframe()
# remove latitude and longitude indices
df = df.droplevel([0,1])
df[df.index.year==2000].plot()
<Axes: xlabel='time'>
Credit#
Contributors include Bruno Deremble and Alexis Tantet. Several slides and images are taken from the very good Scikit-learn course.