import os
import sys
import pickle
import requests
import zarr
import zipfile
# filepaths need to be identified with importlib_resources
# rather than __file__ as the latter does not work at runtime
# when the package is installed via pip install
if sys.version_info < (3, 9):
# importlib.resources either doesn't exist or lacks the files()
# function, so use the PyPI version:
import importlib_resources
else:
# importlib.resources has files(), so use that:
import importlib.resources as importlib_resources
DATASETS_PKL = [
'MOLLI',
'MOLLI_small',
'MOLLI_tiny',
'VFA',
'VFA_small',
'VFA_tiny',
]
DATASETS_ZARR = [
'VFA',
'VFA_small',
'VFA_tiny',
'DCE',
'DCE_small',
'DCE_tiny',
]
[docs]
def fetch(dataset=None, clear_cache=False, download_all=False)->dict:
"""Fetch a dataset included in mdreg
Args:
dataset (str, optional): name of the dataset. See below for options.
clear_cache (bool, optional): When a dataset is fetched, it is
downloaded and then stored in a local cache memory for faster access
next time it is fetched. Set clear_cache=True to delete all data
in the cache memory. Default is False.
download_all (bool, optional): By default only the dataset that is
fetched is downloaded. Set download_all=True to download all
datasets at once. This will cost some time but then offers fast and
offline access to all datasets afterwards. This will take up around
300 MB of space on your hard drive. Default is False.
Returns:
dict: Data as a dictionary.
Example:
Fetch the MOLLI images, and display as animation:
.. plot::
:include-source:
:context:
>>> import mdreg
>>> import mdreg.plot as plt
Get the data:
>>> data = mdreg.fetch('MOLLI')
Plot as animation:
>>> plt.animation(data['array'], vmin=0, vmax=1e4)
Notes:
The following datasets can be fetched:
**MOLLI**
**Size**: 2MB
**Background**: T1-mapping data for the kidney acquired on a
healthy volunteer, collected as part of the technical validation
efforts of the
`UKRIN-MAPS consortium <https://www.nottingham.ac.uk/research/groups/spmic/research/uk-renal-imaging-network/ukrin-maps.aspx>`_
and the development of the
`UKAT package <https://github.com/UKRIN-MAPS/ukat>`_.
**Data format**: The fetch function returns a dictionary, which
contains the following items:
- **array**: 4D array of signal intensities in the abdomen at
different inversion times.
- **TI**: A list of inversion times in msec.
Funding statement:
Data collection was funded by the UKRIN-MAPS MRC Partnership
grant (MR/R02264X/1) and the NIHR AFiRM project (NIHR128494).
**MOLLI_small**
**Size**: 129KB
**Background**: A small version of 'MOLLI' dataset, in 4x lower
resolution than the original. Useful for rapid testing and
debugging on a local machine.
**MOLLI_tiny**
**Size**: 9KB
**Background**: A tiny version of 'MOLLI' dataset, in 16x lower
resolution than the original. Useful for testing and debugging
solutions on remote machines.
**VFA**
**Size**: 5MB
**Background**: 4D variable flip angle data for T1-mapping in the
abdomen. Data are provided by the liver work package of
the `TRISTAN project <https://www.imi-tristan.eu/liver>`_
which develops imaging biomarkers for drug safety assessment.
The data and analysis was first presented at the ISMRM in 2024
(Min et al 2024, manuscript in press). A single set of variable
flip angle data are included.
**Data format**: The fetch function returns a dictionary, which
contains the following items:
- **array**: 4D array of signal intensities in the liver at
different flip angles
- **FA**: flip angles in degrees
- **spacing**: voxel size in mm in x-, y-, and z-directions as an
array.
Please reference the following abstract when using these data:
Thazin Min, Marta Tibiletti, Paul Hockings, Aleksandra Galetin,
Ebony Gunwhy, Gerry Kenna, Nicola Melillo, Geoff JM Parker,
Gunnar Schuetz, Daniel Scotcher, John Waterton, Ian Rowe, and
Steven Sourbron. *Measurement of liver function with dynamic
gadoxetate-enhanced MRI: a validation study in healthy
volunteers*. Proc Intl Soc Mag Reson Med, Singapore 2024.
**VFA_small**
**Size**: 87KB
**Background**: A small version of 'VFA' dataset, in 5x lower
resolution than the original. Useful for rapid testing and
debugging on a local machine.
**VFA_tiny**
**Size**: 7KB
**Background**: A tiny version of 'VFA' dataset, in 10x lower
resolution than the original. Useful for testing and debugging
cloud-based solutions that consume funds in proportion to data size.
"""
if dataset is None:
v = None
else:
v = _fetch_dataset(dataset, '.pkl')
if clear_cache:
_clear_cache()
if download_all:
for d in DATASETS_PKL:
_download(d, '.pkl')
return v
[docs]
def fetch_zarr(dataset=None, clear_cache=False, download_all=False)->dict:
"""Fetch a zarray dataset included in mdreg
Args:
dataset (str, optional): name of the dataset. See below for options.
clear_cache (bool, optional): When a dataset is fetched, it is
downloaded and then stored in a local cache memory for faster access
next time it is fetched. Set clear_cache=True to delete all data
in the cache memory. Default is False.
download_all (bool, optional): By default only the dataset that is
fetched is downloaded. Set download_all=True to download all
datasets at once. This will cost some time but then offers fast and
offline access to all datasets afterwards. This will take up around
300 MB of space on your hard drive. Default is False.
Returns:
zarr.Array: Data as a zarray.
Example:
Fetch the MOLLI images, and display as animation:
.. plot::
:include-source:
:context:
>>> import mdreg
Get the data:
>>> data = mdreg.fetch_zarr('VFA')
Plot as animation:
>>> mdreg.plot.animation(data, vmin=0, vmax=1e4)
Notes:
The following datasets can be fetched:
**MOLLI**
**Size**: 2MB
**Background**: T1-mapping data for the kidney acquired on a
healthy volunteer, collected as part of the technical validation
efforts of the
`UKRIN-MAPS consortium <https://www.nottingham.ac.uk/research/groups/spmic/research/uk-renal-imaging-network/ukrin-maps.aspx>`_
and the development of the
`UKAT package <https://github.com/UKRIN-MAPS/ukat>`_.
**Data format**: The fetch function returns a dictionary, which
contains the following items:
- **array**: 4D array of signal intensities in the abdomen at
different inversion times.
- **TI**: A list of inversion times in msec.
Funding statement:
Data collection was funded by the UKRIN-MAPS MRC Partnership
grant (MR/R02264X/1) and the NIHR AFiRM project (NIHR128494).
**MOLLI_small**
**Size**: 129KB
**Background**: A small version of 'MOLLI' dataset, in 4x lower
resolution than the original. Useful for rapid testing and
debugging on a local machine.
**VFA**
**Size**: 5MB
**Background**: 4D variable flip angle data for T1-mapping in the
abdomen. Data are provided by the liver work package of
the `TRISTAN project <https://www.imi-tristan.eu/liver>`_
which develops imaging biomarkers for drug safety assessment.
The data and analysis was first presented at the ISMRM in 2024
(Min et al 2024, manuscript in press). A single set of variable
flip angle data are included.
**Data format**: The fetch function returns a dictionary, which
contains the following items:
- **array**: 4D array of signal intensities in the liver at
different flip angles
- **FA**: flip angles in degrees
- **spacing**: voxel size in mm in x-, y-, and z-directions as an
array.
Please reference the following abstract when using these data:
Thazin Min, Marta Tibiletti, Paul Hockings, Aleksandra Galetin,
Ebony Gunwhy, Gerry Kenna, Nicola Melillo, Geoff JM Parker,
Gunnar Schuetz, Daniel Scotcher, John Waterton, Ian Rowe, and
Steven Sourbron. *Measurement of liver function with dynamic
gadoxetate-enhanced MRI: a validation study in healthy
volunteers*. Proc Intl Soc Mag Reson Med, Singapore 2024.
**VFA_small**
**Size**: 87KB
**Background**: A small version of 'VFA' dataset, in 5x lower
resolution than the original. Useful for rapid testing and
debugging on a local machine.
**VFA_tiny**
**Size**: 7KB
**Background**: A tiny version of 'VFA' dataset, in 10x lower
resolution than the original. Useful for testing and debugging
cloud-based solutions that consume funds in proportion to data size.
"""
if dataset is None:
v = None
else:
v = _fetch_dataset(dataset, '.zip')
if clear_cache:
_clear_cache()
if download_all:
for d in DATASETS_PKL:
_download(d, '.zip')
return v
def _clear_cache():
"""
Clear the folder where the data downloaded via fetch are saved.
Note if you clear the cache the data will need to be downloaded again
if you need them.
"""
f = importlib_resources.files('mdreg.datafiles')
for item in f.iterdir():
if item.is_file():
item.unlink() # Delete the file
def _fetch_dataset(dataset, ext):
f = importlib_resources.files('mdreg.datafiles')
datafile = str(f.joinpath(dataset + ext))
# If this is the first time the data are accessed, download them.
if not os.path.exists(datafile):
_download(dataset, ext)
if ext=='.pkl':
with open(datafile, 'rb') as f:
return pickle.load(f)
if ext=='.zip':
extracted = os.path.join(os.path.dirname(datafile), dataset)
if not os.path.exists(extracted):
with zipfile.ZipFile(datafile, 'r') as z:
z.extractall(extracted)
return zarr.open(extracted, mode='r') # TODO: fetch should return just a filepath to extracted zarr
def _download(dataset, ext):
f = importlib_resources.files('mdreg.datafiles')
datafile = str(f.joinpath(dataset + ext))
if os.path.exists(datafile):
return
# Dataset location
version_doi = "14933756" # This will change if a new version is created on zenodo
file_url = "https://zenodo.org/records/" + version_doi + "/files/" + dataset + ext
# Make the request and check for connection error
try:
file_response = requests.get(file_url)
except requests.exceptions.ConnectionError as err:
raise requests.exceptions.ConnectionError(
"\n\n"
"A connection error occurred trying to download the test data \n"
"from Zenodo. This usually happens if you are offline. The \n"
"first time a dataset is fetched via mdreg.fetch you need to \n"
"be online so the data can be downloaded. After the first \n"
"time they are saved locally so afterwards you can fetch \n"
"them even if you are offline. \n\n"
"The detailed error message is here: " + str(err))
# Check for other errors
file_response.raise_for_status()
# Save the file locally
with open(datafile, 'wb') as f:
f.write(file_response.content)