Source code for miblab_data.zenodo


import os
import shutil
import zipfile

import requests
 

# Identifiers of miblab's Zenodo repositories, keyed by a short repository
# name. fetch() inserts these values into the Zenodo record download URL.
DOI = {
    'MRR': "15285017",
    'TRISTAN': "15301607",
    'RAT': "15747417",
}

# miblab datasets: maps each downloadable file name to the identifier of the
# Zenodo repository that hosts it. Files are grouped per repository below and
# flattened into one lookup table, preserving the listed order.
DATASETS = {
    name: {'doi': DOI[repository]}
    for repository, names in (
        ('MRR', (
            'KRUK.dmr.zip',
        )),
        ('TRISTAN', (
            'tristan_humans_healthy_controls.dmr.zip',
            'tristan_humans_healthy_ciclosporin.dmr.zip',
            'tristan_humans_healthy_metformin.dmr.zip',
            'tristan_humans_healthy_rifampicin.dmr.zip',
            'tristan_humans_patients_rifampicin.dmr.zip',
            'tristan_rats_healthy_multiple_dosing.dmr.zip',
            'tristan_rats_healthy_reproducibility.dmr.zip',
            'tristan_rats_healthy_six_drugs.dmr.zip',
        )),
    )
    for name in names
}

def fetch(dataset: str, folder: str, doi: str = None, filename: str = None,
          extract: bool = False, verbose: bool = False):
    """Download a dataset from Zenodo.

    Note if a dataset already exists locally it will not be downloaded
    again and the existing file will be returned. Likewise, if the
    extraction folder already exists, the archive is not extracted again.

    Args:
        dataset (str): Name of the dataset
        folder (str): Local folder where the result is to be saved
        doi (str, optional): Digital object identifier (DOI) of the
          Zenodo repository where the dataset is uploaded. If this
          is not provided, the function will look for the dataset in
          miblab's own Zenodo repositories.
        filename (str, optional): Filename of the downloaded dataset.
          If this is not provided, then *dataset* is used as filename.
        extract (bool): Whether to automatically extract downloaded ZIP files.
        verbose (bool): If True, prints logging messages.

    Raises:
        ValueError: If no *doi* is given and *dataset* is not one of the
          known miblab datasets.
        requests.exceptions.ConnectionError: If the connection to
          Zenodo cannot be made.
        requests.exceptions.HTTPError: If Zenodo answers the download
          request with an error status.
        zipfile.BadZipFile: If extraction is requested and the file is
          not a valid ZIP archive or contains a corrupt member.

    Returns:
        str: Full path to the downloaded datafile, or to the folder holding
        the extracted content if *extract* is True.
    """
    # The same name is used for the file on Zenodo and for the local copy.
    name = filename or dataset
    file = os.path.join(folder, name)

    # If it is not already downloaded, download it.
    if os.path.exists(file):
        if verbose:
            print(f"Skipping {dataset} download, file {file} already exists.")
    else:
        # Get DOI
        if doi is None:
            if dataset not in DATASETS:
                raise ValueError(
                    f"{dataset} does not exist in one of the miblab "
                    f"repositories on Zenodo. If you want to fetch "
                    f"a dataset in an external Zenodo repository, please "
                    f"provide the doi of the repository."
                )
            doi = DATASETS[dataset]['doi']

        # Dataset download link
        file_url = f"https://zenodo.org/records/{doi}/files/{name}"

        # Make the request and check for connection error
        try:
            file_response = requests.get(file_url)
        except requests.exceptions.ConnectionError as err:
            raise requests.exceptions.ConnectionError(
                f"\n\n"
                f"A connection error occurred trying to download {dataset} "
                f"from Zenodo. \nThis usually happens if you are offline. "
                f"The detailed error message is here: {err}"
            ) from err

        # Check for other errors
        file_response.raise_for_status()

        # Create the destination folder if needed. exist_ok avoids a race
        # between checking and creating; falling back to the current
        # directory handles an empty *folder*.
        os.makedirs(os.path.dirname(file) or os.curdir, exist_ok=True)

        # Save under a temporary name first and rename afterwards, so an
        # interrupted write never leaves a truncated file that later calls
        # would mistake for a completed download.
        partial = file + '.part'
        try:
            with open(partial, 'wb') as f:
                f.write(file_response.content)
            os.replace(partial, file)
        except BaseException:
            if os.path.exists(partial):
                os.remove(partial)
            raise

    # If the zip file is requested we are done
    if not extract:
        return file

    # If extraction is requested, return the folder with extracted content
    if file.endswith('.zip'):
        extract_to = file[:-4]
    else:
        extract_to = file + '_unzip'

    # Skip extraction if the folder already exists
    if os.path.exists(extract_to):
        if verbose:
            print(f"Skipping {file} extraction, folder {extract_to} already exists.")
        return extract_to

    # Perform extraction. The folder was created by this call, so if anything
    # goes wrong it is removed again: a leftover empty or partial folder would
    # otherwise be treated as a finished extraction by every later call.
    os.makedirs(extract_to)
    try:
        with zipfile.ZipFile(file, 'r') as zip_ref:
            bad_file = zip_ref.testzip()
            if bad_file:
                raise zipfile.BadZipFile(
                    f"Cannot extract: corrupt file {bad_file}."
                )
            zip_ref.extractall(extract_to)
    except BaseException:
        shutil.rmtree(extract_to, ignore_errors=True)
        raise

    return extract_to