# Source code for miblab_data.osf
import os
import zipfile
from tqdm import tqdm
from osfclient.api import OSF
# [docs]
def fetch(dataset: str, folder: str, project: str = "un5ct", token: str = None, extract: bool = True, verbose: bool = True):
    """
    Download a dataset from OSF (Open Science Framework).

    Every file below the requested OSF folder is downloaded recursively into
    ``folder``, mirroring the remote folder structure. When ``extract`` is
    true, every ``.zip`` file found under ``folder`` afterwards is unpacked
    into a sibling directory named after the archive and the archive is
    deleted.

    Downloading and unzipping are best-effort: a failure on one file is
    reported (when ``verbose``) and the remaining files are still processed.

    Args:
        dataset (str): Subfolder path inside the OSF project, with ``/`` as
            separator. Leading, trailing and repeated slashes are ignored.
            If empty, everything in the storage root is downloaded (use
            with caution).
        folder (str): Local folder where the dataset will be saved. It is
            created if it does not exist.
        project (str, optional): OSF project ID (default is "un5ct").
        token (str, optional): Personal OSF token for accessing private
            projects. When omitted, the ``OSF_TOKEN`` environment variable
            is used if it is set.
        extract (bool, optional): Whether to automatically unzip .zip files
            (default is True).
        verbose (bool, optional): Whether to print progress and warning
            messages (default is True).

    Raises:
        FileNotFoundError: If the specified dataset path does not exist in
            the OSF project.

    Returns:
        str: Path to the local folder containing the downloaded data.

    Example:
        >>> from miblab.osf import fetch
        >>> fetch('TRISTAN/RAT/bosentan_highdose/Sanofi', 'test_download')
    """
    # Honour the documented fallback: an explicit token wins, otherwise use
    # OSF_TOKEN from the environment (an empty value counts as "not set").
    if token is None:
        token = os.environ.get("OSF_TOKEN") or None

    # Prepare local folder
    os.makedirs(folder, exist_ok=True)

    # Connect to OSF and locate project storage
    osf = OSF(token=token)
    storage = osf.project(project).storage('osfstorage')

    # Walk down the remote folder tree one path segment at a time. Empty
    # segments (from "/", "a//b", ...) are skipped so they are not mistaken
    # for a folder with an empty name.
    current = storage
    parts = [part for part in dataset.split('/') if part] if dataset else []
    for part in parts:
        match = next((f for f in current.folders if f.name == part), None)
        if match is None:
            raise FileNotFoundError(f"Folder '{part}' not found when navigating path '{dataset}'.")
        current = match

    def download(current_folder, local_folder):
        """Recursively copy ``current_folder`` from OSF into ``local_folder``."""
        os.makedirs(local_folder, exist_ok=True)
        files = list(current_folder.files)
        iterator = tqdm(files, desc=f"Downloading to {local_folder}") if verbose and files else files
        for file in iterator:
            local_file = os.path.join(local_folder, file.name)
            opened = False
            try:
                with open(local_file, 'wb') as f:
                    opened = True
                    file.write_to(f)
            except Exception as e:
                # Only clean up a file this call actually opened (and thereby
                # truncated); a truncated download must not be left behind
                # looking like valid data.
                if opened:
                    try:
                        os.remove(local_file)
                    except OSError:
                        pass
                if verbose:
                    print(f"Warning downloading {file.name}: {e}")
        for subfolder in current_folder.folders:
            download(subfolder, os.path.join(local_folder, subfolder.name))

    download(current, folder)

    # Extract zip files found under the local folder if requested
    if extract:
        for dirpath, _, filenames in os.walk(folder):
            for filename in filenames:
                if not filename.lower().endswith('.zip'):
                    continue
                zip_path = os.path.join(dirpath, filename)
                extract_to = os.path.join(dirpath, filename[:-4])
                try:
                    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                        bad_file = zip_ref.testzip()
                        if bad_file:
                            raise zipfile.BadZipFile(f"Corrupt file {bad_file} inside {zip_path}")
                        # Create the target only once the archive has been
                        # validated, so corrupt zips leave no empty folder.
                        os.makedirs(extract_to, exist_ok=True)
                        zip_ref.extractall(extract_to)
                    # The archive is closed here, so it can be deleted safely.
                    os.remove(zip_path)
                    if verbose:
                        print(f"Unzipped and deleted {zip_path}")
                except Exception as e:
                    if verbose:
                        print(f"Warning unzipping {zip_path}: {e}")

    return folder
# [docs]
def upload(folder: str, dataset: str, project: str = "un5ct", token: str = None, verbose: bool = True, overwrite: bool = True):
    """
    Upload a file to OSF (Open Science Framework) using osfclient.

    This function uploads a single local file to a specified path inside an
    OSF project. If a file already exists at that path it is either deleted
    and re-uploaded (``overwrite=True``) or left untouched and the upload is
    skipped (``overwrite=False``).

    NOTE(review): earlier documentation stated that intermediate folders
    must already exist in the OSF project because osfclient does not create
    them; confirm this against the osfclient version in use.

    Args:
        folder (str): Path to the local file to upload.
        dataset (str): OSF path where the file should be placed
            (e.g., "Testing/filename.txt"). Leading and trailing slashes
            are ignored.
        project (str): OSF project ID (default: "un5ct").
        token (str): OSF personal token for private/write access.
        verbose (bool): Whether to print progress messages (default True).
        overwrite (bool): Whether to replace an existing file if it already
            exists (default True).

    Raises:
        FileNotFoundError: If the local file does not exist.
        ValueError: If ``dataset`` is empty or consists only of slashes.
        RuntimeError: If deleting the existing remote file or uploading the
            new one fails; the underlying error is attached as the cause.

    Returns:
        None

    Example:
        >>> from miblab.osf import upload
        >>> upload(
        ...     folder='data/results.csv',
        ...     dataset='Testing/results.csv',
        ...     project='un5ct',
        ...     token='your-osf-token',
        ...     verbose=True,
        ...     overwrite=True
        ... )
    """
    # Check that the specified local file exists
    if not os.path.isfile(folder):
        raise FileNotFoundError(f"Local file not found: {folder}")

    # Clean the remote path and reject an empty one up front, before any
    # network round-trip is made.
    full_path = dataset.strip("/")
    if not full_path:
        raise ValueError(f"Remote dataset path must not be empty (got {dataset!r}).")

    # Authenticate and connect to the OSF project
    osf = OSF(token=token)
    storage = osf.project(project).storage("osfstorage")

    # Check if the file already exists on OSF
    remote_path = "/" + full_path
    existing = next((f for f in storage.files if f.path == remote_path), None)
    if existing is not None:
        if not overwrite:
            if verbose:
                print(f"File '{full_path}' already exists. Skipping (overwrite=False).")
            return
        if verbose:
            print(f"File '{full_path}' already exists. Deleting before re-upload...")
        try:
            existing.remove()
        except Exception as e:
            raise RuntimeError(f"Failed to delete existing file before overwrite: {e}") from e

    # Upload the file
    size_mb = os.path.getsize(folder) / 1e6
    with open(folder, "rb") as f:
        if verbose:
            print(f"Uploading '{os.path.basename(folder)}' ({size_mb:.2f} MB) to '{full_path}'...")
        try:
            storage.create_file(full_path, f)
        except Exception as e:
            raise RuntimeError(f"Failed to upload file: {e}") from e
        if verbose:
            print("Upload complete.")