# Source code for miblab_data.tristan

import zipfile
from pathlib import Path
from typing import List

import dicom2nifti
from tqdm import tqdm
import requests       
from requests.adapters import HTTPAdapter, Retry


# Shared HTTP session for all Zenodo downloads.  Transient gateway errors
# (502/503/504) are retried up to three times with exponential back-off.
_rat_retry = Retry(
    total=3,
    backoff_factor=1,  # waits 1 s → 2 s → 4 s between attempts
    status_forcelist=(502, 503, 504),
)
_rat_session = requests.Session()
_rat_session.mount("https://", HTTPAdapter(max_retries=_rat_retry))


#  Public TRISTAN RAT Download Zenodo API
[docs] def rat_fetch( dataset: str | None = None, *, folder: str | Path = "./tristanrat", unzip: bool = True, convert: bool = False, keep_archives: bool = False, ) -> List[str]: """ Download, recursively extract, and (optionally) convert TRISTAN rat MRI studies from Zenodo (record **15747417**). The helper understands the 15 published studies **S01 … S15**. Pass ``dataset="all"`` (or leave *dataset* empty) to fetch every archive in one go. Parameters ---------- dataset ``"S01" … "S15"`` to grab a single study ``"all"`` or *None* to fetch them all. folder Root directory that will hold the ``SXX.zip`` files and the extracted DICOM tree. A sibling directory ``<folder>_nifti/`` is used for conversion output. unzip If *True*, each ZIP is unpacked **recursively** (handles inner ZIP-in-ZIP structures). convert If *True*, every DICOM folder is converted to compressed NIfTI (requires the **dicom2nifti** wheel and ``unzip=True``). keep_archives Forwarded to :func:`_unzip_nested`; set *True* to retain each inner ZIP after extraction (useful for auditing). Returns ------- list[str] Absolute paths to every ``SXX.zip`` that was downloaded (whether new or cached). 
Examples -------- Download a single study and leave it zipped >>> from miblab_data.tristan import rat_fetch Single study, leave zipped >>> rat_fetch("S01", folder="./rat_data", unzip=False) ['/home/you/rat_data/S01.zip'] Single study, unzip everything and convert to NIfTI (requires dicom2nifti) >>> rat_fetch("S01", folder="./rat_data", unzip=True, convert=True) Download by group (friendly names): - rifampicin_effect_size → S01, S02, S03, S04 - six_compound → S05, S06, S07, S08, S09, S10, S12 - field_strength → S13 - chronic → S11, S14, S15 Example of download by group: Rifampicin effect-size (S01–S04) >>> rat_fetch("rifampicin_effect_size", folder="./rat_data", unzip=True, convert=False) Example of download by group: Six-compound set (S05, S06, S07, S08, S10, S12) >>> rat_fetch("six_compound", folder="./rat_data", unzip=True, convert=False) Example of download by group: Field-strength (S13) >>> rat_fetch("field_strength", folder="./rat_data", unzip=True, convert=False) Example of download by group: Chronic studies (S11, S14, S15) >>> rat_fetch("chronic", folder="./rat_data", unzip=True, convert=False) Fetch the entire collection, unzip, but skip conversion >>> rat_fetch(dataset="all", ... folder="./rat_data", ... unzip=True, ... convert=False) Full end-to-end pipeline (requires dicom2nifti) >>> rat_fetch("S03", ... folder="./rat_data", ... unzip=True, ... convert=True) The call returns the list of ZIP paths; side-effects are files extracted (and optionally NIfTI volumes) under *folder*. Notes ----- - unzip=True recursively extracts any inner ZIPs. - convert=True writes compressed NIfTI files alongside the DICOM tree (requires dicom2nifti; installed via miblab[data]). - You may pass "S01" or "s01"; labels are case-insensitive. 
""" # ── resolve study IDs ─────────────────────────────────────────────────── dataset = (dataset or "all").lower() valid_ids = [f"s{i:02d}" for i in range(1, 16)] # S01 … S15 only if dataset == "all": studies = valid_ids elif dataset in valid_ids: studies = [dataset] else: raise ValueError( f"Unknown study '{dataset}'. Choose one of " f"{', '.join(valid_ids)} or 'all'." ) # ── local paths & URL template ────────────────────────────────────────── folder = Path(folder).expanduser().resolve() folder.mkdir(parents=True, exist_ok=True) nifti_root = folder.parent / f"{folder.name}_nifti" base_url = f"https://zenodo.org/api/records/{DOI['RAT']}/files" downloaded: List[str] = [] # ── download loop ─────────────────────────────────────────────────────── desc = "Downloading TRISTAN rat studies" it = tqdm(studies, desc=desc, leave=False) for sid in it: zip_name = f"{sid.upper()}.zip" zip_path = folder / zip_name url = f"{base_url}/{zip_name}/content" # skip if already present if not zip_path.exists(): try: with _rat_session.get(url, stream=True, timeout=30) as r: r.raise_for_status() with open(zip_path, "wb") as fh: for chunk in r.iter_content(chunk_size=1 << 20): fh.write(chunk) except Exception as exc: # noqa: BLE001 print(f"[rat_fetch] WARNING – could not download {zip_name}: {exc}") continue downloaded.append(str(zip_path)) # ── extraction ─────────────────────────────────────── if unzip: study_dir = folder / sid.upper() _unzip_nested(zip_path, study_dir, keep_archives=keep_archives) # ── optional DICOM ➜ NIfTI ────────────────────── if convert: _relax_dicom2nifti_validators() for dcm_dir in study_dir.rglob("*"): if not dcm_dir.is_dir(): continue if any(p.suffix.lower() == ".dcm" for p in dcm_dir.iterdir()): rel_out = dcm_dir.relative_to(folder) _convert_dicom_to_nifti( dcm_dir, nifti_root / rel_out, ) return downloaded
# Utilities def _unzip_nested(zip_path: str | Path, extract_to: str | Path, *, keep_archives: bool = False) -> None: """ Recursively extract *every* ZIP found inside *zip_path*. Parameters ---------- zip_path Path to the outer **.zip** file downloaded from Zenodo. extract_to Target directory. It is created if it does not exist. keep_archives • *False* (default) → **delete** each inner archive after it has been unpacked, leaving only the extracted folders/files. • *True* → preserve the nested ``.zip`` files for checksum / forensic work. Notes ----- * The routine is **pure-Python** (built-in ``zipfile``); no external 7-Zip dependency. * Extraction is breadth-first: after the outer ZIP is unpacked, the function scans the new tree for ``*.zip`` and repeats until none remain. * Corrupt inner archives are caught and logged to *stdout* but do **not** abort the entire operation. Examples -------- >>> _unzip_nested("S03.zip", "S03_unzipped", keep_archives=True) """ zip_path, extract_to = Path(zip_path), Path(extract_to) extract_to.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(zip_path) as zf: zf.extractall(extract_to) while True: inners = list(extract_to.rglob("*.zip")) if not inners: break for inner in inners: dest = inner.with_suffix("") # “…/file.zip” → “…/file/” dest.mkdir(exist_ok=True) try: with zipfile.ZipFile(inner) as izf: izf.extractall(dest) if not keep_archives: inner.unlink() except zipfile.BadZipFile as exc: # noqa: BLE001 print(f"[rat_fetch] WARNING – cannot unzip {inner}: {exc}") def _convert_dicom_to_nifti(source_dir: Path, output_dir: Path) -> None: """ Convert *all* DICOM series found in *source_dir* to compressed NIfTI. A thin, tolerant wrapper around :pyfunc:`dicom2nifti.convert_directory`. Any conversion error (corrupt slice, unsupported orientation, etc.) is printed and the function returns so the calling loop can continue with the next subject / day. Parameters ---------- source_dir Directory that contains one or more DICOM series. 
output_dir Destination directory. Created if missing. Each converted series is written as ``series_<UID>.nii.gz``. Examples -------- >>> from pathlib import Path >>> _convert_dicom_to_nifti(Path("S01/Rat03/Day1/dicom"), Path("S01_nifti/Rat03/Day1")) """ output_dir.mkdir(parents=True, exist_ok=True) try: dicom2nifti.convert_directory( str(source_dir), str(output_dir), reorient=True ) except Exception as exc: # noqa: BLE001 print(f"[rat_fetch] ERROR – conversion failed for {source_dir}: {exc}") def _relax_dicom2nifti_validators() -> None: """ Disable dicom2nifti's strict slice-geometry validators. Pre-clinical (small-animal) scanners often produce DICOMs that fail dicom2nifti’s default **orthogonality** / **slice-increment** checks even though the data reconstructs fine. This helper tries to import ``dicom2nifti.settings`` and, if present, toggles every *disable_validate_* flag known across versions 2 → 3. The call is **idempotent** – safe to invoke multiple times. No error is raised when *dicom2nifti* is not installed; the caller should already have checked the `_have_dicom2nifti` feature-flag. """ import dicom2nifti.settings as _dset # type: ignore for fn in ("disable_validate_orthogonal", "disable_validate_sliceincrement", "disable_validate_slice_increment", "disable_validate_dimensions", "disable_validate_dimension"): if hasattr(_dset, fn): getattr(_dset, fn)()