Source code for pydmr.rw

import os
import shutil
import zipfile
import csv
from io import TextIOWrapper

import numpy as np


from pydmr.pydict import (
    dict_keep, 
    dict_reformat, 
    _nested_dict_to_multi_index,
    dict_to_flat,
)



# For each data type a dmr['pars'] value may be declared with: the python
# types accepted for the value, and the wording used in the error message.
# np.str_ is listed without np.unicode_, which was removed in NumPy 2.0.
_PARS_TYPES = {
    'str': ((str, np.str_), 'a string'),
    'float': ((float, np.floating, int), 'a float'),
    'bool': ((bool, np.bool_), 'a boolean'),
    'int': ((int, np.integer), 'an integer'),
    'complex': ((complex, np.complexfloating), 'a complex number'),
}


def _check_keys(name, values, data):
    """Check that all keys of ``values`` are 3-tuples ending in a parameter
    listed in the data dictionary ``data``; raise ValueError otherwise."""
    for key in values:
        if len(key) != 3:
            raise ValueError(f"Each {name} key must be a 3-element tuple")
        if key[-1] not in data:
            raise ValueError(
                f"{name} parameter {key[-1]} not in dmr['data']. "
                "Please add it to the dictionary."
            )


def _write_csv_rows(file, rows):
    """Write ``rows`` (a list of lists) to the csv file ``file``."""
    # read() decodes the csv files as UTF-8, so the encoding is set
    # explicitly rather than left to the platform default.
    with open(file, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)


def write(path:str, dmr:dict, format='flat'):
    """Write data to disk in .dmr format.

    The data are written as csv files (data.csv and, when present, rois.csv,
    pars.csv and sdev.csv) which are zipped into a single file
    ``<path>.dmr.zip``.

    Args:
        path (str): path to the .dmr file. A trailing '.dmr' or '.dmr.zip'
            is stripped and the extension '.dmr.zip' is always added to the
            file that is written.
        dmr (dict): A dictionary with one required key 'data' and optional
            keys 'rois', 'pars', 'sdev', 'columns'.

            dmr['data'] is a dictionary with one item for each parameter;
            the key is the parameter and the value is a list containing
            description, unit and python data type, followed by the values
            of any optional columns.

            dmr['rois'] holds the ROI curves: keys are (subject, study,
            parameter) and values are lists or arrays.

            dmr['pars'] holds scalar parameters such as sequence parameters
            or subject characteristics, with the same type of keys.

            dmr['sdev'] holds standard deviations of parameters listed in
            dmr['pars']. This can include only a subset of the parameters
            but all keys in dmr['sdev'] must also be in dmr['pars'].

            dmr['columns'] is a list of headers for optional columns in the
            data dictionary. Required if the data dictionary contains extra
            columns above the required three (description, unit, type).
        format (str, optional): Formatting of the arguments. The default
            ('flat') is a dictionary with a multi-index, meaning values
            (rois, pars, sdev) are flat dictionaries with a multi-index
            consisting of (subject, study, parameter). If format='nest',
            these values are nested dictionaries with 3 levels. If
            format='table', the values are a list of lists. Defaults to
            'flat'. The conversion to the flat format is delegated to
            dict_to_flat.

    Raises:
        ValueError: if the data are not dmr-compliant formatted.
        ImportError: if an optional package is not installed (presumably
            raised by the format conversion - to be confirmed).
    """
    # Local import: only this function needs a temporary directory.
    import tempfile

    dmr = dict_to_flat(dmr, format)
    data = dmr['data']
    columns = dmr.get('columns') or []
    rois = dmr.get('rois')
    pars = dmr.get('pars')
    sdev = dmr.get('sdev')

    #
    # Check dmr compliance
    #

    n_values = 3 + len(columns)
    for values in data.values():
        if not isinstance(values, list):
            raise ValueError("Each dmr['data'] value must be a list")
        if len(values) < n_values:
            raise ValueError(
                f"Each dmr['data'] value must have at least {n_values} "
                f"elements. The required 'description', 'unit', 'type' "
                f"and the optional columns {columns}."
            )

    if rois is not None:
        _check_keys('rois', rois, data)
        for key, values in rois.items():
            data_type = np.dtype(data[key[-1]][2])
            write_values = np.asarray(values).astype(data_type)
            # The cast must not alter the values, otherwise the declared
            # data type does not match the data.
            if not np.array_equal(write_values, values):
                raise ValueError(
                    f"rois parameter {key[-1]} has wrong data type. "
                    "Please correct the data in rois.csv "
                    "or correct the data type in data.csv"
                )

    if pars is not None:
        _check_keys('pars', pars, data)
        for key, value in pars.items():
            data_type = data[key[-1]][2]
            if data_type not in _PARS_TYPES:
                raise ValueError(
                    f"data type {data_type} is not allowed. "
                    "Please correct the data type in data.csv"
                )
            allowed, label = _PARS_TYPES[data_type]
            if not isinstance(value, allowed):
                raise ValueError(
                    f"pars parameter {key[-1]} must be {label}. "
                    "Please correct the data in pars.csv "
                    "or correct the data type in data.csv"
                )

    if sdev is not None:
        if pars is None:
            raise ValueError(
                "dmr['sdev'] should only be provided if dmr['pars'] are also "
                "provided."
            )
        if not (sdev.keys() <= pars.keys()):
            raise ValueError(
                'keys in the sdev dictionary must also be in pars.'
            )
        for value in sdev.values():
            try:
                float(value)
            except (TypeError, ValueError):
                raise ValueError("sdev values must be float.") from None

    #
    # Build the rows of each csv file
    #

    # Data dictionary
    header = ['parameter', 'description', 'unit', 'type'] + list(columns)
    tables = {
        "data.csv": [header] + [[key] + values for key, values in data.items()]
    }

    # ROI curves: one column per ROI. The first 3 rows hold the key
    # (subject, study, parameter), the rows below hold the values.
    if rois is not None:
        max_len = max((len(values) for values in rois.values()), default=0)
        roi_columns = []
        for key, values in rois.items():
            data_type = np.dtype(data[key[-1]][2])
            write_values = np.asarray(values).astype(data_type)
            if data_type == np.dtype('bool'):
                # Booleans are stored as 1 and 0
                write_values = write_values.astype(int)
            # Pad shorter columns with empty cells
            padding = [""] * (max_len - len(values))
            roi_columns.append(list(key) + list(write_values) + padding)
        # Transpose to get row-wise structure
        tables["rois.csv"] = [list(row) for row in zip(*roi_columns)]

    # Parameters
    if pars is not None:
        rows = [['subject', 'study', 'parameter', 'value']]
        for key, value in pars.items():
            if data[key[-1]][2] == 'bool':
                # Booleans are stored as 1 and 0
                value = '1' if value else '0'
            rows.append(list(key) + [value])
        tables["pars.csv"] = rows

    # Standard deviations
    if sdev is not None:
        rows = [['subject', 'study', 'parameter', 'value']]
        rows += [list(key) + [value] for key, value in sdev.items()]
        tables["sdev.csv"] = rows

    #
    # Write the csv files and zip them
    #

    if path.endswith(".dmr.zip"):
        path = path[:-8]
    elif path.endswith(".dmr"):
        path = path[:-4]

    # Make sure the folder that will hold the archive exists
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # The csv files are staged in a temporary folder rather than in a folder
    # named `path`: an existing folder with that name is then left untouched
    # and no stray folder remains if writing fails half-way.
    with tempfile.TemporaryDirectory() as folder:
        for name, rows in tables.items():
            _write_csv_rows(os.path.join(folder, name), rows)
        shutil.make_archive(path + ".dmr", "zip", folder)
def _read_csv_rows(archive, name):
    """Return the rows of csv file ``name`` in the open zip ``archive``."""
    with archive.open(name) as file:
        # 'utf-8-sig' also accepts files that start with a byte order mark,
        # which would otherwise end up inside the first cell. newline=""
        # leaves the handling of line endings to the csv module.
        text = TextIOWrapper(file, encoding="utf-8-sig", newline="")
        return list(csv.reader(text))


def _parse_par_value(text, data_type):
    """Convert the string ``text`` read from pars.csv to ``data_type``."""
    if data_type == 'str':
        return text
    if data_type == 'bool':
        if text == '1':
            return True
        if text == '0':
            return False
        raise ValueError(
            f"Boolean value {text} is not allowed. "
            "Possible values are 1 or 0. "
            "Correct the data in pars.csv"
        )
    # The remaining data types raise a ValueError themselves if the text
    # cannot be converted.
    converters = {'float': float, 'int': int, 'complex': complex}
    return converters[data_type](text)


def read(path:str, format='flat', subject=None, study=None, parameter=None):
    """Read .dmr data from disk.

    Args:
        path (str): Path to .dmr file where the data are saved. The
            extensions ('.dmr' or '.dmr.zip') do not need to be included.
        format (str, optional): Formatting of the returned results. The
            default ('flat') returns a dictionary with a multi-index,
            meaning values (rois, pars, sdev) are returned as flat
            dictionaries with a multi-index consisting of (subject, study,
            parameter). If format='nest', these values are returned as
            nested dictionaries with 3 levels. If format='table', the
            values are returned as a list of lists. If format is 'pandas'
            the results are pandas dataframes. Defaults to 'flat'. The
            conversion is delegated to dict_reformat.
        subject (str or list, optional): subject or list of subjects to
            return. If not provided, all subjects are returned. Defaults
            to None.
        study (str or list, optional): study or list of studies to return.
            If not provided, all studies are returned. Defaults to None.
        parameter (str or list, optional): parameter or list of parameters
            to return. If not provided, all parameters are returned.
            Defaults to None.

    Raises:
        ValueError: If the data on disk are not correctly formatted.

    Returns:
        dict: A dictionary with one item for each of the csv files in the
        dmr file - keys are either 'data', 'rois', 'pars', 'sdev'. The
        optional key 'columns' is returned as well if the data dictionary
        has optional columns, in which case it lists the names of those
        extra columns.
    """
    if path.endswith(".dmr.zip"):
        read_path = path
    elif path.endswith(".dmr"):
        # Filename provided with the .dmr extension alone: add the .zip
        read_path = path + ".zip"
    else:
        # Filename provided without extensions: add them both
        read_path = path + ".dmr.zip"

    with zipfile.ZipFile(read_path, "r") as z:

        # Check files
        csv_files = [f for f in z.namelist() if f.endswith(".csv")]
        if 'data.csv' not in csv_files:
            raise ValueError("A .dmr file must contain a data.csv file.")

        # Read data dictionary
        dict_list = _read_csv_rows(z, 'data.csv')
        if not dict_list or len(dict_list[0]) < 4:
            raise ValueError(
                "The first row of data.csv must list at least 4 headers: "
                "parameter, description, unit, type. "
                "Correct the data dictionary in data.csv"
            )
        data_headers = dict_list[0]
        data = {}
        for d in dict_list[1:]:
            if len(d) != len(data_headers):
                raise ValueError(
                    f"Each data_dict row must have {len(data_headers)} "
                    f"elements {data_headers}. "
                    f"Correct the data dictionary in data.csv"
                )
            if d[3] not in ['str', 'float', 'bool', 'int', 'complex']:
                raise ValueError(
                    f"data type {d[3]} is not allowed. Correct "
                    f"the data dictionary in data.csv"
                )
            data[d[0]] = d[1:]

        dmr = {'data': data}
        if len(data_headers) > 4:
            dmr['columns'] = data_headers[4:]

        # Read parameters
        if 'pars.csv' in csv_files:
            pars = {}
            # The first row holds the headers and is not returned
            for p in _read_csv_rows(z, 'pars.csv')[1:]:
                if len(p) != 4:
                    raise ValueError(
                        f"Error in pars row {p}. "
                        f"Each row must have 4 elements: "
                        f"subject, study, parameter, value. "
                        f"Correct the data in pars.csv"
                    )
                if p[2] not in data:
                    raise ValueError(
                        f"parameter {p[2]} is not listed in the "
                        f"data dictionary in data.csv"
                    )
                pars[tuple(p[:3])] = _parse_par_value(p[3], data[p[2]][2])
            dmr['pars'] = pars

        # Read ROI curves
        if 'rois.csv' in csv_files:
            rois = {}
            rois_list = _read_csv_rows(z, 'rois.csv')
            if rois_list:
                if len(rois_list) < 3:
                    raise ValueError(
                        "The first 3 rows of rois.csv must list subject, "
                        "study and parameter of each ROI. "
                        "Correct the data in rois.csv"
                    )
                # Transpose first 3 rows to get column-wise headers
                headers = list(zip(*rois_list[:3]))
                # Transpose the data (from row 3 onward) to get the columns
                columns = list(zip(*rois_list[3:]))
                if not columns:
                    # No rows below the headers: every ROI is empty
                    columns = [()] * len(headers)
                for header, col in zip(headers, columns):
                    if header[2] not in data:
                        raise ValueError(
                            f"roi parameter {header[2]} is not listed in the "
                            f"data dictionary in data.csv. Please update the dictionary."
                        )
                    # Empty cells are padding of columns shorter than the
                    # longest one
                    values = np.array([val for val in col if val])
                    data_type = data[header[2]][2]
                    if data_type == 'bool':
                        # Booleans are stored as 1 and 0
                        rois[header] = values.astype(int).astype(bool)
                    else:
                        rois[header] = values.astype(np.dtype(data_type))
            dmr['rois'] = rois

        # Read standard deviations
        if 'sdev.csv' in csv_files:
            if 'pars.csv' not in csv_files:
                raise ValueError(
                    "A file sdev.csv is included in the .dmr file "
                    "without a corresponding pars.csv file. "
                    "Please remove the sdev.csv file or add a "
                    "pars.csv file."
                )
            sdev = {}
            # The first row holds the headers and is not returned
            for p in _read_csv_rows(z, 'sdev.csv')[1:]:
                if len(p) != 4:
                    raise ValueError(
                        f"Each sdev row must have 4 elements: "
                        f"subject, study, parameter, sdev. "
                        f"Correct the data in sdev.csv"
                    )
                if tuple(p[:3]) not in pars:
                    raise ValueError(
                        f"parameter {tuple(p[:3])} has a sdev but "
                        f"no corresponding value in pars.csv."
                    )
                sdev[tuple(p[:3])] = float(p[3])
            dmr['sdev'] = sdev

    # Extract requested fields
    dmr = dict_keep(dmr, subject, study, parameter)

    # Convert to required return format
    dmr = dict_reformat(dmr, format)

    return dmr