import csv
import os
import shutil
import tempfile
import zipfile
from io import TextIOWrapper

import numpy as np

from pydmr.pydict import (
    dict_keep,
    dict_reformat,
    _nested_dict_to_multi_index,
    dict_to_flat,
)
[docs]
def write(path:str, dmr:dict, format='flat'):
    """Write data to disk in .dmr format.

    The data are validated first; nothing is written to disk unless all
    checks pass. The csv files are assembled in a temporary folder which is
    then zipped to ``<path>.dmr.zip``.

    Args:
        path (str): path to .dmr file. If the extension .dmr is not
            included, it is added automatically.
        dmr (dict): A dictionary with one required key 'data'
            and optional keys 'rois', 'pars', 'sdev', 'columns'.
            dmr['data'] is a dictionary with one item for each
            parameter; the key is the parameter and the value is a list
            containing description, unit and python data type
            (one of 'str', 'float', 'bool', 'int', 'complex').
            dmr['rois'] is a dictionary with one item per ROI; each
            ROI is a dictionary on itself which has keys
            (subject, study, parameter) and a list or array as value.
            dmr['pars'] is a dictionary with parameters
            such as sequence parameters or subject characteristics.
            dmr['sdev'] is a dictionary with standard deviations
            of parameters listed in pars.csv. This can include only a
            subset of parameters but all parameters in sdev.csv must
            also be in pars.csv. Defaults to None.
            dmr['columns'] is a list of headers for optional
            columns in the data dictionary. Required if the data
            dictionary contains extra columns above the required three
            (description, unit, type).
        format (str, optional): Formatting of the arguments.
            The default ('flat') is a dictionary with a
            multi-index, meaning values (rois, pars, sdev) are
            flat dictionaries with a multi-index consisting of
            (subject, study, parameter). If format='nest', these values
            are nested dictionaries with 3 levels. If
            format='table', the values are a list of lists.
            Defaults to 'flat'.

    Raises:
        ValueError: if the data are not dmr-compliant formatted.
        ImportError: if an optional package is not installed
    """
    #
    # Check dmr compliance
    #
    dmr = dict_to_flat(dmr, format)
    data = _check_data(dmr)
    if 'rois' in dmr:
        _check_rois(dmr['rois'], data)
    if 'pars' in dmr:
        _check_pars(dmr['pars'], data)
    if 'sdev' in dmr:
        _check_sdev(dmr)

    # Strip any extension so that exactly one ".dmr.zip" is appended below
    if path.endswith(".dmr.zip"):
        path = path[:-8]
    elif path.endswith(".dmr"):
        path = path[:-4]

    # Stage the csv files in a private temporary folder: a pre-existing
    # folder named `path` is never zipped or deleted by accident, and the
    # staging area is removed even if writing fails half-way.
    with tempfile.TemporaryDirectory() as folder:
        #
        # Write data dictionary
        #
        header = ['parameter', 'description', 'unit', 'type']
        header += list(dmr.get('columns') or [])
        rows = [header]
        rows.extend([key] + values for key, values in data.items())
        _write_csv(os.path.join(folder, "data.csv"), rows)

        #
        # Write ROI curves
        #
        if 'rois' in dmr:
            rois = dmr['rois']
            # Columns are padded to the longest array; default=0 covers
            # an empty rois dictionary.
            max_len = max((len(arr) for arr in rois.values()), default=0)
            columns = []
            for key, values in rois.items():
                data_type = np.dtype(data[key[-1]][2])
                write_values = np.asarray(values).astype(data_type)
                if data_type == 'bool':
                    # Booleans are stored as 1/0, which is what read() expects
                    write_values = write_values.astype(int)
                # First 3 cells: the key; then the values; then padding
                padding = [""] * (max_len - len(values))
                columns.append(list(key) + list(write_values) + padding)
            # Transpose to get one csv row per time point
            rows = list(map(list, zip(*columns)))
            _write_csv(os.path.join(folder, "rois.csv"), rows)

        #
        # Write parameters
        #
        if 'pars' in dmr:
            rows = [['subject', 'study', 'parameter', 'value']]
            for key, value in dmr['pars'].items():
                if data[key[-1]][2] == 'bool':
                    value = '1' if value else '0'
                rows.append(list(key) + [value])
            _write_csv(os.path.join(folder, "pars.csv"), rows)

        #
        # Write standard deviations
        #
        if 'sdev' in dmr:
            rows = [['subject', 'study', 'parameter', 'value']]
            rows.extend(list(key) + [value] for key, value in dmr['sdev'].items())
            _write_csv(os.path.join(folder, "sdev.csv"), rows)

        # Zip the staged files into <path>.dmr.zip
        shutil.make_archive(path + ".dmr", "zip", folder)


def _write_csv(file, rows):
    """Write a list of rows to the csv file at path `file`."""
    # utf-8 matches the decoding used when the files are read back;
    # newline="" lets the csv module control the line endings.
    with open(file, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(rows)


def _check_data(dmr):
    """Validate the data dictionary dmr['data'] and return it."""
    data = dmr['data']
    columns = list(dmr.get('columns') or [])
    length = 3 + len(columns)
    for key, values in data.items():
        if not isinstance(values, list):
            raise ValueError("Each dmr['data'] value must be a list")
        # NOTE(review): rows longer than `length` are accepted here but
        # produce a data.csv whose rows are wider than its header.
        if len(values) < length:
            raise ValueError(
                f"Each dmr['data'] value must have at least {length} elements. "
                f"The required 'description', 'unit', 'type' and the "
                f"optional columns {columns}."
            )
        # Only these types can be parsed when the file is read back
        if values[2] not in ('str', 'float', 'bool', 'int', 'complex'):
            raise ValueError(
                f"data type {values[2]} of parameter {key} is not allowed. "
                "Possible values are 'str', 'float', 'bool', 'int', 'complex'."
            )
    return data


def _check_index(name, index, data):
    """Check that a rois/pars key is a triplet ending in a known parameter."""
    if len(index) != 3:
        raise ValueError(f"Each {name} key must be a 3-element tuple")
    if index[-1] not in data:
        raise ValueError(
            f"{name} parameter {index[-1]} not in dmr['data']. "
            "Please add it to the dictionary."
        )


def _check_rois(rois, data):
    """Check that all ROI keys are valid and values match the declared type."""
    for index in rois:
        _check_index('rois', index, data)
    for index, values in rois.items():
        raw = np.asarray(values)
        cast = raw.astype(np.dtype(data[index[-1]][2]))
        # NaN != NaN, so float/complex data must be compared NaN-aware or
        # valid curves with missing values would be rejected.
        if cast.dtype.kind in 'fc' and raw.dtype.kind in 'fc':
            unchanged = np.array_equal(cast, raw, equal_nan=True)
        else:
            unchanged = np.array_equal(cast, raw)
        if not unchanged:
            raise ValueError(
                f"rois parameter {index[-1]} has wrong data type. "
                "Please correct the data in rois.csv "
                "or correct the data type in data.csv"
            )


def _check_pars(pars, data):
    """Check that all pars keys are valid and values match the declared type."""
    # Accepted python/numpy classes and the wording used in the error
    # message, per declared data type.
    accepted = {
        'str': ((str, np.str_), "a string"),
        'float': ((float, np.floating, int, np.integer), "a float"),
        'bool': ((bool, np.bool_), "a boolean"),
        'int': ((int, np.integer), "an integer"),
        'complex': ((complex, np.complexfloating), "a complex number"),
    }
    for index in pars:
        _check_index('pars', index, data)
    for index, value in pars.items():
        classes, label = accepted[data[index[-1]][2]]
        if not isinstance(value, classes):
            raise ValueError(
                f"pars parameter {index[-1]} must be {label}. "
                "Please correct the data in pars.csv "
                "or correct the data type in data.csv"
            )


def _check_sdev(dmr):
    """Check that dmr['sdev'] is a numeric subset of dmr['pars']."""
    if 'pars' not in dmr:
        raise ValueError(
            "dmr['sdev'] should only be provided if dmr['pars'] are also "
            "provided."
        )
    sdev = dmr['sdev']
    if not (sdev.keys() <= dmr['pars'].keys()):
        raise ValueError(
            'keys in the sdev dictionary must also be in pars.'
        )
    for value in sdev.values():
        try:
            float(value)
        except (TypeError, ValueError, OverflowError) as err:
            raise ValueError("sdev values must be float.") from err
[docs]
def read(path:str, format='flat', subject=None, study=None, parameter=None):
    """Read .dmr data from disk.

    Args:
        path (str): Path to .dmr file where the data are
            saved. The extensions do not need to be included.
        format (str, optional): Formatting of the returned results.
            The default ('flat') returns a dictionary with a
            multi-index, meaning values (rois, pars, sdev) are returned
            as flat dictionaries with a multi-index consisting of
            (subject, study, parameter). If format='nest', these values
            are returned as nested dictionaries with 3 levels. If
            format='table', the values are returned as a list of lists.
            If format is 'pandas' the results are pandas dataframes.
            Defaults to 'flat'.
        subject (str or list, optional): subject or list of subjects
            to return. If not provided, all subjects are returned.
            Defaults to None.
        study (str or list, optional): study or list of studies
            to return. If not provided, all studies are returned.
            Defaults to None.
        parameter (str or list, optional): parameter or list of
            parameters to return. If not provided, all parameters are
            returned. Defaults to None.

    Raises:
        ValueError: If the data on disk are not correctly formatted.

    Returns:
        dict: A dictionary with one item for each of the csv files
        in the dmr file - keys are either 'data', 'rois', 'pars',
        'sdev'. The optional key 'columns' is returned as well if
        the data dictionary has optional columns, in which case it
        lists the names of those extra columns.
    """
    if path.endswith(".dmr.zip"):
        read_path = path
    # If the filename is provided with the .dmr extension alone, add the .zip
    elif path.endswith(".dmr"):
        read_path = path + ".zip"
    # If filename is provided without extensions, add them both
    else:
        read_path = path + ".dmr.zip"

    with zipfile.ZipFile(read_path, "r") as z:

        # Check files
        csv_files = [f for f in z.namelist() if f.endswith(".csv")]
        if 'data.csv' not in csv_files:
            raise ValueError("A .dmr file must contain a data.csv file.")

        # Read data dictionary
        dict_list = _read_csv(z, 'data.csv')
        if not dict_list:
            raise ValueError(
                "The file data.csv is empty. It must contain at least "
                "a header row."
            )
        data_headers = dict_list[0]
        if len(data_headers) < 4:
            raise ValueError(
                "The header of data.csv must list at least 4 columns: "
                "parameter, description, unit, type."
            )
        data = {}
        for d in dict_list[1:]:
            if len(d) != len(data_headers):
                raise ValueError(
                    f"Each data_dict row must have {len(data_headers)} "
                    f"elements {data_headers}. "
                    f"Correct the data dictionary in data.csv"
                )
            if d[3] not in ['str', 'float', 'bool', 'int', 'complex']:
                raise ValueError(
                    f"data type {d[3]} is not allowed. Correct "
                    f"the data dictionary in data.csv"
                )
            data[d[0]] = d[1:]

        # Read parameters
        if 'pars.csv' in csv_files:
            pars = {}
            # First row holds the column headers and is not returned
            for p in _read_csv(z, 'pars.csv')[1:]:
                if len(p) != 4:
                    raise ValueError(
                        f"Error in pars row {p}. "
                        f"Each row must have 4 elements: "
                        f"subject, study, parameter, value. "
                        f"Correct the data in pars.csv"
                    )
                if p[2] not in data:
                    raise ValueError(
                        f"parameter {p[2]} is not listed in the "
                        f"data dictionary in data.csv"
                    )
                pars[tuple(p[:3])] = _parse_par(p[3], data[p[2]][2])

        # Read ROI curves
        if 'rois.csv' in csv_files:
            rois = {}
            rois_list = _read_csv(z, 'rois.csv')
            if rois_list:
                if len(rois_list) < 3:
                    raise ValueError(
                        "The first 3 rows of rois.csv must list subject, "
                        "study and parameter of each column. "
                        "Correct the data in rois.csv"
                    )
                # Transpose first 3 rows to get one header tuple per column
                headers = list(zip(*rois_list[:3]))
                body = rois_list[3:]
                # Walk the columns by index rather than zipping the rows:
                # zipping drops every column when there are no data rows
                # (all arrays empty) or when a row is blank or short.
                for idx, header in enumerate(headers):
                    if header[2] not in data:
                        raise ValueError(
                            f"roi parameter {header[2]} is not listed in the "
                            f"data dictionary in data.csv. Please update the dictionary."
                        )
                    # Empty cells are padding of columns shorter than the longest
                    values = np.array(
                        [row[idx] for row in body if idx < len(row) and row[idx]]
                    )
                    data_type = data[header[2]][2]
                    if data_type == 'bool':
                        # Booleans are stored as 1/0
                        rois[header] = values.astype(int).astype(bool)
                    else:
                        rois[header] = values.astype(np.dtype(data_type))

        # Read standard deviations
        if 'sdev.csv' in csv_files:
            if 'pars.csv' not in csv_files:
                raise ValueError(
                    "A file sdev.csv is included in the .dmr file "
                    "without a corresponding pars.csv file. "
                    "Please remove the sdev.csv file or add a "
                    "pars.csv file."
                )
            sdev = {}
            # First row holds the column headers and is not returned
            for p in _read_csv(z, 'sdev.csv')[1:]:
                if len(p) != 4:
                    raise ValueError(
                        f"Each sdev row must have 4 elements: "
                        f"subject, study, parameter, sdev. "
                        f"Correct the data in sdev.csv"
                    )
                if tuple(p[:3]) not in pars:
                    raise ValueError(
                        f"parameter {tuple(p[:3])} has a sdev but "
                        f"no corresponding value in pars.csv."
                    )
                sdev[tuple(p[:3])] = float(p[3])

    # Create dictionary
    dmr = {'data': data}
    if len(data_headers) > 4:
        dmr['columns'] = data_headers[4:]
    if 'pars.csv' in csv_files:
        dmr['pars'] = pars
    if 'rois.csv' in csv_files:
        dmr['rois'] = rois
    if 'sdev.csv' in csv_files:
        dmr['sdev'] = sdev

    # Extract requested fields
    dmr = dict_keep(dmr, subject, study, parameter)

    # Convert to required return format
    dmr = dict_reformat(dmr, format)

    return dmr


def _read_csv(archive, name):
    """Return all rows of the csv file `name` in the open zip `archive`."""
    with archive.open(name) as file:
        # newline="" leaves line endings untouched for the csv parser
        text = TextIOWrapper(file, encoding="utf-8", newline="")
        return list(csv.reader(text))


def _parse_par(text, data_type):
    """Convert the string `text` read from pars.csv to `data_type`."""
    if data_type == 'str':
        return text
    if data_type == 'float':
        return float(text)
    if data_type == 'int':
        return int(text)
    if data_type == 'bool':
        if text == '1':
            return True
        if text == '0':
            return False
        raise ValueError(
            f"Boolean value {text} is not allowed. "
            "Possible values are 1 or 0. "
            "Correct the data in pars.csv"
        )
    # 'complex' is the only allowed type left after the data.csv check
    return complex(text)