Source code for CPAC.utils.datasource

# Copyright (C) 2012-2023  C-PAC Developers

# This file is part of C-PAC.

# C-PAC is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.

# C-PAC is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.

# You should have received a copy of the GNU Lesser General Public
# License along with C-PAC. If not, see <https://www.gnu.org/licenses/>.
import csv
import json
import re
from pathlib import Path
from typing import Union
from nipype import logging
from nipype.interfaces import utility as util
from CPAC.pipeline import nipype_pipeline_engine as pe
from CPAC.resources.templates.lookup_table import format_identifier, \
                                                  lookup_identifier
from CPAC.utils import function
from CPAC.utils.bids_utils import bids_remove_entity
from CPAC.utils.interfaces.function import Function
from CPAC.utils.typing import TUPLE
from CPAC.utils.utils import get_scan_params

logger = logging.getLogger('nipype.workflow')


def bidsier_prefix(unique_id):
    """
    Function to return a BIDSier prefix for a given unique_id

    Parameters
    ----------
    unique_id : str

    Returns
    -------
    prefix : str

    Examples
    --------
    >>> bidsier_prefix('01_1')
    'sub-01_ses-1'
    >>> bidsier_prefix('sub-01_ses-1')
    'sub-01_ses-1'
    >>> bidsier_prefix('sub-01_1')
    'sub-01_ses-1'
    >>> bidsier_prefix('01_ses-1')
    'sub-01_ses-1'
    """
    keys = ['sub', 'ses']
    components = unique_id.split('_')
    for i, component in enumerate(components):
        if i < len(keys):
            if not component.startswith(keys[i]):
                components[i] = '-'.join([keys[i], component])
    return '_'.join(components)


def get_rest(scan, rest_dict, resource="scan"):
    """Return the file path of the chosen resource stored in the functional
    file dictionary, if it exists.

    scan: the scan/series name or label
    rest_dict: the dictionary read in from the data configuration YAML file
               (sublist) nested under 'func:'
    resource: the dictionary key
              scan - the functional timeseries
              scan_parameters - path to the scan parameters JSON file, or
                                a dictionary containing scan parameters
                                information (to be phased out in the
                                future)
    """
    try:
        file_path = rest_dict[scan][resource]
    except KeyError:
        file_path = None
    return file_path
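

# Illustrative sketch (not part of the module): how ``get_rest`` reads the
# nested 'func' dictionary from a data configuration. The dictionary below is
# a hypothetical example, not taken from any real sublist.
# >>> func_dict = {'rest_1': {'scan': '/path/to/rest_1_bold.nii.gz',
# ...                         'scan_parameters': '/path/to/rest_1_bold.json'}}
# >>> get_rest('rest_1', func_dict)
# '/path/to/rest_1_bold.nii.gz'
# >>> get_rest('rest_1', func_dict, resource='scan_parameters')
# '/path/to/rest_1_bold.json'
# >>> get_rest('rest_2', func_dict) is None
# True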


def extract_scan_params_dct(scan_params_dct):
    return scan_params_dct


def get_map(map, map_dct):
    # return the spatial map required
    return map_dct[map]


def select_model_files(model, ftest, model_name):
    """
    Method to select model files
    """
    import os
    import glob

    files = glob.glob(os.path.join(model, '*'))

    if len(files) == 0:
        raise Exception("No files found inside directory %s" % model)

    fts_file = ''

    for filename in files:
        if (model_name + '.mat') in filename:
            mat_file = filename
        elif (model_name + '.grp') in filename:
            grp_file = filename
        elif ((model_name + '.fts') in filename) and ftest:
            fts_file = filename
        elif (model_name + '.con') in filename:
            con_file = filename

    if ftest == True and fts_file == '':
        errmsg = "\n[!] CPAC says: You have f-tests included in your group " \
                 "analysis model '%s', but no .fts files were found in the " \
                 "output folder specified for group analysis: %s.\n\nThe " \
                 ".fts file is automatically generated by CPAC, and if you " \
                 "are seeing this error, it is because something went wrong " \
                 "with the generation of this file, or it has been moved." \
                 "\n\n" % (model_name, model)
        raise Exception(errmsg)

    return fts_file, con_file, grp_file, mat_file


def check_func_scan(func_scan_dct, scan):
    """Run some checks on the functional timeseries-related files for a
    given series/scan name or label."""

    scan_resources = func_scan_dct[scan]

    try:
        scan_resources.keys()
    except AttributeError:
        err = "\n[!] The data configuration file you provided is " \
              "missing a level under the 'func:' key. CPAC versions " \
              "1.2 and later use data configurations with an " \
              "additional level of nesting.\n\nExample\nfunc:\n " \
              "rest01:\n scan: /path/to/rest01_func.nii.gz\n" \
              " scan parameters: /path/to/scan_params.json\n\n" \
              "See the User Guide for more information.\n\n"
        raise Exception(err)

    # actual 4D time series file
    if "scan" not in scan_resources.keys():
        err = "\n\n[!] The {0} scan is missing its actual time-series " \
              "scan file, which should be a filepath labeled with the " \
              "'scan' key.\n\n".format(scan)
        raise Exception(err)

    # Nipype restriction (may have changed)
    if '.' in scan or '+' in scan or '*' in scan:
        raise Exception('\n\n[!] Scan names cannot contain any special '
                        'characters (., +, *, etc.). Please update this '
                        'and try again.\n\nScan: {0}'
                        '\n\n'.format(scan))


def create_func_datasource(rest_dict, rpool, wf_name='func_datasource'):
    """Return the functional timeseries-related file paths for each
    series/scan, from the dictionary of functional files described in the
    data configuration (sublist) YAML file.

    Scan input (from inputnode) is an iterable.
    """
    from CPAC.pipeline import nipype_pipeline_engine as pe
    import nipype.interfaces.utility as util

    wf = pe.Workflow(name=wf_name)

    inputnode = pe.Node(util.IdentityInterface(
                            fields=['subject', 'scan', 'creds_path',
                                    'dl_dir'],
                            mandatory_inputs=True),
                        name='inputnode')

    outputnode = pe.Node(util.IdentityInterface(fields=['subject', 'rest',
                                                        'scan', 'scan_params',
                                                        'phase_diff',
                                                        'magnitude']),
                         name='outputspec')

    # have this here for now because of the big change in the data
    # configuration format
    # (Not necessary with ingress - format does not comply)
    if not rpool.check_rpool('derivatives-dir'):
        check_scan = pe.Node(function.Function(input_names=['func_scan_dct',
                                                            'scan'],
                                               output_names=[],
                                               function=check_func_scan,
                                               as_module=True),
                             name='check_func_scan')

        check_scan.inputs.func_scan_dct = rest_dict
        wf.connect(inputnode, 'scan', check_scan, 'scan')

    # get the functional scan itself
    selectrest = pe.Node(function.Function(input_names=['scan',
                                                        'rest_dict',
                                                        'resource'],
                                           output_names=['file_path'],
                                           function=get_rest,
                                           as_module=True),
                         name='selectrest')
    selectrest.inputs.rest_dict = rest_dict
    selectrest.inputs.resource = "scan"
    wf.connect(inputnode, 'scan', selectrest, 'scan')

    # check to see if it's on an Amazon AWS S3 bucket, and download it, if it
    # is - otherwise, just return the local file path
    check_s3_node = pe.Node(function.Function(input_names=['file_path',
                                                           'creds_path',
                                                           'dl_dir',
                                                           'img_type'],
                                              output_names=['local_path'],
                                              function=check_for_s3,
                                              as_module=True),
                            name='check_for_s3')

    wf.connect(selectrest, 'file_path', check_s3_node, 'file_path')
    wf.connect(inputnode, 'creds_path', check_s3_node, 'creds_path')
    wf.connect(inputnode, 'dl_dir', check_s3_node, 'dl_dir')
    check_s3_node.inputs.img_type = 'func'

    wf.connect(inputnode, 'subject', outputnode, 'subject')
    wf.connect(check_s3_node, 'local_path', outputnode, 'rest')
    wf.connect(inputnode, 'scan', outputnode, 'scan')

    # scan parameters CSV
    select_scan_params = pe.Node(function.Function(input_names=['scan',
                                                                'rest_dict',
                                                                'resource'],
                                                   output_names=['file_path'],
                                                   function=get_rest,
                                                   as_module=True),
                                 name='select_scan_params')
    select_scan_params.inputs.rest_dict = rest_dict
    select_scan_params.inputs.resource = "scan_parameters"
    wf.connect(inputnode, 'scan', select_scan_params, 'scan')

    # if the scan parameters file is on AWS S3, download it
    s3_scan_params = pe.Node(function.Function(input_names=['file_path',
                                                            'creds_path',
                                                            'dl_dir',
                                                            'img_type'],
                                               output_names=['local_path'],
                                               function=check_for_s3,
                                               as_module=True),
                             name='s3_scan_params')

    wf.connect(select_scan_params, 'file_path', s3_scan_params, 'file_path')
    wf.connect(inputnode, 'creds_path', s3_scan_params, 'creds_path')
    wf.connect(inputnode, 'dl_dir', s3_scan_params, 'dl_dir')
    wf.connect(s3_scan_params, 'local_path', outputnode, 'scan_params')

    return wf
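

# Illustrative sketch (not part of the module): wiring the functional
# datasource into a pipeline. ``rest_dict`` is a hypothetical 'func' block
# from a data configuration, and ``rpool`` is assumed to be an already-built
# resource pool exposing ``check_rpool``. Setting the scan iterable on the
# inputnode mirrors how the datasource is driven per series.
# >>> func_wf = create_func_datasource(rest_dict, rpool,
# ...                                  wf_name='func_gather_sub-01')  # doctest: +SKIP
# >>> func_wf.inputs.inputnode.set(subject='sub-01',
# ...                              creds_path=None,
# ...                              dl_dir='/tmp/working')  # doctest: +SKIP
# >>> func_wf.get_node('inputnode').iterables = \
# ...     ('scan', list(rest_dict.keys()))  # doctest: +SKIP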


def create_fmap_datasource(fmap_dct, wf_name='fmap_datasource'):
    """Return the field map files, from the dictionary of functional files
    described in the data configuration (sublist) YAML file.
    """
    from CPAC.pipeline import nipype_pipeline_engine as pe
    import nipype.interfaces.utility as util

    wf = pe.Workflow(name=wf_name)

    inputnode = pe.Node(util.IdentityInterface(
                            fields=['subject', 'scan', 'creds_path',
                                    'dl_dir'],
                            mandatory_inputs=True),
                        name='inputnode')

    outputnode = pe.Node(util.IdentityInterface(fields=['subject', 'rest',
                                                        'scan', 'scan_params',
                                                        'phase_diff',
                                                        'magnitude']),
                         name='outputspec')

    selectrest = pe.Node(function.Function(input_names=['scan',
                                                        'rest_dict',
                                                        'resource'],
                                           output_names=['file_path'],
                                           function=get_rest,
                                           as_module=True),
                         name='selectrest')
    selectrest.inputs.rest_dict = fmap_dct
    selectrest.inputs.resource = "scan"
    wf.connect(inputnode, 'scan', selectrest, 'scan')

    # check to see if it's on an Amazon AWS S3 bucket, and download it, if it
    # is - otherwise, just return the local file path
    check_s3_node = pe.Node(function.Function(input_names=['file_path',
                                                           'creds_path',
                                                           'dl_dir',
                                                           'img_type'],
                                              output_names=['local_path'],
                                              function=check_for_s3,
                                              as_module=True),
                            name='check_for_s3')

    wf.connect(selectrest, 'file_path', check_s3_node, 'file_path')
    wf.connect(inputnode, 'creds_path', check_s3_node, 'creds_path')
    wf.connect(inputnode, 'dl_dir', check_s3_node, 'dl_dir')
    check_s3_node.inputs.img_type = 'other'

    wf.connect(inputnode, 'subject', outputnode, 'subject')
    wf.connect(check_s3_node, 'local_path', outputnode, 'rest')
    wf.connect(inputnode, 'scan', outputnode, 'scan')

    # scan parameters CSV
    select_scan_params = pe.Node(function.Function(input_names=['scan',
                                                                'rest_dict',
                                                                'resource'],
                                                   output_names=['file_path'],
                                                   function=get_rest,
                                                   as_module=True),
                                 name='select_scan_params')
    select_scan_params.inputs.rest_dict = fmap_dct
    select_scan_params.inputs.resource = "scan_parameters"
    wf.connect(inputnode, 'scan', select_scan_params, 'scan')

    # if the scan parameters file is on AWS S3, download it
    s3_scan_params = pe.Node(function.Function(input_names=['file_path',
                                                            'creds_path',
                                                            'dl_dir',
                                                            'img_type'],
                                               output_names=['local_path'],
                                               function=check_for_s3,
                                               as_module=True),
                             name='s3_scan_params')

    wf.connect(select_scan_params, 'file_path', s3_scan_params, 'file_path')
    wf.connect(inputnode, 'creds_path', s3_scan_params, 'creds_path')
    wf.connect(inputnode, 'dl_dir', s3_scan_params, 'dl_dir')
    wf.connect(s3_scan_params, 'local_path', outputnode, 'scan_params')

    return wf


def get_fmap_phasediff_metadata(data_config_scan_params):
    if (not isinstance(data_config_scan_params, dict) and
            ".json" in data_config_scan_params):
        with open(data_config_scan_params, 'r', encoding='utf-8') as _f:
            data_config_scan_params = json.load(_f)

    echo_time = None
    echo_time_one = None
    echo_time_two = None
    if "EchoTime" in data_config_scan_params:
        echo_time = data_config_scan_params.get("EchoTime")
    elif "EchoTime1" in data_config_scan_params and "EchoTime2" \
            in data_config_scan_params:
        echo_time_one = data_config_scan_params.get("EchoTime1")
        echo_time_two = data_config_scan_params.get("EchoTime2")
    dwell_time = data_config_scan_params.get("DwellTime")
    pe_direction = data_config_scan_params.get("PhaseEncodingDirection")
    total_readout = data_config_scan_params.get("TotalReadoutTime")

    return (dwell_time, pe_direction, total_readout, echo_time,
            echo_time_one, echo_time_two)
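

# Illustrative sketch (not part of the module): what
# ``get_fmap_phasediff_metadata`` returns for a hypothetical phasediff
# sidecar dictionary containing EchoTime1/EchoTime2 (fields without values in
# the sidecar come back as None).
# >>> get_fmap_phasediff_metadata({'EchoTime1': 0.00492,
# ...                              'EchoTime2': 0.00738,
# ...                              'PhaseEncodingDirection': 'j-',
# ...                              'TotalReadoutTime': 0.0342})
# (None, 'j-', 0.0342, None, 0.00492, 0.00738)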


@Function.sig_imports(['from CPAC.utils.typing import TUPLE'])
def calc_delta_te_and_asym_ratio(effective_echo_spacing: float,
                                 echo_times: list) -> TUPLE[float, float]:
    """Calculate ``deltaTE`` and ``ees_asym_ratio`` from given metadata

    Parameters
    ----------
    effective_echo_spacing : float
        EffectiveEchoSpacing from sidecar JSON

    echo_times : list

    Returns
    -------
    deltaTE : float

    ees_asym_ratio : float
    """
    if not isinstance(effective_echo_spacing, float):
        raise LookupError('C-PAC could not find `EffectiveEchoSpacing` in '
                          'either fmap or func sidecar JSON, but that field '
                          'is required for PhaseDiff distortion correction.')

    # convert into milliseconds if necessary
    # these values will/should never be more than 10ms
    if ((echo_times[0] * 1000) < 10) and ((echo_times[1] * 1000) < 10):
        echo_times[0] = echo_times[0] * 1000
        echo_times[1] = echo_times[1] * 1000

    deltaTE = abs(echo_times[0] - echo_times[1])
    ees_asym_ratio = (effective_echo_spacing / deltaTE)
    return deltaTE, ees_asym_ratio


def gather_echo_times(echotime_1, echotime_2=None, echotime_3=None,
                      echotime_4=None):
    echotime_list = [echotime_1, echotime_2, echotime_3, echotime_4]
    echotime_list = list(filter(lambda item: item is not None,
                                echotime_list))
    echotime_list = list(set(echotime_list))
    if len(echotime_list) != 2:
        raise Exception("\n[!] Something went wrong with the field map echo "
                        "times - there should be two distinct values.\n\n"
                        f"Echo Times:\n{echotime_list}\n")
    return echotime_list
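

# Illustrative sketch (not part of the module): ``gather_echo_times`` drops
# the ``None`` placeholders set by the phasediff/magnitude ingress and keeps
# the two distinct echo times (values below are hypothetical, in seconds).
# >>> sorted(gather_echo_times(0.00492, None, 0.00738, None))
# [0.00492, 0.00738]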


def match_epi_fmaps(bold_pedir, epi_fmap_one, epi_fmap_params_one,
                    epi_fmap_two=None, epi_fmap_params_two=None):
    """Parse the field map files in the data configuration and determine
    which ones have the same and opposite phase-encoding directions as the
    BOLD scan in the current pipeline.

    Example - parse the files under the 'fmap' level, i.e. 'epi_AP':
        anat: /path/to/T1w.nii.gz
        fmap:
          epi_AP:
            scan: /path/to/field-map.nii.gz
            scan_parameters: <config dictionary containing phase-encoding
                              direction>
        func:
          rest_1:
            scan: /path/to/bold.nii.gz
            scan_parameters: <config dictionary of BOLD scan parameters>

    1. Check PhaseEncodingDirection field in the metadata for the BOLD.
    2. Check whether there are one or two EPI's in the field map data.
    3. Grab the one or two EPI field maps.
    """

    fmap_dct = {epi_fmap_one: epi_fmap_params_one}
    if epi_fmap_two and epi_fmap_params_two:
        fmap_dct[epi_fmap_two] = epi_fmap_params_two

    opposite_pe_epi = None
    same_pe_epi = None

    for epi_scan in fmap_dct.keys():
        scan_params = fmap_dct[epi_scan]
        if not isinstance(scan_params, dict) and ".json" in scan_params:
            with open(scan_params, 'r') as f:
                scan_params = json.load(f)
        if "PhaseEncodingDirection" in scan_params:
            epi_pedir = scan_params["PhaseEncodingDirection"]
            if epi_pedir == bold_pedir:
                same_pe_epi = epi_scan
            elif epi_pedir[0] == bold_pedir[0]:
                opposite_pe_epi = epi_scan

    return (opposite_pe_epi, same_pe_epi)
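

# Illustrative sketch (not part of the module): matching two hypothetical EPI
# field maps against a BOLD scan acquired with PhaseEncodingDirection 'j-'.
# Sidecar parameters are passed as dictionaries here, but JSON file paths are
# also accepted.
# >>> match_epi_fmaps('j-',
# ...                 '/path/to/epi_AP.nii.gz',
# ...                 {'PhaseEncodingDirection': 'j-'},
# ...                 epi_fmap_two='/path/to/epi_PA.nii.gz',
# ...                 epi_fmap_params_two={'PhaseEncodingDirection': 'j'})
# ('/path/to/epi_PA.nii.gz', '/path/to/epi_AP.nii.gz')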


def ingress_func_metadata(wf, cfg, rpool, sub_dict, subject_id,
                          input_creds_path, unique_id=None, num_strat=None):
    name_suffix = ''
    for suffix_part in (unique_id, num_strat):
        if suffix_part is not None:
            name_suffix += f'_{suffix_part}'

    # Grab field maps
    diff = False
    blip = False
    fmap_rp_list = []
    fmap_TE_list = []

    if "fmap" in sub_dict:
        second = False
        for key in sub_dict["fmap"]:
            gather_fmap = create_fmap_datasource(
                sub_dict["fmap"], f"fmap_gather_{key}_{subject_id}")
            gather_fmap.inputs.inputnode.set(
                subject=subject_id, creds_path=input_creds_path,
                dl_dir=cfg.pipeline_setup['working_directory']['path'])
            gather_fmap.inputs.inputnode.scan = key

            orig_key = key
            if 'epi' in key and not second:
                key = 'epi-1'
                second = True
            elif 'epi' in key and second:
                key = 'epi-2'

            rpool.set_data(key, gather_fmap, 'outputspec.rest', {}, "",
                           "fmap_ingress")
            rpool.set_data(f'{key}-scan-params', gather_fmap,
                           'outputspec.scan_params', {}, "",
                           "fmap_params_ingress")

            fmap_rp_list.append(key)

            get_fmap_metadata_imports = ['import json']
            get_fmap_metadata = pe.Node(Function(
                input_names=['data_config_scan_params'],
                output_names=['dwell_time',
                              'pe_direction',
                              'total_readout',
                              'echo_time',
                              'echo_time_one',
                              'echo_time_two'],
                function=get_fmap_phasediff_metadata,
                imports=get_fmap_metadata_imports),
                name=f'{key}_get_metadata{name_suffix}')

            wf.connect(gather_fmap, 'outputspec.scan_params',
                       get_fmap_metadata, 'data_config_scan_params')

            if "phase" in key:
                # leave it open to all three options, in case there is a
                # phasediff image with either a single EchoTime field (which
                # usually matches one of the magnitude EchoTimes), OR
                # a phasediff with an EchoTime1 and EchoTime2

                # at least one of these rpool keys will have a None value,
                # which will be sorted out in gather_echo_times below
                rpool.set_data(f'{key}-TE', get_fmap_metadata, 'echo_time',
                               {}, "", "fmap_TE_ingress")
                fmap_TE_list.append(f"{key}-TE")

                rpool.set_data(f'{key}-TE1', get_fmap_metadata,
                               'echo_time_one', {}, "", "fmap_TE1_ingress")
                fmap_TE_list.append(f"{key}-TE1")

                rpool.set_data(f'{key}-TE2', get_fmap_metadata,
                               'echo_time_two', {}, "", "fmap_TE2_ingress")
                fmap_TE_list.append(f"{key}-TE2")

            elif "magnitude" in key:
                rpool.set_data(f'{key}-TE', get_fmap_metadata, 'echo_time',
                               {}, "", "fmap_TE_ingress")
                fmap_TE_list.append(f"{key}-TE")

            rpool.set_data(f'{key}-dwell', get_fmap_metadata, 'dwell_time',
                           {}, "", "fmap_dwell_ingress")
            rpool.set_data(f'{key}-pedir', get_fmap_metadata, 'pe_direction',
                           {}, "", "fmap_pedir_ingress")
            rpool.set_data(f'{key}-total-readout', get_fmap_metadata,
                           'total_readout', {}, "", "fmap_readout_ingress")

            if 'phase' in key or 'mag' in key:
                diff = True

            if re.match('epi_[AP]{2}', orig_key):
                blip = True

        if diff:
            calc_delta_ratio = pe.Node(Function(
                input_names=['effective_echo_spacing',
                             'echo_times'],
                output_names=['deltaTE',
                              'ees_asym_ratio'],
                function=calc_delta_te_and_asym_ratio,
                imports=['from typing import Optional, Tuple']),
                name=f'diff_distcor_calc_delta{name_suffix}')

            gather_echoes = pe.Node(Function(
                input_names=['echotime_1',
                             'echotime_2',
                             'echotime_3',
                             'echotime_4'],
                output_names=['echotime_list'],
                function=gather_echo_times),
                name='fugue_gather_echo_times')

            for idx, fmap_file in enumerate(fmap_TE_list, start=1):
                try:
                    node, out_file = rpool.get(fmap_file)[
                        f"['{fmap_file}:fmap_TE_ingress']"]['data']
                    wf.connect(node, out_file,
                               gather_echoes, f'echotime_{idx}')
                except KeyError:
                    pass

            wf.connect(gather_echoes, 'echotime_list',
                       calc_delta_ratio, 'echo_times')

    # Add in nodes to get parameters from configuration file
    # a node which checks if scan_parameters are present for each scan
    scan_params = pe.Node(Function(
        input_names=['data_config_scan_params',
                     'subject_id',
                     'scan',
                     'pipeconfig_tr',
                     'pipeconfig_tpattern',
                     'pipeconfig_start_indx',
                     'pipeconfig_stop_indx'],
        output_names=['tr',
                      'tpattern',
                      'template',
                      'ref_slice',
                      'start_indx',
                      'stop_indx',
                      'pe_direction',
                      'effective_echo_spacing'],
        function=get_scan_params,
        imports=['from CPAC.utils.utils import check, try_fetch_parameter']),
        name=f"bold_scan_params_{subject_id}{name_suffix}")
    scan_params.inputs.subject_id = subject_id
    scan_params.inputs.set(
        pipeconfig_start_indx=cfg.functional_preproc['truncation'][
            'start_tr'],
        pipeconfig_stop_indx=cfg.functional_preproc['truncation']['stop_tr'])

    node, out = rpool.get('scan')["['scan:func_ingress']"]['data']
    wf.connect(node, out, scan_params, 'scan')

    # Workaround for extracting metadata with ingress
    if rpool.check_rpool('derivatives-dir'):
        selectrest_json = pe.Node(function.Function(input_names=['scan',
                                                                 'rest_dict',
                                                                 'resource'],
                                                    output_names=['file_path'],
                                                    function=get_rest,
                                                    as_module=True),
                                  name='selectrest_json')
        selectrest_json.inputs.rest_dict = sub_dict
        selectrest_json.inputs.resource = "scan_parameters"
        wf.connect(node, out, selectrest_json, 'scan')
        wf.connect(selectrest_json, 'file_path', scan_params,
                   'data_config_scan_params')

    else:
        # wire in the scan parameter workflow
        node, out = rpool.get('scan-params')[
            "['scan-params:scan_params_ingress']"]['data']
        wf.connect(node, out, scan_params, 'data_config_scan_params')

    rpool.set_data('TR', scan_params, 'tr', {}, "", "func_metadata_ingress")
    rpool.set_data('tpattern', scan_params, 'tpattern', {}, "",
                   "func_metadata_ingress")
    rpool.set_data('template', scan_params, 'template', {}, "",
                   "func_metadata_ingress")
    rpool.set_data('start-tr', scan_params, 'start_indx', {}, "",
                   "func_metadata_ingress")
    rpool.set_data('stop-tr', scan_params, 'stop_indx', {}, "",
                   "func_metadata_ingress")
    rpool.set_data('pe-direction', scan_params, 'pe_direction', {}, "",
                   "func_metadata_ingress")

    if diff:
        # Connect EffectiveEchoSpacing from functional metadata
        rpool.set_data('effectiveEchoSpacing', scan_params,
                       'effective_echo_spacing', {}, '',
                       'func_metadata_ingress')
        node, out_file = rpool.get('effectiveEchoSpacing')[
            "['effectiveEchoSpacing:func_metadata_ingress']"]['data']
        wf.connect(node, out_file, calc_delta_ratio,
                   'effective_echo_spacing')
        rpool.set_data('deltaTE', calc_delta_ratio, 'deltaTE', {}, '',
                       'deltaTE_ingress')
        rpool.set_data('ees-asym-ratio', calc_delta_ratio,
                       'ees_asym_ratio', {}, '', 'ees_asym_ratio_ingress')

    return wf, rpool, diff, blip, fmap_rp_list


def create_general_datasource(wf_name):
    from CPAC.pipeline import nipype_pipeline_engine as pe
    import nipype.interfaces.utility as util

    wf = pe.Workflow(name=wf_name)

    inputnode = pe.Node(util.IdentityInterface(
                            fields=['unique_id', 'data', 'scan',
                                    'creds_path', 'dl_dir'],
                            mandatory_inputs=True),
                        name='inputnode')

    check_s3_node = pe.Node(function.Function(input_names=['file_path',
                                                           'creds_path',
                                                           'dl_dir',
                                                           'img_type'],
                                              output_names=['local_path'],
                                              function=check_for_s3,
                                              as_module=True),
                            name='check_for_s3')
    check_s3_node.inputs.img_type = "other"

    wf.connect(inputnode, 'data', check_s3_node, 'file_path')
    wf.connect(inputnode, 'creds_path', check_s3_node, 'creds_path')
    wf.connect(inputnode, 'dl_dir', check_s3_node, 'dl_dir')

    outputnode = pe.Node(util.IdentityInterface(fields=['unique_id', 'data',
                                                        'scan']),
                         name='outputspec')

    wf.connect(inputnode, 'unique_id', outputnode, 'unique_id')
    wf.connect(inputnode, 'scan', outputnode, 'scan')
    wf.connect(check_s3_node, 'local_path', outputnode, 'data')

    return wf


def create_check_for_s3_node(name, file_path, img_type='other',
                             creds_path=None, dl_dir=None, map_node=False):
    if map_node:
        check_s3_node = pe.MapNode(
            function.Function(input_names=['file_path', 'creds_path',
                                           'dl_dir', 'img_type'],
                              output_names=['local_path'],
                              function=check_for_s3,
                              as_module=True),
            iterfield=['file_path'],
            name='check_for_s3_%s' % name)
    else:
        check_s3_node = pe.Node(
            function.Function(input_names=['file_path', 'creds_path',
                                           'dl_dir', 'img_type'],
                              output_names=['local_path'],
                              function=check_for_s3,
                              as_module=True),
            name='check_for_s3_%s' % name)

    check_s3_node.inputs.set(
        file_path=file_path,
        creds_path=creds_path,
        dl_dir=dl_dir,
        img_type=img_type
    )

    return check_s3_node
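

# Illustrative sketch (not part of the module): building a MapNode that
# localizes several hypothetical template paths before wiring it into a
# workflow; with ``map_node=True`` the list is iterated over 'file_path'.
# >>> mask_node = create_check_for_s3_node(
# ...     'roi_masks',
# ...     ['/path/to/mask_a.nii.gz', '/path/to/mask_b.nii.gz'],
# ...     img_type='mask', map_node=True)
# >>> mask_node.name
# 'check_for_s3_roi_masks'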


# Check if passed-in file is on S3
def check_for_s3(file_path, creds_path=None, dl_dir=None, img_type='other',
                 verbose=False):
    # Import packages
    import os
    import nibabel as nib
    import botocore.exceptions
    from indi_aws import fetch_creds

    # Init variables
    s3_str = 's3://'
    if creds_path:
        if "None" in creds_path or "none" in creds_path or \
                "null" in creds_path:
            creds_path = None

    if dl_dir is None:
        dl_dir = os.getcwd()

    if file_path is None:
        # in case it's something like scan parameters or field map files,
        # but we don't have any
        return None

    # TODO: remove this once scan parameter input as dictionary is phased out
    if isinstance(file_path, dict):
        # if this is a dictionary, just skip altogether
        local_path = file_path
        return local_path

    if file_path.lower().startswith(s3_str):
        file_path = s3_str + file_path[len(s3_str):]

        # Get bucket name and bucket object
        bucket_name = file_path[len(s3_str):].split('/')[0]

        # Extract relative key path from bucket and local path
        s3_prefix = s3_str + bucket_name
        s3_key = file_path[len(s3_prefix) + 1:]
        local_path = os.path.join(dl_dir, bucket_name, s3_key)

        # Get local directory and create folders if they dont exist
        local_dir = os.path.dirname(local_path)
        if not os.path.exists(local_dir):
            os.makedirs(local_dir, exist_ok=True)

        if os.path.exists(local_path):
            print("{0} already exists- skipping download.".format(
                local_path))
        else:
            # Download file
            try:
                bucket = fetch_creds.return_bucket(creds_path, bucket_name)
                print("Attempting to download from AWS S3: {0}".format(
                    file_path))
                bucket.download_file(Key=s3_key, Filename=local_path)
            except botocore.exceptions.ClientError as exc:
                error_code = int(exc.response['Error']['Code'])

                err_msg = str(exc)
                if error_code == 403:
                    err_msg = 'Access to bucket: "%s" is denied; using ' \
                              'credentials in subject list: "%s"; cannot ' \
                              'access the file "%s"' \
                              % (bucket_name, creds_path, file_path)
                elif error_code == 404:
                    err_msg = 'File: {0} does not exist; check spelling ' \
                              'and try again'.format(
                                  os.path.join(bucket_name, s3_key))
                else:
                    err_msg = 'Unable to connect to bucket: "%s". Error ' \
                              'message:\n%s' % (bucket_name, exc)

                raise Exception(err_msg)

            except Exception as exc:
                err_msg = 'Unable to connect to bucket: "%s". Error ' \
                          'message:\n%s' % (bucket_name, exc)
                raise Exception(err_msg)

    # Otherwise just return what was passed in, resolving if a link
    else:
        local_path = os.path.realpath(file_path)

    # Check if it exists or it is successfully downloaded
    if not os.path.exists(local_path):
        # alert users to 2020-07-20 Neuroparc atlas update (v0 to v1)
        ndmg_atlases = {}
        with open(
            os.path.join(
                os.path.dirname(os.path.dirname(__file__)),
                'resources/templates/ndmg_atlases.csv'
            )
        ) as ndmg_atlases_file:
            ndmg_atlases['v0'], ndmg_atlases['v1'] = zip(*[(
                f'/ndmg_atlases/label/Human/{atlas[0]}',
                f'/ndmg_atlases/label/Human/{atlas[1]}'
            ) for atlas in csv.reader(ndmg_atlases_file)])
        if local_path in ndmg_atlases['v0']:
            raise FileNotFoundError(''.join([
                'Neuroparc atlas paths were updated on July 20, 2020. '
                'C-PAC configuration files using Neuroparc v0 atlas paths '
                '(including C-PAC default and preconfigured pipeline '
                'configurations from v1.6.2a and earlier) need to be '
                'updated to use Neuroparc atlases. Your current '
                'configuration includes the Neuroparc v0 path '
                f'{local_path} which needs to be updated to ',
                ndmg_atlases['v1'][ndmg_atlases['v0'].index(local_path)],
                '. For a full list of such paths, see https://fcp-indi.'
                'github.io/docs/nightly/user/ndmg_atlases']))
        else:
            raise FileNotFoundError(f'File {local_path} does not exist!')

    if verbose:
        print("Downloaded file:\n{0}\n".format(local_path))

    # Check image dimensionality
    if local_path.endswith('.nii') or local_path.endswith('.nii.gz'):
        img_nii = nib.load(local_path)

        if img_type == 'anat':
            if len(img_nii.shape) != 3:
                raise IOError('File: %s must be an anatomical image with 3 '
                              'dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))
        elif img_type == 'func':
            if len(img_nii.shape) not in [3, 4]:
                raise IOError('File: %s must be a functional image with 3 '
                              'or 4 dimensions but %d dimensions found!'
                              % (local_path, len(img_nii.shape)))

    return local_path
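

# Illustrative sketch (not part of the module): typical calls to
# ``check_for_s3``. The S3 URI and credentials path are hypothetical; for a
# local path the function simply resolves and validates the file.
# >>> check_for_s3('s3://my-bucket/sub-01/func/sub-01_bold.nii.gz',
# ...              creds_path='/path/to/aws_creds.csv',
# ...              dl_dir='/tmp/working', img_type='func')  # doctest: +SKIP
# '/tmp/working/my-bucket/sub-01/func/sub-01_bold.nii.gz'
# >>> check_for_s3('/usr/share/fsl/data/standard/MNI152_T1_2mm.nii.gz',
# ...              img_type='anat')  # doctest: +SKIP
# '/usr/share/fsl/data/standard/MNI152_T1_2mm.nii.gz'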


def gather_extraction_maps(c):
    ts_analysis_dict = {}
    sca_analysis_dict = {}

    if hasattr(c, 'timeseries_extraction'):
        tsa_roi_dict = c.timeseries_extraction['tse_roi_paths']

        # Timeseries and SCA config selections processing

        # flip the dictionary
        for roi_path in tsa_roi_dict.keys():
            ts_analysis_to_run = [
                x.strip() for x in tsa_roi_dict[roi_path].split(",")
            ]

            for analysis_type in ts_analysis_to_run:
                if analysis_type not in ts_analysis_dict.keys():
                    ts_analysis_dict[analysis_type] = []
                ts_analysis_dict[analysis_type].append(roi_path)

        if c.timeseries_extraction['run']:
            if not tsa_roi_dict:
                err = "\n\n[!] CPAC says: Time Series Extraction is " \
                      "set to run, but no ROI NIFTI file paths were " \
                      "provided!\n\n"
                raise Exception(err)

    if c.seed_based_correlation_analysis['run']:
        try:
            sca_roi_dict = c.seed_based_correlation_analysis[
                'sca_roi_paths'
            ]
        except KeyError:
            err = "\n\n[!] CPAC says: Seed-based Correlation Analysis " \
                  "is set to run, but no ROI NIFTI file paths were " \
                  "provided!\n\n"
            raise Exception(err)

        # flip the dictionary
        for roi_path in sca_roi_dict.keys():
            # update analysis dict
            for analysis_type in sca_roi_dict[roi_path].split(","):
                analysis_type = analysis_type.replace(" ", "")

                if analysis_type not in sca_analysis_dict.keys():
                    sca_analysis_dict[analysis_type] = []

                sca_analysis_dict[analysis_type].append(roi_path)

    return (ts_analysis_dict, sca_analysis_dict)
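

# Illustrative sketch (not part of the module): how ``gather_extraction_maps``
# flips a hypothetical ``tse_roi_paths`` mapping (ROI path -> analyses) into
# a mapping of analysis type -> ROI paths. ``FakeConfig`` is a stand-in for a
# loaded pipeline configuration.
# >>> class FakeConfig:
# ...     timeseries_extraction = {
# ...         'run': True,
# ...         'tse_roi_paths': {'/path/to/atlas.nii.gz': 'Avg, SpatialReg'}}
# ...     seed_based_correlation_analysis = {'run': False}
# >>> ts_dict, sca_dict = gather_extraction_maps(FakeConfig())
# >>> ts_dict
# {'Avg': ['/path/to/atlas.nii.gz'], 'SpatialReg': ['/path/to/atlas.nii.gz']}
# >>> sca_dict
# {}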


def get_highest_local_res(template: Union[Path, str], tagname: str) -> Path:
    """Given a reference template path and a resolution string, get all
    resolutions of that template in the same local path and return the
    highest resolution.

    Parameters
    ----------
    template : Path or str

    tagname : str

    Returns
    -------
    Path

    Raises
    ------
    LookupError
        If no matching local template is found.

    Examples
    --------
    >>> get_highest_local_res(
    ...     '/cpac_templates/MacaqueYerkes19_T1w_2mm_brain.nii.gz', '2mm')
    PosixPath('/cpac_templates/MacaqueYerkes19_T1w_0.5mm_brain.nii.gz')
    >>> get_highest_local_res(
    ...     '/cpac_templates/dne_T1w_2mm.nii.gz', '2mm')
    Traceback (most recent call last):
        ...
    LookupError: Could not find template /cpac_templates/dne_T1w_2mm.nii.gz
    """
    from CPAC.pipeline.schema import RESOLUTION_REGEX
    if isinstance(template, str):
        template = Path(template)
    template_pattern = (
        RESOLUTION_REGEX.replace('^', '').replace('$', '').join([
            re.escape(_part) for _part in template.name.split(tagname, 1)]))
    matching_templates = [file for file in template.parent.iterdir() if
                          re.match(template_pattern, file.name)]
    matching_templates.sort()
    try:
        return matching_templates[0]
    except (FileNotFoundError, IndexError):
        raise LookupError(f"Could not find template {template}")


def res_string_to_tuple(resolution):
    """
    Converts a resolution string to a tuple of floats.

    Parameters
    ----------
    resolution : str
        Resolution string, e.g. "3.438mmx3.438mmx3.4mm"

    Returns
    -------
    resolution : tuple
        Tuple of floats, e.g. (3.438, 3.438, 3.4)
    """
    if "x" in str(resolution):
        return tuple(
            float(i.replace('mm', '')) for i in resolution.split("x"))
    return (float(resolution.replace('mm', '')),) * 3


def resolve_resolution(resolution, template, template_name, tag=None):
    from nipype.interfaces import afni
    from CPAC.pipeline import nipype_pipeline_engine as pe
    from CPAC.utils.datasource import check_for_s3

    tagname = None
    local_path = None

    if "{" in template and tag is not None:
        tagname = "${" + tag + "}"
    try:
        if tagname is not None:
            local_path = check_for_s3(
                template.replace(tagname, str(resolution)))
    except (IOError, OSError):
        local_path = None

    ## TODO debug - it works in ipython but doesn't work in nipype wf
    # try:
    #     local_path = check_for_s3('/usr/local/fsl/data/standard/MNI152_T1_3.438mmx3.438mmx3.4mm_brain_mask_dil.nii.gz')
    # except (IOError, OSError):
    #     local_path = None

    if local_path is None:
        if tagname is not None:
            if template.startswith('s3:'):
                ref_template = template.replace(tagname, '1mm')
                local_path = check_for_s3(ref_template)
            else:
                local_path = get_highest_local_res(template, tagname)
        elif tagname is None and template.startswith('s3:'):
            local_path = check_for_s3(template)
        else:
            local_path = template

        resample = pe.Node(interface=afni.Resample(),
                           name=template_name,
                           mem_gb=0,
                           mem_x=(0.0115, 'in_file', 't'))
        resample.inputs.voxel_size = res_string_to_tuple(resolution)
        resample.inputs.outputtype = 'NIFTI_GZ'
        resample.inputs.resample_mode = 'Cu'
        resample.inputs.in_file = local_path
        resample.base_dir = '.'

        resampled_template = resample.run()
        local_path = resampled_template.outputs.out_file

    return local_path


def create_anat_datasource(wf_name='anat_datasource'):
    from CPAC.pipeline import nipype_pipeline_engine as pe
    import nipype.interfaces.utility as util

    wf = pe.Workflow(name=wf_name)

    inputnode = pe.Node(util.IdentityInterface(
                            fields=['subject', 'anat', 'creds_path',
                                    'dl_dir', 'img_type'],
                            mandatory_inputs=True),
                        name='inputnode')

    check_s3_node = pe.Node(function.Function(input_names=['file_path',
                                                           'creds_path',
                                                           'dl_dir',
                                                           'img_type'],
                                              output_names=['local_path'],
                                              function=check_for_s3,
                                              as_module=True),
                            name='check_for_s3')

    wf.connect(inputnode, 'anat', check_s3_node, 'file_path')
    wf.connect(inputnode, 'creds_path', check_s3_node, 'creds_path')
    wf.connect(inputnode, 'dl_dir', check_s3_node, 'dl_dir')
    wf.connect(inputnode, 'img_type', check_s3_node, 'img_type')

    outputnode = pe.Node(util.IdentityInterface(fields=['subject', 'anat']),
                         name='outputspec')

    wf.connect(inputnode, 'subject', outputnode, 'subject')
    wf.connect(check_s3_node, 'local_path', outputnode, 'anat')

    # Return the workflow
    return wf


def create_roi_mask_dataflow(masks, wf_name='datasource_roi_mask'):
    import os

    mask_dict = {}

    for mask_file in masks:
        mask_file = mask_file.rstrip('\r\n')

        if mask_file.strip() == '' or mask_file.startswith('#'):
            continue

        name, desc = lookup_identifier(mask_file)

        if name == 'template':
            base_file = os.path.basename(mask_file)

            try:
                valid_extensions = ['.nii', '.nii.gz']

                base_name = [
                    base_file[:-len(ext)]
                    for ext in valid_extensions
                    if base_file.endswith(ext)
                ][0]

                for key in ['res', 'space']:
                    base_name = bids_remove_entity(base_name, key)

            except IndexError:
                # pylint: disable=raise-missing-from
                raise ValueError('Error in spatial_map_dataflow: File '
                                 f'extension of {base_file} not ".nii" or '
                                 '.nii.gz')

            except Exception as e:
                raise e

        else:
            base_name = format_identifier(name, desc)

        if base_name in mask_dict:
            raise ValueError('Duplicate templates/atlases not allowed: '
                             f'{mask_file} {mask_dict[base_name]}')

        mask_dict[base_name] = mask_file

    wf = pe.Workflow(name=wf_name)

    inputnode = pe.Node(util.IdentityInterface(fields=['mask',
                                                       'mask_file',
                                                       'creds_path',
                                                       'dl_dir'],
                                               mandatory_inputs=True),
                        name='inputspec')

    mask_keys, mask_values = \
        zip(*mask_dict.items())

    inputnode.synchronize = True
    inputnode.iterables = [
        ('mask', mask_keys),
        ('mask_file', mask_values),
    ]

    check_s3_node = pe.Node(function.Function(input_names=['file_path',
                                                           'creds_path',
                                                           'dl_dir',
                                                           'img_type'],
                                              output_names=['local_path'],
                                              function=check_for_s3,
                                              as_module=True),
                            name='check_for_s3')

    wf.connect(inputnode, 'mask_file', check_s3_node, 'file_path')
    wf.connect(inputnode, 'creds_path', check_s3_node, 'creds_path')
    wf.connect(inputnode, 'dl_dir', check_s3_node, 'dl_dir')
    check_s3_node.inputs.img_type = 'mask'

    outputnode = pe.Node(util.IdentityInterface(fields=['out_file',
                                                        'out_name']),
                         name='outputspec')

    wf.connect(check_s3_node, 'local_path', outputnode, 'out_file')
    wf.connect(inputnode, 'mask', outputnode, 'out_name')

    return wf


def create_spatial_map_dataflow(spatial_maps, wf_name='datasource_maps'):
    import os

    wf = pe.Workflow(name=wf_name)

    spatial_map_dict = {}

    for spatial_map_file in spatial_maps:
        spatial_map_file = spatial_map_file.rstrip('\r\n')
        base_file = os.path.basename(spatial_map_file)

        try:
            valid_extensions = ['.nii', '.nii.gz']

            base_name = [
                base_file[:-len(ext)]
                for ext in valid_extensions
                if base_file.endswith(ext)
            ][0]

            if base_name in spatial_map_dict:
                raise ValueError(
                    'Files with same name not allowed: %s %s' % (
                        spatial_map_file,
                        spatial_map_dict[base_name]
                    )
                )

            spatial_map_dict[base_name] = spatial_map_file

        except IndexError as e:
            raise Exception('Error in spatial_map_dataflow: '
                            'File extension not in .nii and .nii.gz')

    inputnode = pe.Node(util.IdentityInterface(fields=['spatial_map',
                                                       'spatial_map_file',
                                                       'creds_path',
                                                       'dl_dir'],
                                               mandatory_inputs=True),
                        name='inputspec')

    spatial_map_keys, spatial_map_values = \
        zip(*spatial_map_dict.items())

    inputnode.synchronize = True
    inputnode.iterables = [
        ('spatial_map', spatial_map_keys),
        ('spatial_map_file', spatial_map_values),
    ]

    check_s3_node = pe.Node(function.Function(input_names=['file_path',
                                                           'creds_path',
                                                           'dl_dir',
                                                           'img_type'],
                                              output_names=['local_path'],
                                              function=check_for_s3,
                                              as_module=True),
                            name='check_for_s3')

    wf.connect(inputnode, 'spatial_map_file', check_s3_node, 'file_path')
    wf.connect(inputnode, 'creds_path', check_s3_node, 'creds_path')
    wf.connect(inputnode, 'dl_dir', check_s3_node, 'dl_dir')
    check_s3_node.inputs.img_type = 'mask'

    select_spatial_map = pe.Node(
        util.IdentityInterface(fields=['out_file', 'out_name'],
                               mandatory_inputs=True),
        name='select_spatial_map')

    wf.connect(check_s3_node, 'local_path', select_spatial_map, 'out_file')
    wf.connect(inputnode, 'spatial_map', select_spatial_map, 'out_name')

    return wf


def create_grp_analysis_dataflow(wf_name='gp_dataflow'):
    from CPAC.pipeline import nipype_pipeline_engine as pe
    import nipype.interfaces.utility as util
    from CPAC.utils.datasource import select_model_files

    wf = pe.Workflow(name=wf_name)

    inputnode = pe.Node(util.IdentityInterface(fields=['ftest',
                                                       'grp_model',
                                                       'model_name'],
                                               mandatory_inputs=True),
                        name='inputspec')

    selectmodel = pe.Node(function.Function(input_names=['model',
                                                         'ftest',
                                                         'model_name'],
                                            output_names=['fts_file',
                                                          'con_file',
                                                          'grp_file',
                                                          'mat_file'],
                                            function=select_model_files,
                                            as_module=True),
                          name='selectnode')

    wf.connect(inputnode, 'ftest', selectmodel, 'ftest')
    wf.connect(inputnode, 'grp_model', selectmodel, 'model')
    wf.connect(inputnode, 'model_name', selectmodel, 'model_name')

    outputnode = pe.Node(util.IdentityInterface(fields=['fts',
                                                        'grp',
                                                        'mat',
                                                        'con'],
                                                mandatory_inputs=True),
                         name='outputspec')

    wf.connect(selectmodel, 'mat_file', outputnode, 'mat')
    wf.connect(selectmodel, 'grp_file', outputnode, 'grp')
    wf.connect(selectmodel, 'fts_file', outputnode, 'fts')
    wf.connect(selectmodel, 'con_file', outputnode, 'con')

    return wf


def resample_func_roi(in_func, in_roi, realignment, identity_matrix):
    import os
    import nibabel as nb
    from CPAC.utils.monitoring.custom_logging import log_subprocess

    # load func and ROI dimension
    func_img = nb.load(in_func)
    func_shape = func_img.shape
    roi_img = nb.load(in_roi)
    roi_shape = roi_img.shape

    # check if func size = ROI size, return func and ROI;
    # else resample using flirt
    if roi_shape != func_shape:

        # resample func to ROI: in_file = func, reference = ROI
        if 'func_to_ROI' in realignment:
            in_file = in_func
            reference = in_roi
            out_file = os.path.join(os.getcwd(), in_file[in_file.rindex(
                '/') + 1:in_file.rindex('.nii')] + '_resampled.nii.gz')
            out_func = out_file
            out_roi = in_roi
            interp = 'trilinear'

        # resample ROI to func: in_file = ROI, reference = func
        elif 'ROI_to_func' in realignment:
            in_file = in_roi
            reference = in_func
            out_file = os.path.join(os.getcwd(), in_file[in_file.rindex(
                '/') + 1:in_file.rindex('.nii')] + '_resampled.nii.gz')
            out_func = in_func
            out_roi = out_file
            interp = 'nearestneighbour'

        cmd = ['flirt', '-in', in_file,
               '-ref', reference,
               '-out', out_file,
               '-interp', interp,
               '-applyxfm', '-init', identity_matrix]
        log_subprocess(cmd)

    else:
        out_func = in_func
        out_roi = in_roi

    return out_func, out_roi
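

# Illustrative sketch (not part of the module): resampling a hypothetical ROI
# atlas into functional resolution with FSL FLIRT. Requires FSL on PATH and
# assumes ``identity_matrix`` points at FSL's ident.mat; paths are
# placeholders.
# >>> out_func, out_roi = resample_func_roi(
# ...     '/path/to/sub-01_bold_mean.nii.gz',
# ...     '/path/to/atlas_2mm.nii.gz',
# ...     realignment='ROI_to_func',
# ...     identity_matrix='/usr/share/fsl/etc/flirtsch/ident.mat'
# ... )  # doctest: +SKIP
# >>> out_roi  # doctest: +SKIP
# '.../atlas_2mm_resampled.nii.gz'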