From 89f14bc05c4bc15086c7e0f8735cc39c6e3792f6 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Mon, 26 Nov 2018 23:18:40 -0500 Subject: [PATCH] Do regenerate top level task-* stub file, retain only our custom fields Otherwise it would be inconsistent and populated only with the first run analysis output, so multiple sessions etc would not really get all common values, diminishing its value. See https://github.com/nipy/heudiconv/issues/277 --- heudiconv/bids.py | 35 +++++++++++++++++++++++++++-------- heudiconv/utils.py | 10 ++++++++-- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/heudiconv/bids.py b/heudiconv/bids.py index 97dfdec9..62795b20 100644 --- a/heudiconv/bids.py +++ b/heudiconv/bids.py @@ -76,6 +76,9 @@ def populate_bids_templates(path, defaults={}): # 'Manufacturer', 'SliceTiming', ''} for fpath in find_files('.*_task-.*\_bold\.json', topdir=path, exclude_vcs=True, exclude="/\.(datalad|heudiconv)/"): + # + # According to BIDS spec I think both _task AND _acq (may be more? _rec, _dir, ...?) should be retained? 
+ # TODO: if we are to fix it, then old ones (without _acq) should be removed first task = re.sub('.*_(task-[^_\.]*(_acq-[^_\.]*)?)_.*', r'\1', fpath) json_ = load_json(fpath) if task not in tasks: @@ -95,17 +98,33 @@ def populate_bids_templates(path, defaults={}): lgr.debug("Generating %s", events_file) with open(events_file, 'w') as f: f.write("onset\tduration\ttrial_type\tresponse_time\tstim_file\tTODO -- fill in rows and add more tab-separated columns if desired") + # extract tasks files stubs for task_acq, fields in tasks.items(): task_file = op.join(path, task_acq + '_bold.json') - # do not touch any existing thing, it may be precious - if not op.lexists(task_file): - lgr.debug("Generating %s", task_file) - fields["TaskName"] = ("TODO: full task name for %s" % - task_acq.split('_')[0].split('-')[1]) - fields["CogAtlasID"] = "TODO" - with open(task_file, 'w') as f: - f.write(json_dumps_pretty(fields, indent=2, sort_keys=True)) + # Since we are pulling all unique fields we have to possibly + # rewrite this file to guarantee consistency. + # See https://github.com/nipy/heudiconv/issues/277 for a usecase/bug + # when we didn't touch existing one. 
+ # But the fields we enter (TaskName and CogAtlasID) might need + # to be populated from the file if it already exists + placeholders = { + "TaskName": ("TODO: full task name for %s" % + task_acq.split('_')[0].split('-')[1]), + "CogAtlasID": "TODO", + } + if op.lexists(task_file): + j = load_json(task_file) + # Retain possibly modified placeholder fields + for f in placeholders: + if f in j: + placeholders[f] = j[f] + act = "Regenerating" + else: + act = "Generating" + lgr.debug("%s %s", act, task_file) + fields.update(placeholders) + save_json(task_file, fields, indent=2, sort_keys=True, pretty=True) def tuneup_bids_json_files(json_files): diff --git a/heudiconv/utils.py b/heudiconv/utils.py index 3539a5a9..e40806a6 100644 --- a/heudiconv/utils.py +++ b/heudiconv/utils.py @@ -171,7 +171,7 @@ def assure_no_file_exists(path): os.unlink(path) -def save_json(filename, data, indent=4): +def save_json(filename, data, indent=4, sort_keys=True, pretty=False): """Save data to a json file Parameters @@ -180,11 +180,17 @@ def save_json(filename, data, indent=4): Filename to save data in. data : dict Dictionary to save in json file. + indent : int, optional + sort_keys : bool, optional + pretty : bool, optional """ assure_no_file_exists(filename) with open(filename, 'w') as fp: - fp.write(_canonical_dumps(data, sort_keys=True, indent=indent)) + fp.write( + (json_dumps_pretty if pretty else _canonical_dumps)( + data, sort_keys=sort_keys, indent=indent) + ) def json_dumps_pretty(j, indent=2, sort_keys=True):