Prototype for storage API #7

Open · wants to merge 16 commits into base: main
Changes from 8 commits
1 change: 1 addition & 0 deletions prototype/config.yml
@@ -0,0 +1 @@
storage_dir: C:\Users\eljor\desktop\fast_playground\
175 changes: 175 additions & 0 deletions prototype/storage_api.py
@@ -0,0 +1,175 @@
from mmif import Mmif
from clams import mmif_utils
from flask import Flask, request, jsonify, send_from_directory
from enum import Enum
from pydantic import BaseModel
from typing import List, Dict
from typing_extensions import Annotated
import os
import yaml
Review comment (Member):

this yaml import is used only for a limited purpose, namely loading the config file. However, the existing code is based on a .env file and the dotenv module, so let's migrate to environment-variable-based configuration to match the existing code.
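A minimal sketch of the suggested migration, assuming a `STORAGE_DIR` variable (the variable name and default path are illustrative, not part of the existing code). In the merged codebase, python-dotenv's `load_dotenv()` would populate `os.environ` from a `.env` file; here that step is simulated so the lookup is self-contained:

```python
import os

# Simulate what python-dotenv's load_dotenv() would do after reading a
# .env file containing e.g.  STORAGE_DIR=/data/mmif-storage
os.environ.setdefault("STORAGE_DIR", "/data/mmif-storage")

# The app would then read its storage root from the environment,
# replacing the yaml.safe_load(open('config.yml')) calls below.
storage_dir = os.environ["STORAGE_DIR"]
```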

import hashlib
import json


app = Flask(__name__)
# get post request from user
# read mmif inside post request, get view metadata
# store in nested directory relating to view metadata

# TODO: this app accepts "unresolvable" as an app version number; it needs to be fixed because
# TODO: "unresolvable" is not specific and can represent multiple versions.


@app.route("/")
def root():
    return {"message": "Storage api for pipelined mmif files"}


@app.route("/upload_mmif/", methods=["POST"])
Review comment (Member):

let's think about the routings here. The existing "baapb resolver" codebase (api/__init__.py) uses many routings, but only one of them (/searchapi/) is a pure API landing point (the others are attached to web pages with minimal front-end implementations).

Eventually, we want to run a single server app that can handle "assets" (video files) as well as "mmif", meaning this storage_api.py should be merged into the api/ package. To that end, I think we need some basic rules for naming these routing paths.

Since /searchapi is already used on the production server (for baapb:// resolution), how about

  • /upload_mmif >> /storeapi/mmif
  • /retrieve >> /searchapi/mmif
    ?

Note that changes in the routing names will also impact the client-side implementation (that you're currently working on).
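The rename proposed above could be sketched as a simple old-to-new mapping that both server and client code could share during the migration (the new paths are this thread's suggestion, not yet implemented anywhere):

```python
# Proposed route renames from this review thread (suggestion only);
# prototype paths on the left, suggested merged-server paths on the right.
PROPOSED_ROUTES = {
    "/upload_mmif/": "/storeapi/mmif",   # storing pipelined MMIF files
    "/retrieve/":    "/searchapi/mmif",  # retrieving stored MMIF files
}


def migrate_route(old_path):
    """Map a prototype route to its proposed replacement, if one exists."""
    return PROPOSED_ROUTES.get(old_path, old_path)
```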

def upload_mmif():
    body = request.get_data(as_text=True)
    # read the local storage directory from config.yml
    with open('config.yml', 'r') as file:
        config = yaml.safe_load(file)
    directory = config['storage_dir']
    mmif = Mmif(body)
    # get the guid from the document location
    # document = body.[0]['properties']['location'].split('/')[2].split('.')[0]
    document = mmif.documents['d1']['properties'].location.split('/')[2].split('.')[0]
    # append '.mmif' to the guid
    document = document + '.mmif'
    # IMPORTANT: in order to enable directory creation after this loop, and also to store each
    # parameter dictionary in its proper directory, we build a dictionary associating the current
    # path level with its param dict. After the loop, we create the dirs and then iterate through
    # this dictionary to place the param dicts in their proper spots.
    param_path_dict = {}
    for view in mmif.views:
        # this should return the back half of the app url: just the app name and version number
        subdir_list = view.metadata.app.split('/')[3:]
        # create the path string for this view
        view_path = os.path.join('', *subdir_list)
        # now convert the parameter dictionary to a string and hash it;
        # this hash will be the name of another subdirectory.
        try:
            param_dict = view.metadata["parameters"]
            param_list = ['='.join(pair) for pair in param_dict.items()]
            param_list.sort()
            param_string = ','.join(param_list)
        except KeyError:
            param_dict = ""
            param_string = ""
        # hash the (sorted and concatenated) parameter string and join it with the path.
        # NOTE: this is *not* for security purposes, so the usage of md5 is not an issue.
        param_hash = hashlib.md5(param_string.encode('utf-8')).hexdigest()
        view_path = os.path.join(view_path, param_hash)
        # check whether this is a duplicate view; if it is, skip it.
        # NOTE: duplicate views are those with the same app, version number, AND parameter dict.
        if view_path in directory:
            continue
        # extend the accumulated path with the current view path
        directory = os.path.join(directory, view_path)
        # now that we know it's not a duplicate view and we have the proper path location,
        # store it and the associated param dict inside param_path_dict.
        param_path_dict[directory] = param_dict
    # we have finished looping through the views; now create the directories
    # and dump the param dicts
    os.makedirs(directory, exist_ok=True)
    for path in param_path_dict:
        file_path = os.path.join(path, 'parameters.json')
Review comment (Member):

I tried this out, and figured it would be more convenient for clients if we store only .mmif files in this directory, and add an additional api that returns the entire absolute path of the directory (just like how baapb:// is resolved). That way, when code that uses pre-stored mmif files runs on the same machine where those files live, the client doesn't have to "download" the mmif files but can directly access the directory (if it exists) and use the files under it.

Review comment (Member):

And if we store the parameter files in the "appversion" directory, keeping only mmif files in the "hash" directory, clients can just use a * glob to read all mmif files without worrying about any additional metadata-like files.
So instead of

storage-root/
├── app1
│   ├── v1
│   │   ├── hash1
│   │   │   ├── guid1.mmif
│   │   │   └── params.json
│   │   ├── hash2
│   │   │   ├── guid1.mmif
│   │   │   └── params.json
│   │   └── hash3
│   │       ├── guid1.mmif
│   │       └── params.json
│   └── v2
│       └── hash1
│           ├── guid1.mmif
│           └── params.json
└── app2
    ├── v1
    │   └── hash1
    │       ├── guid1.mmif
    │       └── params.json
    └── v2
        └── hash1
            ├── guid1.mmif
            └── params.json

we would store:

storage-root/
├── app1
│   ├── v1
│   │   ├── hash1
│   │   │   └── guid1.mmif
│   │   ├── hash1.json
│   │   ├── hash2
│   │   │   └── guid1.mmif
│   │   ├── hash2.json
│   │   ├── hash3
│   │   │   └── guid1.mmif
│   │   └── hash3.json
│   └── v2
│       ├── hash1
│       │   └── guid1.mmif
│       └── hash1.json
└── app2
    ├── v1
    │   ├── hash1
    │   │   └── guid1.mmif
    │   └── hash1.json
    └── v2
        ├── hash1
        │   └── guid1.mmif
        └── hash1.json
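The client-side convenience this layout enables can be sketched with a glob over a throwaway directory tree (the app, version, and guid names below are illustrative only): with each `params.json` stored as a sibling `<hash>.json` file, a pattern like `*/*/*/*.mmif` matches only MMIF files.

```python
import glob
import os
import tempfile

# Build a miniature storage tree matching the proposed layout.
root = tempfile.mkdtemp()
hash_dir = os.path.join(root, "app1", "v1", "hash1")
os.makedirs(hash_dir)
open(os.path.join(hash_dir, "guid1.mmif"), "w").close()
# The parameter file sits NEXT TO the hash directory, not inside it.
open(os.path.join(root, "app1", "v1", "hash1.json"), "w").close()

# Four path components deep, .mmif extension: picks up only MMIF files.
mmifs = glob.glob(os.path.join(root, "*", "*", "*", "*.mmif"))
```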

        with open(file_path, "w") as f:
            json.dump(param_path_dict[path], f)
    # put the mmif into the lowest-level directory, with a filename based on the guid
    file_path = os.path.join(directory, document)
    with open(file_path, "w") as f:
        f.write(mmif.serialize())
    return "Success", 201


@app.route("/retrieve/", methods=["POST"])
def download_mmif():
    # if not request.is_json:
    #     return {'error': 'Request must be JSON'}, 400
    data = json.loads(request.data.decode('utf-8'))
    # get both the pipeline and the guid from data;
    # obtain the pipeline using a helper method
    pipeline = pipeline_from_param_json(data)
    # get the number of views, for rewinding if necessary
    num_views = len(data['pipeline'])
    guid = data.get('guid')
Review comment (Member):

I think you mentioned you want to work on (or are already working on?) the multi-guid query scenario. As mentioned in the other comment, how about also adding a zero-guid query, to return just the full directory path?
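One possible shape for the zero-guid case suggested here, sketched outside Flask so the dispatch logic stands alone (the function name, return shapes, and paths are all hypothetical, not part of this PR):

```python
import os


def resolve_request(storage_root, pipeline_path, guid=None):
    """Resolve a retrieval request against the storage tree.

    Zero-guid: return just the resolved directory path, so a client on the
    same machine can read the files directly. With a guid: return the full
    path of the stored MMIF file.
    """
    directory = os.path.join(storage_root, pipeline_path)
    if guid is None:
        return {"path": directory}            # zero-guid query
    return os.path.join(directory, guid + ".mmif")
```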

    # validate the existence of both args
    if not pipeline or not guid:
        return jsonify({'error': 'Missing required parameters: need both pipeline & guid'}), 400
    # join the pipeline path with the local storage root
    with open('config.yml', 'r') as file:
        config = yaml.safe_load(file)
    storage = config['storage_dir']
    pipeline = os.path.join(storage, pipeline)
    guid = guid + ".mmif"
    # get the file from the storage directory
    path = os.path.join(pipeline, guid)
    # if the file exists, we can return it
    try:
        with open(path, 'r') as file:
            mmif = file.read()
            return mmif
    # otherwise, use the rewinder.
    # this assumes the user has provided a subset of a mmif pipeline that was previously stored;
    # in the case where that is not true, a FileNotFoundError is raised.
    except FileNotFoundError:
        return rewind_time(pipeline, guid, num_views)


# helper method for extracting the pipeline path
def pipeline_from_param_json(param_json):
    """
    Read a json containing the names of the pipelined apps and their respective
    parameters, and build a path out of the pipelined apps and hashed parameters.
    """
    pipeline = ""
    for clams_app in param_json["pipeline"]:
        # not using os.path.join until later, for testing purposes
        pipeline = pipeline + "/" + clams_app
        # try to get the param items
        try:
            param_list = ['='.join(pair) for pair in param_json["pipeline"][clams_app].items()]
            param_list.sort()
            param_string = ','.join(param_list)
        # an AttributeError is raised if the value is empty
        # (an empty value parses as a set, not a dict)
        except AttributeError:
            param_string = ""
        # hash the parameters
        param_hash = hashlib.md5(param_string.encode('utf-8')).hexdigest()
        pipeline = pipeline + "/" + param_hash
    # remove the leading "/" so it doesn't interfere with os.path.join later
    pipeline = pipeline[1:]
    return pipeline
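A worked example of the path scheme this helper builds, using the same md5-over-sorted-params convention as upload_mmif (the app name is taken from test.json; the parameter values are illustrative):

```python
import hashlib

# An app run with no parameters hashes the empty string:
no_param_hash = hashlib.md5("".encode("utf-8")).hexdigest()
# -> "d41d8cd98f00b204e9800998ecf8427e"

# An app run with parameters hashes the sorted, comma-joined "key=value" pairs:
params = {"tfLabel": "chyron", "pretty": ""}
param_list = sorted("=".join(pair) for pair in params.items())
param_string = ",".join(param_list)  # "pretty=,tfLabel=chyron"
param_hash = hashlib.md5(param_string.encode("utf-8")).hexdigest()

# The resulting path segment for this app+params combination:
path = "doctr-wrapper/unresolvable/" + param_hash
```

Because the pairs are sorted before joining, the same parameter dict always hashes to the same directory name regardless of key order.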


def rewind_time(pipeline, guid, num_views):
    """
    Take a pipeline (path), a guid, and a number of views, and use os.walk to iterate
    through directories that begin with that pipeline. The first mmif file matching the
    guid is rewound so that it includes only the views indicated by the pipeline.
    """
    for home, dirs, files in os.walk(pipeline):
        # find the mmif with the matching guid to rewind
        for file in files:
            if guid == file:
                with open(os.path.join(home, file), 'r') as f:
                    mmif = Mmif(f.read())
                # calculate the number of views to rewind, then rewind the mmif
                rewound = mmif_utils.rewind.rewind_mmif(mmif, len(mmif.views) - num_views)
                return rewound.serialize()
    raise FileNotFoundError


if __name__ == "__main__":
    app.run(port=8912)




22 changes: 22 additions & 0 deletions prototype/test.json
@@ -0,0 +1,22 @@
{
"pipeline": {
"swt-detection/unresolvable": {
},

"simple-timepoints-stitcher/v1.3": {
"labelMap": "['B:bars', 'S:slate', 'S-H:slate', 'S-C:slate', 'S-D:slate', 'S-G:slate', 'I:chyron', 'N:chyron', 'Y:chyron', 'C:credits']"
},

"doctr-wrapper/unresolvable": {
"tfLabel": "chyron",
"pretty": ""
},

"doctr-wrapper": {
"tfLabel": "['credits', 'credit']",
"pretty": ""
}

},
"guid": "cpb-aacip-0515ac167c0"
}
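A query like test.json could be sent to the /retrieve/ endpoint as follows; this is a sketch using only the standard library, with the host and port assumed from app.run(port=8912) above (the payload here is a trimmed version of test.json):

```python
import json
from urllib import request as urlreq

# Trimmed-down version of test.json for illustration.
payload = {
    "pipeline": {"swt-detection/unresolvable": {}},
    "guid": "cpb-aacip-0515ac167c0",
}

# Attaching a data body makes this a POST request, matching the route.
req = urlreq.Request(
    "http://localhost:8912/retrieve/",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
# urlreq.urlopen(req) would return the stored (or rewound) MMIF,
# assuming the server is running locally.
```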
12 changes: 12 additions & 0 deletions prototype/test_rewind.json
@@ -0,0 +1,12 @@
{
"pipeline": {
"swt-detection/unresolvable": {
},

"simple-timepoints-stitcher/v1.3": {
"labelMap": "['B:bars', 'S:slate', 'S-H:slate', 'S-C:slate', 'S-D:slate', 'S-G:slate', 'I:chyron', 'N:chyron', 'Y:chyron', 'C:credits']"
}

},
"guid": "cpb-aacip-0515ac167c0"
}