Skip to content

Commit

Permalink
Merge branch 'release/v0.15.0' #104
Browse files Browse the repository at this point in the history
  • Loading branch information
astrochun committed Oct 9, 2020
2 parents c53f031 + e6134bd commit 42eccb1
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 65 deletions.
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,16 @@ It follows a workflow that was developed by members of the
The software has a number of backend features, such as:
1. Retrieving private datasets from the [Figshare API](https://docs.figshare.com)
that are undergoing curatorial review
2. Identifying whether a README.txt file is present in the ReDATA deposit.
If such a file does not exist, it will provide ReDATA curators a
copy of the [README.txt template](https://osf.io/sj8xv/download).
Ultimately, it will perform an inspection to ensure that the README.txt
adheres to a defined format and populates metadata information based on
information submitted to ReDATA
2. Constructing a README.txt file based on information from the deposit's
metadata and information provided by the researchers using a Qualtrics
form that walks the users through additional information
3. Retrieving a [Deposit Agreement Form](https://bit.ly/ReDATA_DepositAgreement)
from Qualtrics, which is a requirement for all ReDATA deposits
4. Retrieving a copy of [Curatorial Review Report template (MS-Word)](https://bit.ly/ReDATA_CurationTemplate)
for ReDATA curators to complete.
5. Supporting ReDATA curators with access and workflow management through
5. Creating a hierarchical folder structure that supports library preservation
   and archiving
6. Supporting ReDATA curators with access and workflow management through
standard UNIX commands

These backend services ingest the datasets and accompanying files (described above)
Expand Down Expand Up @@ -200,6 +199,10 @@ We use [SemVer](http://semver.org/) for versioning. For the versions available,
A list of released features and their issue number(s).
List is sorted from moderate to minor revisions for each release.

v0.15.0:
* Implementation of Qualtrics README file #98
* Folder re-structuring for versioning #100

v0.14.0 - 0.14.1:
* Full stdout and file logging #83
* Configuration handling using dictionary structure #87, #93
Expand Down
3 changes: 3 additions & 0 deletions ldcoolp/config/default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,6 @@ download_url = https://%(dataCenter)s.qualtrics.com/Q/Data/Ajax/GetSingleRespons

# Base URL for survey submission
generate_url = https://%(dataCenter)s.qualtrics.com/jfe/form/

# README Qualtrics settings
readme_survey_id = ***override***
139 changes: 135 additions & 4 deletions ldcoolp/curation/api/qualtrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
import io
from os import remove

# base64 encoding/decoding
import base64

# Text handling for README
from html2text import html2text

# CSV handling
import zipfile
import pandas as pd
Expand Down Expand Up @@ -31,6 +37,10 @@
# Column order for markdown print-out of Qualtrics table
cols_order = ['ResponseId', 'Q4_1', 'Q5', 'Q6_1', 'Q7']

readme_cols_order = ['ResponseId', 'article_id', 'curation_id']

readme_custom_content = ['cite', 'summary', 'files', 'materials', 'contrib', 'notes']


class Qualtrics:
"""
Expand Down Expand Up @@ -77,7 +87,7 @@ class Qualtrics:
List all surveys for a user in a dictionary form:
See: https://api.qualtrics.com/docs/managing-surveys#list-surveys
get_survey_responses()
get_survey_responses(survey_id)
Retrieve pandas DataFrame containing responses for a survey
See: https://api.qualtrics.com/docs/getting-survey-responses-via-the-new-export-apis
Expand Down Expand Up @@ -107,6 +117,8 @@ def __init__(self, qualtrics_dict=config_default_dict['qualtrics'], log=None):
self.survey_id = self.dict['survey_id']
self.file_format = 'csv'

self.readme_survey_id = self.dict['readme_survey_id']

# Logging
self.file_logging = False
if isinstance(log, type(None)):
Expand All @@ -131,12 +143,12 @@ def list_surveys(self):

return survey_dict

def get_survey_responses(self, verbose=False):
def get_survey_responses(self, survey_id, verbose=False):
"""Retrieve pandas DataFrame containing responses for a survey"""

progress_status = "inProgress"

download_url = self.endpoint(f"surveys/{self.survey_id}/export-responses")
download_url = self.endpoint(f"surveys/{survey_id}/export-responses")

# Create Data Export
download_payload = {"format": self.file_format}
Expand Down Expand Up @@ -189,7 +201,7 @@ def pandas_write_buffer(self, df):
def find_deposit_agreement(self, dn_dict):
"""Get Response ID based on a match search for depositor name"""

qualtrics_df = self.get_survey_responses()
qualtrics_df = self.get_survey_responses(self.survey_id)

# First perform search via article_id or curation_id
self.log.info("Attempting to identify using article_id or curation_id ...")
Expand Down Expand Up @@ -303,3 +315,122 @@ def generate_url(self, dn_dict):
urlencode(query_str_dict, safe=url_safe, quote_via=quote)

return full_url

def generate_readme_url(self, dn):
    """
    Generate a Qualtrics README-form URL using the Q_EED option.

    Deposit metadata (title, preferred citation, DOI, license, summary,
    and reference links) is JSON-serialized and base64url-encoded into
    the ``Q_EED`` query parameter so the Qualtrics survey can
    pre-populate its embedded-data fields.

    :param dn: DepositorName-like object providing ``curation_dict``
        (Figshare curation metadata) and ``name_dict`` (deposit naming
        information).
    :return: str. Full URL for the README Qualtrics form.
    """

    df_curation = dn.curation_dict

    # Preferred citation
    single_str_citation = df_curation['item']['citation']

    # Handle period in author list. Assume no '. ' inside the dataset
    # title itself — TODO confirm this holds for all deposits
    str_list = list([single_str_citation.split('):')[0] + '). '])
    str_list += [str_row + '.' for str_row in single_str_citation.split('):')[1].split('. ')]

    # Keep all but the final two fragments as separate lines; merge the
    # last two (item type + DOI link) onto one line
    citation_list = [content for content in str_list[0:-2]]
    citation_list.append(f"{str_list[-2]} {str_list[-1]}")
    citation_list = ' <br> '.join(citation_list)

    # summary
    figshare_description = df_curation['item']['description']

    query_str_dict = {'article_id': dn.name_dict['article_id'],
                      'curation_id': dn.name_dict['curation_id'],
                      'title': dn.name_dict['title'],
                      'depositor_name': dn.name_dict['simplify_fullName'],
                      'preferred_citation': citation_list,
                      'license': df_curation['item']['license']['name'],
                      'summary': figshare_description}

    # doi: an unpublished deposit has an empty DOI string, so fall back
    # to the predictable ReDATA DOI derived from the article_id
    if not df_curation['item']['doi']:  # empty case
        query_str_dict['doi'] = f"https://doi.org/10.25422/azu.data.{dn.name_dict['article_id']}"
    else:
        query_str_dict['doi'] = f"https://doi.org/{df_curation['item']['doi']}"

    # links: only included when references exist.
    # Bug fix: the previous condition (`if not ...`) was inverted — it
    # only triggered for an EMPTY references list, so actual reference
    # links were never passed along to the form.
    if df_curation['item']['references']:  # not empty case
        links = " <br> ".join(df_curation['item']['references'])
        query_str_dict['links'] = links

    # Encode the metadata payload for the Q_EED query parameter
    q_eed = base64.urlsafe_b64encode(json.dumps(query_str_dict).encode()).decode()

    full_url = f"{self.dict['generate_url']}{self.readme_survey_id}?" + \
               'Q_EED=' + q_eed

    return full_url

def find_qualtrics_readme(self, dn_dict):
    """
    Get Response ID based on an article_id/curation_id search.

    :param dn_dict: dict containing 'article_id' and 'curation_id' keys.
    :return: tuple of (ResponseId str, single-row pandas DataFrame).
    :raises ValueError: when no match, or more than one match, is found.
    """

    qualtrics_df = self.get_survey_responses(self.readme_survey_id)

    # First perform search via article_id or curation_id
    self.log.info("Attempting to identify using article_id or curation_id ...")
    article_id = str(dn_dict['article_id'])
    curation_id = str(dn_dict['curation_id'])

    try:
        response_df = qualtrics_df[(qualtrics_df['article_id'] == article_id) |
                                   (qualtrics_df['curation_id'] == curation_id)]
    except KeyError:
        # Survey export does not carry the embedded-data columns
        self.log.warn("article_id and curation_id not in qualtrics survey !")
        response_df = pd.DataFrame()

    if not response_df.empty:
        self.log.info("Unique match based on article_id or curation_id !")
        if response_df.shape[0] != 1:
            self.log.warn("More than one entries found !!!")

        self.pandas_write_buffer(response_df[readme_cols_order])

    if response_df.empty:
        self.log.warn("Empty DataFrame")
        raise ValueError
    else:
        if response_df.shape[0] == 1:
            response_dict = df_to_dict_single(response_df)
            self.log.info("Only one entry found!")
            self.log.info(f"Survey completed on {response_dict['date_completed']}")
            self.log.info(f" ... for {response_dict['article_id']}")
            return response_dict['ResponseId'], response_df
        else:
            self.log.warn("Multiple entries found")
            # Bug fix: the empty DataFrame must carry the expected
            # columns; otherwise the selection below raises KeyError
            # instead of reaching the intended ValueError
            response_df = pd.DataFrame(columns=readme_cols_order)
            self.pandas_write_buffer(response_df[readme_cols_order])

    raise ValueError

def retrieve_qualtrics_readme(self, dn_dict=None, ResponseId=None, browser=True):
    """
    Retrieve README survey content for a deposit from Qualtrics.

    Looks up the README survey response via ``find_qualtrics_readme``
    and returns a dict of the curator-editable README fields
    (``readme_custom_content``), with list/markdown fields normalized.
    On lookup failure, prompts the curator for a manual ResponseId.

    :param dn_dict: dict with 'article_id' and 'curation_id' keys,
        used for the survey search when ResponseId is not given.
    :param ResponseId: str. Explicit Qualtrics response to retrieve.
    :param browser: bool. Unused in the visible code — presumably for
        opening a web browser; TODO confirm against the full file.
    :return: dict of README field content, or None on fall-through.
    """

    if isinstance(ResponseId, type(None)):
        try:
            ResponseId, response_df = self.find_qualtrics_readme(dn_dict)
            self.log.info(f"Qualtrics README ResponseID : {ResponseId}")

            # Keep only the curator-editable README columns and coerce
            # floats (pandas NaN for unanswered questions) to strings
            qualtrics_dict = df_to_dict_single(response_df[readme_custom_content])
            for key in qualtrics_dict.keys():
                if isinstance(qualtrics_dict[key], float):
                    qualtrics_dict[key] = str(qualtrics_dict[key])

            # Separate cite, contrib for list style
            for field in ['cite', 'contrib']:
                if qualtrics_dict[field] != 'nan':
                    qualtrics_dict[field] = qualtrics_dict[field].split('\n')

            # Markdown files, materials: convert Qualtrics HTML answers
            # to markdown text
            for field in ['files', 'materials']:
                if qualtrics_dict[field] != 'nan':
                    qualtrics_dict[field] = html2text(qualtrics_dict[field])
                    # Strip extra white space from html2text
                    if qualtrics_dict[field][-2:] == "\n\n":
                        qualtrics_dict[field] = qualtrics_dict[field][:-2]

            return qualtrics_dict
        except ValueError:
            # No unique survey match — fall back to manual entry.
            # NOTE(review): the visible code ends after the prompt; the
            # manually entered ResponseId appears unhandled here —
            # confirm against the full file.
            self.log.warn("Error with retrieving ResponseId")
            self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.")
            ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
            self.log.info(f"RESPONSE: {ResponseId}")
2 changes: 1 addition & 1 deletion ldcoolp/curation/depositor_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get_folder_name(self):
else:
new_vers = self.curation_dict['version'] + 1

folderName += f"_{self.article_id}_v{new_vers}"
folderName += f"_{self.article_id}/v{new_vers}"

if self.verbose:
self.log.info(f"depository_name : {folderName}")
Expand Down
102 changes: 59 additions & 43 deletions ldcoolp/curation/inspection/readme/README_template.md
Original file line number Diff line number Diff line change
@@ -1,88 +1,104 @@
---------------------------------------------
# {{ readme_dict.title }}
# {{ figshare_dict.title }}

Preferred citation (DataCite format):

Preferred citation (DataCite format):
{% for cite_text in readme_dict.preferred_citation %}
{% if qualtrics_dict.cite == 'nan' %}
{% for cite_text in figshare_dict.preferred_citation %}
{{ cite_text }}
{%- endfor %}

{#
Example (for a journal article):
LastName1, FirstName1; LastName2, FirstName2; LastName3, FirstName3; etc. (YYYY).
"[Title of Article]".
Journal name, and journal information (e.g., volume, issue, page numbers)
[DOI link to publication]

Example (for ReDATA DOI):
LastName1, FirstName1; LastName2, FirstName2; LastName3, FirstName3; etc. (YYYY).
"[Title of Your Dataset Here]".
University of Arizona Research Data Repository.
[Item Type]. https://doi.org/10.25422/azu.data.[DOI_NUMBER]
#}
{% endfor %}
{% else %}
{% for cite_text in qualtrics_dict.cite %}
{{ cite_text }}
{% endfor %}
{% endif %}


Corresponding Author:
{{ readme_dict.firstname }} {{ readme_dict.lastname }}, University of Arizona, {{ readme_dict.email }}
{{ figshare_dict.firstname }} {{ figshare_dict.lastname }}, University of Arizona, {{ figshare_dict.email }}


License:
{{ readme_dict.license }}
{{ figshare_dict.license }}


DOI:
https://doi.org/{{ readme_dict.doi }}
https://doi.org/{{ figshare_dict.doi }}



---------------------------------------------
## Summary

{{ readme_dict.description }}
{{ figshare_dict.description }}
{% if qualtrics_dict.summary != 'nan' %}

{{ qualtrics_dict.summary }}
{% endif %}
{% if ( (qualtrics_dict.files != 'nan') or
(qualtrics_dict.materials != 'nan') or
(qualtrics_dict.contrib != 'nan') or
(qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.files != 'nan' %}
---------------------------------------------
## Files and Folders


#### [Folder 1 Name]: [Description of contents]
- [file1 or set of related files]: [Purpose, contents, naming convention, etc.]
- [file2 or set of related files]: [Purpose, contents, naming convention, etc.]

#### [Folder 1 Name]/[Subfolder 1 Name]: [Description of contents]
- [file1 or set of related files]: [Purpose, contents, naming convention, etc.]

#### [Folder 2 Name]: [Description of contents]
- [file1 or set of related files]: [Purpose, contents, naming convention, etc.]
- [file2 or set of related files]: [Purpose, contents, naming convention, etc.]
{{ qualtrics_dict.files }}
{% endif %}
{% if ( (qualtrics_dict.materials != 'nan') or
(qualtrics_dict.contrib != 'nan') or
(qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.materials != 'nan' %}
---------------------------------------------
## Materials & Methods

- [Software program 1 w/version number], [URL, DOI, citation, etc.]. [Short description of why it's needed].
- [Software program 2 w/version number], [URL, DOI, citation, etc.]. [Short description of why it's needed].
- [Instrument name/model], [manufacturer]. [Short description of why it's needed].
{{ qualtrics_dict.materials }}
{% endif %}
{% if ( (qualtrics_dict.contrib != 'nan') or
(qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.contrib != 'nan' %}
---------------------------------------------
## Contributor Roles

The roles are defined by the CRediT taxonomy http://credit.niso.org/

- [FirstName1 LastName1], [Affiliation]: [role1], [role2], [...]
- [FirstName2 LastName2], [Affiliation]: [role1], [role2], [...]
- [FirstName3 LastName3], [Affiliation]: [role3], [role4], [...]
{% for contrib in qualtrics_dict.contrib %}
- {{ contrib }}
{% endfor -%}
{% endif %}
{% if ( (qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.notes != 'nan' or figshare_dict.references != [] %}
---------------------------------------------
## Additional Notes

[Add additional notes here]
{% endif %}
{% if qualtrics_dict.notes != 'nan' %}
{{ qualtrics_dict.notes }}

{% endif %}
{% if figshare_dict.references != [] %}
Links:
{% for reference in readme_dict.references %}
{{ reference }}
{%- endfor %}
{% for reference in figshare_dict.references %}
- {{ reference }}
{% endfor -%}
{% endif %}
Loading

0 comments on commit 42eccb1

Please sign in to comment.