Skip to content

Commit

Permalink
Merge branch 'release/v0.15.0' #104
Browse files Browse the repository at this point in the history
  • Loading branch information
astrochun committed Oct 9, 2020
2 parents c53f031 + e6134bd commit 42eccb1
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 65 deletions.
17 changes: 10 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,16 @@ It follows a workflow that was developed by members of the
The software has a number of backend features, such as:
1. Retrieving private datasets from the [Figshare API](https://docs.figshare.com)
that are undergoing curatorial review
2. Identifying whether a README.txt file is present in the ReDATA deposit.
If such a file does not exist, it will provide ReDATA curators a
copy of the [README.txt template](https://osf.io/sj8xv/download).
Ultimately, it will perform an inspection to ensure that the README.txt
adheres to a defined format and populates metadata information based on
information submitted to ReDATA
2. Constructing a README.txt file based on information from the deposit's
metadata and information provided by the researchers using a Qualtrics
form that walks the users through additional information
3. Retrieving a [Deposit Agreement Form](https://bit.ly/ReDATA_DepositAgreement)
from Qualtrics, which is a requirement for all ReDATA deposits
4. Retrieving a copy of [Curatorial Review Report template (MS-Word)](https://bit.ly/ReDATA_CurationTemplate)
for ReDATA curators to complete.
5. Supporting ReDATA curators with access and workflow management through
5. Creating a hierarchical folder structure that supports library preservation
   and archiving
6. Supporting ReDATA curators with access and workflow management through
standard UNIX commands

These backend services ingest the datasets and accompanying files (described above)
Expand Down Expand Up @@ -200,6 +199,10 @@ We use [SemVer](http://semver.org/) for versioning. For the versions available,
A list of released features and their issue number(s).
List is sorted from moderate to minor revisions for each release.

v0.15.0:
* Implementation of Qualtrics README file #98
* Folder re-structuring for versioning #100

v0.14.0 - 0.14.1:
* Full stdout and file logging #83
* Configuration handling using dictionary structure #87, #93
Expand Down
3 changes: 3 additions & 0 deletions ldcoolp/config/default.ini
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,6 @@ download_url = https://%(dataCenter)s.qualtrics.com/Q/Data/Ajax/GetSingleRespons

# Base URL for survey submission
generate_url = https://%(dataCenter)s.qualtrics.com/jfe/form/

# README Qualtrics settings
readme_survey_id = ***override***
139 changes: 135 additions & 4 deletions ldcoolp/curation/api/qualtrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
import io
from os import remove

# base64 encoding/decoding
import base64

# Text handling for README
from html2text import html2text

# CSV handling
import zipfile
import pandas as pd
Expand Down Expand Up @@ -31,6 +37,10 @@
# Column order for markdown print-out of Qualtrics table
cols_order = ['ResponseId', 'Q4_1', 'Q5', 'Q6_1', 'Q7']

readme_cols_order = ['ResponseId', 'article_id', 'curation_id']

readme_custom_content = ['cite', 'summary', 'files', 'materials', 'contrib', 'notes']


class Qualtrics:
"""
Expand Down Expand Up @@ -77,7 +87,7 @@ class Qualtrics:
List all surveys for a user in a dictionary form:
See: https://api.qualtrics.com/docs/managing-surveys#list-surveys
get_survey_responses()
get_survey_responses(survey_id)
Retrieve pandas DataFrame containing responses for a survey
See: https://api.qualtrics.com/docs/getting-survey-responses-via-the-new-export-apis
Expand Down Expand Up @@ -107,6 +117,8 @@ def __init__(self, qualtrics_dict=config_default_dict['qualtrics'], log=None):
self.survey_id = self.dict['survey_id']
self.file_format = 'csv'

self.readme_survey_id = self.dict['readme_survey_id']

# Logging
self.file_logging = False
if isinstance(log, type(None)):
Expand All @@ -131,12 +143,12 @@ def list_surveys(self):

return survey_dict

def get_survey_responses(self, verbose=False):
def get_survey_responses(self, survey_id, verbose=False):
"""Retrieve pandas DataFrame containing responses for a survey"""

progress_status = "inProgress"

download_url = self.endpoint(f"surveys/{self.survey_id}/export-responses")
download_url = self.endpoint(f"surveys/{survey_id}/export-responses")

# Create Data Export
download_payload = {"format": self.file_format}
Expand Down Expand Up @@ -189,7 +201,7 @@ def pandas_write_buffer(self, df):
def find_deposit_agreement(self, dn_dict):
"""Get Response ID based on a match search for depositor name"""

qualtrics_df = self.get_survey_responses()
qualtrics_df = self.get_survey_responses(self.survey_id)

# First perform search via article_id or curation_id
self.log.info("Attempting to identify using article_id or curation_id ...")
Expand Down Expand Up @@ -303,3 +315,122 @@ def generate_url(self, dn_dict):
urlencode(query_str_dict, safe=url_safe, quote_via=quote)

return full_url

def generate_readme_url(self, dn):
    """
    Generate a Qualtrics README-form URL using the Q_EED option.

    Deposit metadata (title, preferred citation, DOI, license, summary,
    and reference links) is JSON-serialized and base64url-encoded into
    the ``Q_EED`` query parameter so the Qualtrics survey can
    pre-populate its embedded-data fields.

    :param dn: DepositorName-like object providing ``curation_dict``
        (Figshare curation metadata) and ``name_dict`` (deposit naming
        information).
    :return: str. Full URL for the README Qualtrics form.
    """

    df_curation = dn.curation_dict

    # Preferred citation
    single_str_citation = df_curation['item']['citation']

    # Handle period in author list. Assume no '. ' inside the dataset
    # title itself — TODO confirm this holds for all deposits
    str_list = list([single_str_citation.split('):')[0] + '). '])
    str_list += [str_row + '.' for str_row in single_str_citation.split('):')[1].split('. ')]

    # Keep all but the final two fragments as separate lines; merge the
    # last two (item type + DOI link) onto one line
    citation_list = [content for content in str_list[0:-2]]
    citation_list.append(f"{str_list[-2]} {str_list[-1]}")
    citation_list = ' <br> '.join(citation_list)

    # summary
    figshare_description = df_curation['item']['description']

    query_str_dict = {'article_id': dn.name_dict['article_id'],
                      'curation_id': dn.name_dict['curation_id'],
                      'title': dn.name_dict['title'],
                      'depositor_name': dn.name_dict['simplify_fullName'],
                      'preferred_citation': citation_list,
                      'license': df_curation['item']['license']['name'],
                      'summary': figshare_description}

    # doi: an unpublished deposit has an empty DOI string, so fall back
    # to the predictable ReDATA DOI derived from the article_id
    if not df_curation['item']['doi']:  # empty case
        query_str_dict['doi'] = f"https://doi.org/10.25422/azu.data.{dn.name_dict['article_id']}"
    else:
        query_str_dict['doi'] = f"https://doi.org/{df_curation['item']['doi']}"

    # links: only included when references exist.
    # Bug fix: the previous condition (`if not ...`) was inverted — it
    # only triggered for an EMPTY references list, so actual reference
    # links were never passed along to the form.
    if df_curation['item']['references']:  # not empty case
        links = " <br> ".join(df_curation['item']['references'])
        query_str_dict['links'] = links

    # Encode the metadata payload for the Q_EED query parameter
    q_eed = base64.urlsafe_b64encode(json.dumps(query_str_dict).encode()).decode()

    full_url = f"{self.dict['generate_url']}{self.readme_survey_id}?" + \
               'Q_EED=' + q_eed

    return full_url

def find_qualtrics_readme(self, dn_dict):
    """
    Get Response ID based on an article_id/curation_id search.

    :param dn_dict: dict containing 'article_id' and 'curation_id' keys.
    :return: tuple of (ResponseId str, single-row pandas DataFrame).
    :raises ValueError: when no match, or more than one match, is found.
    """

    qualtrics_df = self.get_survey_responses(self.readme_survey_id)

    # First perform search via article_id or curation_id
    self.log.info("Attempting to identify using article_id or curation_id ...")
    article_id = str(dn_dict['article_id'])
    curation_id = str(dn_dict['curation_id'])

    try:
        response_df = qualtrics_df[(qualtrics_df['article_id'] == article_id) |
                                   (qualtrics_df['curation_id'] == curation_id)]
    except KeyError:
        # Survey export does not carry the embedded-data columns
        self.log.warn("article_id and curation_id not in qualtrics survey !")
        response_df = pd.DataFrame()

    if not response_df.empty:
        self.log.info("Unique match based on article_id or curation_id !")
        if response_df.shape[0] != 1:
            self.log.warn("More than one entries found !!!")

        self.pandas_write_buffer(response_df[readme_cols_order])

    if response_df.empty:
        self.log.warn("Empty DataFrame")
        raise ValueError
    else:
        if response_df.shape[0] == 1:
            response_dict = df_to_dict_single(response_df)
            self.log.info("Only one entry found!")
            self.log.info(f"Survey completed on {response_dict['date_completed']}")
            self.log.info(f" ... for {response_dict['article_id']}")
            return response_dict['ResponseId'], response_df
        else:
            self.log.warn("Multiple entries found")
            # Bug fix: the empty DataFrame must carry the expected
            # columns; otherwise the selection below raises KeyError
            # instead of reaching the intended ValueError
            response_df = pd.DataFrame(columns=readme_cols_order)
            self.pandas_write_buffer(response_df[readme_cols_order])

    raise ValueError

def retrieve_qualtrics_readme(self, dn_dict=None, ResponseId=None, browser=True):
    """
    Retrieve README survey content for a deposit from Qualtrics.

    Looks up the README survey response via ``find_qualtrics_readme``
    and returns a dict of the curator-editable README fields
    (``readme_custom_content``), with list/markdown fields normalized.
    On lookup failure, prompts the curator for a manual ResponseId.

    :param dn_dict: dict with 'article_id' and 'curation_id' keys,
        used for the survey search when ResponseId is not given.
    :param ResponseId: str. Explicit Qualtrics response to retrieve.
    :param browser: bool. Unused in the visible code — presumably for
        opening a web browser; TODO confirm against the full file.
    :return: dict of README field content, or None on fall-through.
    """

    if isinstance(ResponseId, type(None)):
        try:
            ResponseId, response_df = self.find_qualtrics_readme(dn_dict)
            self.log.info(f"Qualtrics README ResponseID : {ResponseId}")

            # Keep only the curator-editable README columns and coerce
            # floats (pandas NaN for unanswered questions) to strings
            qualtrics_dict = df_to_dict_single(response_df[readme_custom_content])
            for key in qualtrics_dict.keys():
                if isinstance(qualtrics_dict[key], float):
                    qualtrics_dict[key] = str(qualtrics_dict[key])

            # Separate cite, contrib for list style
            for field in ['cite', 'contrib']:
                if qualtrics_dict[field] != 'nan':
                    qualtrics_dict[field] = qualtrics_dict[field].split('\n')

            # Markdown files, materials: convert Qualtrics HTML answers
            # to markdown text
            for field in ['files', 'materials']:
                if qualtrics_dict[field] != 'nan':
                    qualtrics_dict[field] = html2text(qualtrics_dict[field])
                    # Strip extra white space from html2text
                    if qualtrics_dict[field][-2:] == "\n\n":
                        qualtrics_dict[field] = qualtrics_dict[field][:-2]

            return qualtrics_dict
        except ValueError:
            # No unique survey match — fall back to manual entry.
            # NOTE(review): the visible code ends after the prompt; the
            # manually entered ResponseId appears unhandled here —
            # confirm against the full file.
            self.log.warn("Error with retrieving ResponseId")
            self.log.info("PROMPT: If you wish, you can manually enter ResponseId to retrieve.")
            ResponseId = input("PROMPT: An EMPTY RETURN will generate a custom Qualtrics link to provide ... ")
            self.log.info(f"RESPONSE: {ResponseId}")
2 changes: 1 addition & 1 deletion ldcoolp/curation/depositor_name.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def get_folder_name(self):
else:
new_vers = self.curation_dict['version'] + 1

folderName += f"_{self.article_id}_v{new_vers}"
folderName += f"_{self.article_id}/v{new_vers}"

if self.verbose:
self.log.info(f"depository_name : {folderName}")
Expand Down
102 changes: 59 additions & 43 deletions ldcoolp/curation/inspection/readme/README_template.md
Original file line number Diff line number Diff line change
@@ -1,88 +1,104 @@
---------------------------------------------
# {{ readme_dict.title }}
# {{ figshare_dict.title }}

Preferred citation (DataCite format):

Preferred citation (DataCite format):
{% for cite_text in readme_dict.preferred_citation %}
{% if qualtrics_dict.cite == 'nan' %}
{% for cite_text in figshare_dict.preferred_citation %}
{{ cite_text }}
{%- endfor %}

{#
Example (for a journal article):
LastName1, FirstName1; LastName2, FirstName2; LastName3, FirstName3; etc. (YYYY).
"[Title of Article]".
Journal name, and journal information (e.g., volume, issue, page numbers)
[DOI link to publication]

Example (for ReDATA DOI):
LastName1, FirstName1; LastName2, FirstName2; LastName3, FirstName3; etc. (YYYY).
"[Title of Your Dataset Here]".
University of Arizona Research Data Repository.
[Item Type]. https://doi.org/10.25422/azu.data.[DOI_NUMBER]
#}
{% endfor %}
{% else %}
{% for cite_text in qualtrics_dict.cite %}
{{ cite_text }}
{% endfor %}
{% endif %}


Corresponding Author:
{{ readme_dict.firstname }} {{ readme_dict.lastname }}, University of Arizona, {{ readme_dict.email }}
{{ figshare_dict.firstname }} {{ figshare_dict.lastname }}, University of Arizona, {{ figshare_dict.email }}


License:
{{ readme_dict.license }}
{{ figshare_dict.license }}


DOI:
https://doi.org/{{ readme_dict.doi }}
https://doi.org/{{ figshare_dict.doi }}



---------------------------------------------
## Summary

{{ readme_dict.description }}
{{ figshare_dict.description }}
{% if qualtrics_dict.summary != 'nan' %}

{{ qualtrics_dict.summary }}
{% endif %}
{% if ( (qualtrics_dict.files != 'nan') or
(qualtrics_dict.materials != 'nan') or
(qualtrics_dict.contrib != 'nan') or
(qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.files != 'nan' %}
---------------------------------------------
## Files and Folders


#### [Folder 1 Name]: [Description of contents]
- [file1 or set of related files]: [Purpose, contents, naming convention, etc.]
- [file2 or set of related files]: [Purpose, contents, naming convention, etc.]

#### [Folder 1 Name]/[Subfolder 1 Name]: [Description of contents]
- [file1 or set of related files]: [Purpose, contents, naming convention, etc.]

#### [Folder 2 Name]: [Description of contents]
- [file1 or set of related files]: [Purpose, contents, naming convention, etc.]
- [file2 or set of related files]: [Purpose, contents, naming convention, etc.]
{{ qualtrics_dict.files }}
{% endif %}
{% if ( (qualtrics_dict.materials != 'nan') or
(qualtrics_dict.contrib != 'nan') or
(qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.materials != 'nan' %}
---------------------------------------------
## Materials & Methods

- [Software program 1 w/version number], [URL, DOI, citation, etc.]. [Short description of why it's needed].
- [Software program 2 w/version number], [URL, DOI, citation, etc.]. [Short description of why it's needed].
- [Instrument name/model], [manufacturer]. [Short description of why it's needed].
{{ qualtrics_dict.materials }}
{% endif %}
{% if ( (qualtrics_dict.contrib != 'nan') or
(qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.contrib != 'nan' %}
---------------------------------------------
## Contributor Roles

The roles are defined by the CRediT taxonomy http://credit.niso.org/

- [FirstName1 LastName1], [Affiliation]: [role1], [role2], [...]
- [FirstName2 LastName2], [Affiliation]: [role1], [role2], [...]
- [FirstName3 LastName3], [Affiliation]: [role3], [role4], [...]
{% for contrib in qualtrics_dict.contrib %}
- {{ contrib }}
{% endfor -%}
{% endif %}
{% if ( (qualtrics_dict.notes != 'nan') or
(figshare_dict.references != []) ) %}



{% endif %}
{% if qualtrics_dict.notes != 'nan' or figshare_dict.references != [] %}
---------------------------------------------
## Additional Notes

[Add additional notes here]
{% endif %}
{% if qualtrics_dict.notes != 'nan' %}
{{ qualtrics_dict.notes }}

{% endif %}
{% if figshare_dict.references != [] %}
Links:
{% for reference in readme_dict.references %}
{{ reference }}
{%- endfor %}
{% for reference in figshare_dict.references %}
- {{ reference }}
{% endfor -%}
{% endif %}
Loading

0 comments on commit 42eccb1

Please sign in to comment.