From 50cff5f9be14e0a61bf666dff134b9e51c09fc33 Mon Sep 17 00:00:00 2001 From: Acribbs Date: Thu, 14 Nov 2024 08:13:14 +0000 Subject: [PATCH] removed readthdocs --- docs_readthedocs/Makefile | 20 - docs_readthedocs/conf.py | 205 ------ .../defining_workflow/Tutorial.rst | 304 --------- .../defining_workflow/Writing_workflow.rst | 611 ------------------ .../defining_workflow/run_parameters.rst | 37 -- docs_readthedocs/function_doc/Core.rst | 15 - docs_readthedocs/function_doc/Core/CSV.rst | 5 - .../function_doc/Core/Database.rst | 4 - .../function_doc/Core/Experiment.rst | 4 - .../function_doc/Core/IOTools.rst | 4 - .../function_doc/Core/Logfile.rst | 4 - docs_readthedocs/function_doc/Core/csv2db.rst | 4 - docs_readthedocs/function_doc/Overview.rst | 6 - docs_readthedocs/function_doc/Pipeline.rst | 8 - .../function_doc/Pipeline/Control.rst | 4 - .../function_doc/Pipeline/Database.rst | 4 - .../function_doc/Pipeline/Execution.rst | 4 - .../function_doc/Pipeline/Files.rst | 4 - .../function_doc/Pipeline/Utils.rst | 4 - .../getting_started/Cluster_config.rst | 36 -- docs_readthedocs/getting_started/Examples.rst | 324 ---------- .../getting_started/Installation.rst | 112 ---- docs_readthedocs/getting_started/Tutorial.rst | 72 --- docs_readthedocs/img/CGAT_logo.png | Bin 702858 -> 0 bytes docs_readthedocs/index.rst | 138 ---- .../project_info/Contributing.rst | 39 -- docs_readthedocs/project_info/FAQ.rst | 9 - docs_readthedocs/project_info/Licence.rst | 36 -- docs_readthedocs/project_info/citations.rst | 59 -- .../project_info/how_to_contribute.rst | 93 --- docs_readthedocs/release.rst | 24 - docs_readthedocs/remote/Azure.rst | 108 ---- docs_readthedocs/remote/GC.rst | 127 ---- docs_readthedocs/remote/S3.rst | 126 ---- docs_readthedocs/remote/SFTP.rst | 53 -- readthedocs.yml | 2 - 36 files changed, 2609 deletions(-) delete mode 100644 docs_readthedocs/Makefile delete mode 100644 docs_readthedocs/conf.py delete mode 100644 docs_readthedocs/defining_workflow/Tutorial.rst delete mode 100644 docs_readthedocs/defining_workflow/Writing_workflow.rst delete mode 100644 docs_readthedocs/defining_workflow/run_parameters.rst delete mode 100644 docs_readthedocs/function_doc/Core.rst delete mode 100644 docs_readthedocs/function_doc/Core/CSV.rst delete mode 100644 docs_readthedocs/function_doc/Core/Database.rst delete mode 100644 docs_readthedocs/function_doc/Core/Experiment.rst delete mode 100644 docs_readthedocs/function_doc/Core/IOTools.rst delete mode 100644 docs_readthedocs/function_doc/Core/Logfile.rst delete mode 100644 docs_readthedocs/function_doc/Core/csv2db.rst delete mode 100644 docs_readthedocs/function_doc/Overview.rst delete mode 100644 docs_readthedocs/function_doc/Pipeline.rst delete mode 100644 docs_readthedocs/function_doc/Pipeline/Control.rst delete mode 100644 docs_readthedocs/function_doc/Pipeline/Database.rst delete mode 100644 docs_readthedocs/function_doc/Pipeline/Execution.rst delete mode 100644 docs_readthedocs/function_doc/Pipeline/Files.rst delete mode 100644 docs_readthedocs/function_doc/Pipeline/Utils.rst delete mode 100644 docs_readthedocs/getting_started/Cluster_config.rst delete mode 100644 docs_readthedocs/getting_started/Examples.rst delete mode 100644 docs_readthedocs/getting_started/Installation.rst delete mode 100644 docs_readthedocs/getting_started/Tutorial.rst delete mode 100755 docs_readthedocs/img/CGAT_logo.png delete mode 100644 docs_readthedocs/index.rst delete mode 100644 docs_readthedocs/project_info/Contributing.rst delete mode 100644 
docs_readthedocs/project_info/FAQ.rst delete mode 100644 docs_readthedocs/project_info/Licence.rst delete mode 100644 docs_readthedocs/project_info/citations.rst delete mode 100644 docs_readthedocs/project_info/how_to_contribute.rst delete mode 100644 docs_readthedocs/release.rst delete mode 100644 docs_readthedocs/remote/Azure.rst delete mode 100644 docs_readthedocs/remote/GC.rst delete mode 100644 docs_readthedocs/remote/S3.rst delete mode 100644 docs_readthedocs/remote/SFTP.rst delete mode 100644 readthedocs.yml diff --git a/docs_readthedocs/Makefile b/docs_readthedocs/Makefile deleted file mode 100644 index d7751a97..00000000 --- a/docs_readthedocs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -SPHINXPROJ = cgatcore -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs_readthedocs/conf.py b/docs_readthedocs/conf.py deleted file mode 100644 index 09f2ee49..00000000 --- a/docs_readthedocs/conf.py +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# cgatcore documentation build configuration file, created by -# sphinx-quickstart on Sat Mar 3 13:24:26 2018. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys - -sys.path.insert(0, os.path.abspath('../')) - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = ['sphinx.ext.autodoc', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.imgmath', - 'sphinx.ext.ifconfig', - 'sphinx.ext.inheritance_diagram', - 'sphinx.ext.intersphinx', - 'sphinx.ext.napoleon'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'cgatcore' -copyright = '2018-2019, CGAT Developers' -author = 'CGAT Developers' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. 
-version = '1.0' -# The full version, including alpha/beta/rc tags. -release = '1.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = "sphinx_rtd_theme" - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -# html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# This is required for the alabaster theme -# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars -html_sidebars = { - '**': [ - 'relations.html', # needs 'show_related': True theme option to display - 'searchbox.html', - ] -} - -# Included at the end of each rst file -rst_epilog = ''' -.. _CGAT Training Programme: http://www.cgat.org -.. _CGAT pipeline Collection: https://www.cgat.org/downloads/public/CGATpipelines/documentation/ -.. _CGAT Code Collection: https://www.cgat.org/downloads/public/cgat/documentation/ -.. _pysam: https://github.com/pysam-developers/pysam -.. _samtools: http://samtools.sourceforge.net/ -.. _htslib: http://www.htslib.org/ -.. _tabix: http://samtools.sourceforge.net/tabix.shtml/ -.. _Galaxy: https://main.g2.bx.psu.edu/ -.. _cython: http://cython.org/ -.. _python: http://python.org/ -.. _ipython: http://ipython.org/ -.. _pyximport: http://www.prescod.net/pyximport/ -.. _sphinx: http://sphinx-doc.org/ -.. _ruffus: http://www.ruffus.org.uk/ -.. _cgatreport: https://github.com/AndreasHeger/CGATReport/ -.. _sqlite: http://www.sqlite.org/ -.. _make: http://www.gnu.org/software/make -.. _UCSC: http://genome.ucsc.edu -.. _ENSEMBL: http://www.ensembl.org -.. _GO: http://www.geneontology.org -.. _gwascatalog: http://www.genome.gov/gwastudies/ -.. _distlid: http://distild.jensenlab.org/ -.. _mysql: https://mariadb.org/ -.. _postgres: http://www.postgresql.org/ -.. _bedtools: http://bedtools.readthedocs.org/en/latest/ -.. _UCSC Tools: http://genome.ucsc.edu/admin/git.html -.. _git: http://git-scm.com/ -.. _sge: http://wiki.gridengine.info/wiki/index.php/Main_Page -.. _alignlib: https://github.com/AndreasHeger/alignlib -.. _iGenomes: https://support.illumina.com/sequencing/sequencing_software/igenome.html -''' -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. 
-htmlhelp_basename = 'cgatcoredoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'cgatcore.tex', 'cgatcore Documentation', - 'CGAT Developers', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'cgatcore', 'cgatcore Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'cgatcore', 'cgatcore Documentation', - author, 'cgatcore', 'One line description of project.', - 'Miscellaneous'), -] diff --git a/docs_readthedocs/defining_workflow/Tutorial.rst b/docs_readthedocs/defining_workflow/Tutorial.rst deleted file mode 100644 index 29a069a3..00000000 --- a/docs_readthedocs/defining_workflow/Tutorial.rst +++ /dev/null @@ -1,304 +0,0 @@ -.. _defining_workflow-Configuration: - - -============================ -Writing a workflow- Tutorial -============================ - -The explicit aim of cgat-core is to allow users to quickly and easily build their own computational pipelines that will speed up your analysis workflow. - -Installation of cgat-core -------------------------- - -In order to begin writing a pipeline you will need to install the cgat-core code -(see :ref:`getting_started-Installation`) for installation instructions. - - -Tutorial start --------------- - -Setting up the pipleine -======================= - -**1.** First navigate to a directory where you want to start building your code:: - - mkdir test && cd test && mkdir configuration && touch configuration/pipeline.yml && touch pipeline_test.py && touch ModuleTest.py - -This will create a directory called test in the current directory with the following layout:: - - |-- configuration - | `-- pipeline.yml - `-- pipeline_test.py - -- ModuleTest.py - - -The layout has the following components:: - -pipeline_test.py - This is the file that will contain all of the ruffus workflows, the file needs - the format pipeline_.py -test/ - Directory containing the configuration yml file. The directory needs to be named - the same as the pipeline_.py file. This folder will contain the `pipeline.yml` - configuration file. -ModuleTest.py - This file will contain functions that will be imported into the main ruffus - workflow file, pipeline_test.py - -**2.** View the source code within pipeline_test.py - -This is where the ruffus tasks will be written. The code begins with a doc -string detailing the pipeline functionality. You should use this section to document your -pipeline. :: - - '''This pipeline is a test and this is where the documentation goes ''' - -The pipeline then needs a few utility functions to help with executing the pipeline. 
**Import statements** You will need to import ruffus and cgatcore utilities ::

    from ruffus import *
    import cgatcore.experiment as E
    from cgatcore import pipeline as P

Importing ruffus allows ruffus decorators to be used within our pipeline.

Importing experiment from cgatcore provides utility functions for argument parsing, logging and record keeping within scripts.

Importing pipeline from cgatcore gives access to utility functions for interfacing CGAT ruffus pipelines with an HPC cluster, uploading data to a database, parameterisation and more.

You'll also need python modules::

    import os
    import sys
    import sqlite3

**Config parser:** This code helps with parsing the pipeline.yml file::

    # load options from the config file
    PARAMS = P.get_parameters(
        ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
         "../pipeline.yml",
         "pipeline.yml"])

**Pipeline configuration:** We will add configurable variables to our pipeline.yml file so that we can modify the output of our pipeline. With `pipeline.yml` open, copy and paste the following into the file. ::

    database:
        name: "csvdb"

When you come to run the pipeline the configuration variables (in this case csvdb) can be accessed in the pipeline by PARAMS["database_name"].

**Database connection:** This code helps with connecting to a sqlite database::

    def connect():
        '''utility function to connect to database.

        Use this method to connect to the pipeline database.
        Additional databases can be attached here as well.

        Returns an sqlite3 database handle.
        '''

        dbh = sqlite3.connect(PARAMS["database_name"])

        return dbh

**Commandline parser:** This bit of code allows the pipeline to parse arguments. ::

    def main(argv=None):
        if argv is None:
            argv = sys.argv
        P.main(argv)


    if __name__ == "__main__":
        sys.exit(P.main(sys.argv))


Running test pipeline
=====================

You now have the bare-bones layout of the pipeline and now need code to execute. Below you will find example code that you can copy and paste into your pipeline_test.py file. The code includes two ruffus_ **@transform** tasks that operate on the pipeline.yml. The first function, :code:`countWords`, contains a statement that counts the number of words in the file. The statement is then run using the :code:`P.run()` function.

The second ruffus_ **@transform** function called :code:`loadWordCounts` takes as an input the output of the function countWords and loads the number of words to a sqlite database using :code:`P.load()`.

The third :code:`def full()` function is a dummy task that is written to run the whole pipeline. It has an **@follows** decorator that takes the :code:`loadWordCounts` function. This completes the pipeline chain, so the pipeline can be run with the task name full to execute the whole workflow.

The following code should be pasted just before the **Commandline parser** arguments and after the **database connection** code.
-:: - - # --------------------------------------------------- - # Specific pipeline tasks - @transform("pipeline.yml", - regex("(.*)\.(.*)"), - r"\1.counts") - def countWords(infile, outfile): - '''count the number of words in the pipeline configuration files.''' - - # the command line statement we want to execute - statement = '''awk 'BEGIN { printf("word\\tfreq\\n"); } - {for (i = 1; i <= NF; i++) freq[$i]++} - END { for (word in freq) printf "%%s\\t%%d\\n", word, freq[word] }' - < %(infile)s > %(outfile)s''' - - # execute command in variable statement. - # - # The command will be sent to the cluster. The statement will be - # interpolated with any options that are defined in in the - # configuration files or variable that are declared in the calling - # function. For example, %(infile)s will we substituted with the - # contents of the variable "infile". - P.run(statement) - - - @transform(countWords, - suffix(".counts"), - "_counts.load") - def loadWordCounts(infile, outfile): - '''load results of word counting into database.''' - P.load(infile, outfile, "--add-index=word") - - # --------------------------------------------------- - # Generic pipeline tasks - @follows(loadWordCounts) - def full(): - pass - -To run the pipeline navigate to the working directory and then run the pipeline. :: - - python /location/to/code/pipeline_test.py config - python /location/to/code/pipeline_test.py show full -v 5 - -This will place the pipeline.yml in the folder. Then run :: - - python /location/to/code/pipeline_test.py make full -v5 --local - -The pipeline will then execute and count the words in the yml file. - - -Modifying the test pipeline to build your own workflows -======================================================= - -The next step is to modify the basic code in the pipeline to fit your particular -NGS workflow needs. For example, say we wanted to convert a sam file into a bam -file then perform flag stats on that output bam file. The code and layout that we just wrote -can be easily modified to perform this. We would remove all of the code from the -specific pipeline tasks and write our own. - -The pipeline will have two steps: -1. Identify all sam files and convert to a bam file. -2. Take the output of step 1 and then perform flagstats on that bam file. - -The first step would involve writing a function to identify all -`sam` files in a `data.dir/` directory. This first function would accept a sam file then -use samtools view to convert it to a bam file. Therefore, we would require an ``@transform`` -function. - -The second function would then take the output of the first function, perform samtools -flagstat and then output the results as a flat .txt file. Again, an ``@transform`` function is required -to track the input and outputs. - -This would be written as follows: -:: - @transform("data.dir/*.sam", - regex("data.dir/(\S+).sam"), - r"\1.bam") - def bamConvert(infile, outfile): - 'convert a sam file into a bam file using samtools view' - - statement = ''' samtools view -bT /ifs/mirror/genomes/plain/hg19.fasta - %(infile)s > %(outfile)s''' - - P.run(statement) - - @transform(bamConvert, - suffix(".bam"), - "_flagstats.txt") - def bamFlagstats(infile, outfile): - 'perform flagstats on a bam file' - - statement = '''samtools flagstat %(infile)s > %(outfile)s''' - - P.run(statement) - - -To run the pipeline:: - - python /path/to/file/pipeline_test.py make full -v5 - - -The bam files and flagstats outputs should then be generated. 
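To tie these two tasks into the same structure used in the word-count example, you can again close the chain with a dummy ``full`` target. The following is a minimal sketch, assuming the ``bamConvert`` and ``bamFlagstats`` tasks defined above::

    # ---------------------------------------------------
    # Generic pipeline tasks
    @follows(bamFlagstats)
    def full():
        pass

With this in place, the ``make full`` command shown above will run both the conversion and the flagstat steps.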
- - -Parameterising the code using the .yml file -=========================================== - -Having written the basic function of our pipleine, as a philosophy, -we try and avoid any hard coded parameters. - -This means that any variables can be easily modified by the user -without having to modify any code. - -Looking at the code above, the hard coded link to the hg19.fasta file -can be added as a customisable parameter. This could allow the user to -specify any fasta file depending on the genome build used to map and -generate the bam file. - -In order to do this the :file:`pipeline.yml` file needs to be modified. This -can be performed in the following way: - -Configuration values are accessible via the :py:data:`PARAMS` -variable. The :py:data:`PARAMS` variable is a dictionary mapping -configuration parameters to values. Keys are in the format -``section_parameter``. For example, the key ``genome_fasta`` will -provide the configuration value of:: - - genome: - fasta: /ifs/mirror/genomes/plain/hg19.fasta - -In the pipeline.yml, add the above code to the file. In the pipeline_test.py -code the value can be accessed via ``PARAMS["genome_fasta"]``. - -Therefore the code we wrote before for parsing bam files can be modified to -:: - @transform("data.dir/*.sam", - regex("data.dir/(\S+).sam"), - r"\1.bam") - def bamConvert(infile, outfile): - 'convert a sam file into a bam file using samtools view' - - genome_fasta = PARAMS["genome_fasta"] - - statement = ''' samtools view -bT %(genome_fasta)s - %(infile)s > %(outfile)s''' - - P.run(statement) - - @transform(bamConvert, - suffix(".bam"), - "_flagstats.txt") - def bamFlagstats(infile, outfile): - 'perform flagstats on a bam file' - - statement = '''samtools flagstat %(infile)s > %(outfile)s''' - - P.run(statement) - - -Running the code again should generate the same output. However, if you -had bam files that came from a different genome build then the parameter in the yml file -can be modified easily, the output files deleted and the pipeline ran using the new configuration values. diff --git a/docs_readthedocs/defining_workflow/Writing_workflow.rst b/docs_readthedocs/defining_workflow/Writing_workflow.rst deleted file mode 100644 index 189a403a..00000000 --- a/docs_readthedocs/defining_workflow/Writing_workflow.rst +++ /dev/null @@ -1,611 +0,0 @@ -.. _defining_workflow-Writing_workflow: - -================== -Writing a workflow -================== - - -.. _defining_workflow-philosophy: - -Our workflow philosophy ------------------------ - -The explicit aim of CGAT-core is to allow users to quickly and easily build their own computational pipelines that will speed up your analysis workflow. - -When building pipelines it is often useful to keep in mind the following philosophy: - -Flexibility - There are always new tools and insights that could be incorporated into a pipeline. Ultimately, a pipeline should be flexible and the code should not constrain you when implimenting new features. -Scriptability - The pipeline should be scriptable, i.e, the whole pipeline can be run within another pipeline. Similarly, parts of a pipeline can be duplicated to process several data streams in parallel. This is a crucial feature in genome studies as a single analysis will not permit making inferences by itself. When we write a pipeline we usually attempt to write a command line script (and include it in the CGAT-apps repository) and then run this script as a command line statement in the pipeline. -Reproducibility - The pipeline is fully automated. 
The same inputs and configuration will produce the same outputs. -Reusability - The pipeline should be able to be re-used on similar data, preferably only requiring changes to a configuration file (pipeline.yml). -Archivability - Once finished, the whole project should be able to be archived without too many major dependencies on external data. This should be a simple process and hence all project data should be self-contained. It should not involve going through various directories or databases to figure out which files and tables belong to a project or a project depends on. - -.. _defining_workflow-building: - -Building a pipeline -------------------- - -The best way to build a pipeline is to start from an example. In `cgat-showcase `_ we have a toy example of an RNA-seq -analysis pipeline that aims to show users how simple workflows can be generated with minimal code. `cgat-flow `_ demonstrates a set of complex workflows. - -For a step by step tutorial on how to run the pipelines please refer to our :ref:`getting_started-Tutorial`. - -For help on how to construct pipelines from scratch please continue reading for more information. - -In an empty directory you will need to make a new directory and then a python file -with the same name. For example:: - - mkdir test && touch pipeline_test.py - -All pipelines require a yml configuration file that will allow you to add configurable values to modify the behaviour of your code. -This is placed within the test/ directory, which should have the same name as the name of your pipeline_test.py file:: - - touch test/pipeline.yml - -In order to help with debugging and reading our code, our pipelines are written so that -a pipeline task file contains Ruffus tasks and calls functions in an associated module file, -which contains all of the code to transform and analyse the data. - -Therefore, if you wish to create a module file, we usually save this file in the following convention, -``ModuleTest.py`` and it can be imported into the main pipeline task file (``pipeline_test.py``)as: - -.. code-block:: python - - import ModuleTest - -This section describes how pipelines can be constructed using the -:mod:`pipeline` module in cgat-core. The `pipeline `_ module contains a variety of -useful functions for pipeline construction. - -.. _defining_workflow-p-input: - -pipeline input --------------- - -Pipelines are executed within a dedicated working -directory. They usually require the following files within this -directory: - - * a pipeline configuration file :file:`pipeline.yml` - * input data files, usually listed in the documentatuion of each pipeline - -Other files that might be used in a pipeline are: - - * external data files such as genomes that a referred to by they their full path name. - -The pipelines will work from the input files in the working -directory, usually identified by their suffix. For example, a -mapping pipeline might look for any ``*.fastq.gz`` files in the -directory, run QC on these and map the reads to a genome sequence etc. - -.. _defining_workflow-p-output: - -pipeline output ----------------- - -The pipeline will create files and database tables in the -working directory. When building a pipeline, you can choose -any file/directory layout that suits your needs. Some prefer flat -hierarchies with many files, while others prefer deep directories. - -.. _defining_workflow-guidelines: - -Guidelines ----------- - -To preserve disk space, we always use compressed files as -much as possible. 
Most data files compress very well, for example -fastq files often compress by a factor of 80% or more: a 10Gb file -will use just 2Gb. - -Working with compressed files is straight-forward using unix pipes and -the commands ``gzip``, ``gunzip`` or ``zcat``. - -If you require random access to a file, load the file into the -database and index it appropriately. Genomic interval files can be -indexed with tabix to allow random access. - -.. _pipelineCommands: - - -Import statements ------------------ - -In order to run our pipelines you will need to import the cgatcore python -modules into your pipeline. For every CGAT pipeline we recommend importing the -basic modules as follows. Then any additional modules can be imported as required. - -.. code-block:: python - - from ruffus import * - import cgatcore.experiment as E - from cgatcore import pipeline as P - import cgatcore.iotools as iotools - -Selecting the appropriate Ruffus decorator ------------------------------------------- - -Before starting to write a pipeline it is always best to map out -on a whiteboard the the steps and flow of your potential pipeline. This will allow you -to identify the input and outputs of each task. Once you have assessed this then the next step is -to identify which Ruffus decorator you require. Documentation on each decorator can be found in the -`ruffus documentation `_ - - - -Running commands within tasks ------------------------------ - -To run a command line program within a pipeline task, build a -statement and call the :meth:`pipeline.run` method:: - - @transform( '*.unsorted', suffix('.unsorted'), '.sorted') - def sortFile( infile, outfile ): - - statement = '''sort %(infile)s > %(outfile)s''' - P.run(statement) - -On calling the :meth:`pipeline.run` method, the environment of the -caller is examined for a variable called ``statement``. The variable -is subjected to string substitution from other variables in the local -namespace. In the example above, ``%(infile)s`` and ``%(outfile)s`` -are substituted with the values of the variables ``infile`` and -``outfile``, respectively. - -The same mechanism also permits setting configuration parameters, for example:: - - @transform( '*.unsorted', suffix('.unsorted'), '.sorted') - def sortFile( infile, outfile ): - - statement = '''sort -t %(tmpdir)s %(infile)s > %(outfile)s''' - P.run(statement) - -will automatically substitute the configuration parameter ``tmpdir`` -into the command. See ConfigurationValues_ for more on using configuration -parameters. - -The pipeline will stop and return an error if the command exits with an error code. - -If you chain multiple commands, only the return value of the last -command is used to check for an error. Thus, if an upstream command -fails, it will go unnoticed. To detect these errors, insert -``&&`` between commands. For example:: - - @transform( '*.unsorted.gz', suffix('.unsorted.gz'), '.sorted) - def sortFile( infile, outfile ): - - statement = '''gunzip %(infile)s %(infile)s.tmp && - sort -t %(tmpdir)s %(infile)s.tmp > %(outfile)s && - rm -f %(infile)s.tmp - P.run(statement) - -Of course, the statement aboved could be executed more efficiently -using pipes:: - - @transform( '*.unsorted.gz', suffix('.unsorted.gz'), '.sorted.gz') - def sortFile( infile, outfile ): - - statement = '''gunzip < %(infile)s - | sort -t %(tmpdir)s - | gzip > %(outfile)s''' - P.run(statement) - -The pipeline inserts code automatically to check for error return -codes if multiple commands are combined in a pipe. 
- -Running commands on the cluster -------------------------------- - -In order to run commands on cluster, use ``to_cluster=True``. - -To run the command from the previous section on the cluster:: - - @files( '*.unsorted.gz', suffix('.unsorted.gz'), '.sorted.gz') - def sortFile( infile, outfile ): - - to_cluster = True - statement = '''gunzip < %(infile)s - | sort -t %(tmpdir)s - | gzip > %(outfile)s''' - P.run(statement) - -The pipeline will automatically create the job submission files, -submit the job to the cluster and wait for its return. - -pipelines will use the command line options ``--cluster-queue``, -``--cluster-priority``, etc. for global job control. For example, to -change the priority when starting the pipeline, use:: - - python --cluster-priority=-20 - -To set job options specific to a task, you can define additional -variables:: - - @files( '*.unsorted.gz', suffix('.unsorted.gz'), '.sorted.gz') - def sortFile( infile, outfile ): - - to_cluster = True - job_queue = 'longjobs.q' - job_priority = -10 - job_options= "-pe dedicated 4 -R y" - - statement = '''gunzip < %(infile)s - | sort -t %(tmpdir)s - | gzip > %(outfile)s''' - P.run(statement) - -The above statement will be run in the queue ``longjobs.q`` at a -priority of ``-10``. Additionally, it will be executed in the -parallel environment ``dedicated`` with at least 4 cores. - -Array jobs can be controlled through the ``job_array`` variable:: - - @files( '*.in', suffix('.in'), '.out') - def myGridTask( infile, outfile ): - - job_array=(0, nsnps, stepsize) - - statement = '''grid_task.bash %(infile)s %(outfile)s - > %(outfile)s.$SGE_TASK_ID 2> %(outfile)s.err.$SGE_TASK_ID - ''' - P.run(statement) - - -Note that the :file:`grid_task.bash` file must be grid engine -aware. This means it makes use of the :envvar:`SGE_TASK_ID`, -:envvar:`SGE_TASK_FIRST`, :envvar:`SGE_TASK_LAST` and -:envvar:`SGE_TASK_STEPSIZE` environment variables to select the chunk -of data it wants to work on. - -The job submission files are files called `tmp*` in the :term:`working -directory`. These files will be deleted automatically. However, the -files will remain after aborted runs to be cleaned up manually. - -.. _defining_workflow-databases: - - -Useful information regarding decorators ---------------------------------------- - -To see a full list of ruffus decorators that control the flow of the pipeline please -see the `ruffus documentation `_. - -However, during peer review it was pointed out that it would be helpful to include a few examples of -how you can modify the infile name and transform it to the output filename. There are a few ways of doing this: - -The first way is to capture the suffix so the outfile is placed into the same folder as the infile:: - - # pairs are a tuple of read pairs (read1, read2) - @transform(pairs, - suffix(.fastq.gz), - ("_trimmed.fastq.gz", "_trimmed.fastq.gz")) - -This will transform an input .fastq.gz and result in an output -with a new siffix _trimmed.fastq.gz. 
- -Another way to add a output file into aother filer is to use a regex:: - - @follows(mkdir("new_folder.dir")) - @transform(pairs, - regex((\S+).fastq.gz), - (r"new_folder.dir/\1_trimmed.fastq.gz", r"new_folder.dir/\1_trimmed.fastq.gz")) - -This can also be achieved using the formatter function:: - - @follows(mkdir("new_folder.dir")) - @transform(pairs, - formatter((\S+).fastq.gz), - ("new_folder.dir/{SAMPLE[0]}_trimmed.fastq.gz", r"new_folder.dir/{SAMPLE[0]}_trimmed.fastq.gz")) - - -Combining commands together ---------------------------- - -In order to combine commands together you will need to use `&&` -to make sure your commands are chained correctly. For example:: - - statement = """ - module load cutadapt && - cutadapt .... - """ - - P.run(statement) - -If you didnt have the `&&` then the command will fail because the cutadapt command will be -executed as part of the module load statement. - -Databases ---------- - -Loading data into the database -============================== - -:mod:`pipeline.py` offers various tools for working with databases. By -default, it is configured to use an sqlite3 database in the -:term:`working directory` called :file:`csvdb`. - -Tab-separated output files can be loaded into a table using the -:meth:`pipeline.load` function. For example:: - - @jobs_limit(PARAMS.get("jobs_limit_db", 1), "db") - @transform('data_*.tsv.gz', suffix('.tsv.gz'), '.load') - def loadTables(infile, outfile): - P.load(infile, outfile) - -The task above will load all tables ending with ``tsv.gz`` into the -database Table names are given by the filenames, i.e, the data in -:file:`data_1.tsv.gz` will be loaded into the table :file:`data_1`. - -The load mechanism uses the script :file:`csv2db.py` and can be -configured using the configuration options in the ``database`` section -of :file:`pipeline.ini`. Additional options can be given via the -optional *options* argument:: - - @jobs_limit(PARAMS.get("jobs_limit_db", 1), "db") - @transform('data_*.tsv.gz', suffix('.tsv.gz'), '.load') - def loadTables( infile, outfile ): - P.load(infile, outfile, "--add-index=gene_id") - -In order for the load mechanism to be transparent, it is best avoided -to call the :file:`csv2db.py` script directly. Instead, use the -:meth:`pipeline.load` function. If the :file:`csv2db.py` needs to -called at the end of a succession of statements, use the -:meth:`pipeline.build_load_statement` method, for example:: - - def loadTranscript2Gene(infile, outfile): - '''build and load a map of transcript to gene from gtf file - ''' - load_statement = P.build_load_statement( - P.toTable(outfile), - options="--add-index=gene_id " - "--add-index=transcript_id ") - - statement = ''' - gunzip < %(infile)s - | python %(scriptsdir)s/gtf2tsv.py --output-map=transcript2gene -v 0 - | %(load_statement)s - > %(outfile)s''' - P.run() - -See also the variants :meth:`pipeline.mergeAndLoad` and -`:meth:`pipeline.concatenateAndLoad` to combine multiple tables and -upload to the database in one go. - -Connecting to a database -======================== - -To use data in the database in your tasks, you need to first connect -to the database. The best way to do this is via the connect() method -in pipeline.py. - -The following example illustrates how to use the connection:: - - @transform( ... 
    )
    def buildCodingTranscriptSet(infile, outfile):

        dbh = connect()

        statement = '''SELECT DISTINCT transcript_id FROM transcript_info WHERE transcript_biotype = 'protein_coding' '''
        cc = dbh.cursor()
        transcript_ids = set([x[0] for x in cc.execute(statement)])
        ...

.. _pipelineReports:

Reports
-------

MultiQC
=======

When using cgat-core to build pipelines we recommend using `MultiQC `_ as the default reporting tool for generic third-party computational biology software.

To run MultiQC in our pipelines you only need to run a statement as a command-line task. For example we implement this in our pipelines as::

    @follows(mkdir("MultiQC_report.dir"))
    @originate("MultiQC_report.dir/multiqc_report.html")
    def renderMultiqc(infile):
        '''build multiqc report'''

        statement = '''LANG=en_GB.UTF-8 multiqc . -f;
                       mv multiqc_report.html MultiQC_report.dir/'''

        P.run(statement)


Rmarkdown
=========

MultiQC is very useful for reporting on third-party computational biology tools. However, currently it is very difficult to use it as a bespoke reporting tool. Therefore, one way of producing bespoke reports is to use the Rmarkdown framework and the render functionality of knitr.

Rendering an Rmarkdown document is very easy if you place the .Rmd file in the same test/ directory as the pipeline.yml. Then the file can easily be run using::

    @follows(mkdir("Rmarkdown.dir"))
    @originate("Rmarkdown.dir/report.html")
    def render_rmarkdown(outfile):

        NOTEBOOK_ROOT = os.path.join(os.path.dirname(__file__), "test")

        statement = '''cp %(NOTEBOOK_ROOT)s/report.Rmd Rmarkdown.dir &&
                       cd Rmarkdown.dir/ && R -e "rmarkdown::render('report.Rmd',encoding = 'UTF-8')" '''

        P.run(statement)

This should generate an html output of whatever report you wrote for your particular task.


Jupyter notebook
================

Another bespoke reporting approach that we use in our pipelines is a Jupyter notebook implementation executed from the command line. All that is required is that you place your jupyter notebook into the same test/ directory as the pipeline.yml and call the following::

    @follows(mkdir("jupyter_report.dir"))
    @originate("jupyter_report.dir/report.html")
    def render_jupyter(outfile):

        NOTEBOOK_ROOT = os.path.join(os.path.dirname(__file__), "test")

        statement = '''cp %(NOTEBOOK_ROOT)s/report.ipynb jupyter_report.dir/ && cd jupyter_report.dir/ &&
                       jupyter nbconvert --ExecutePreprocessor.timeout=None --to html --execute *.ipynb --allow-errors'''

        P.run(statement)


.. _ConfigurationValues:

Configuration values
--------------------

Setting up configuration values
===============================

There are different ways to pass on configuration values to pipelines. Here we explain the priority for all the possible options so you can choose the best one for your requirements.

The pipeline goes *in order* through different configuration options to load configuration values and stores them in the :py:data:`PARAMS` dictionary. This order determines a priority so values read in the first place can be overwritten by values read in subsequent steps; i.e. values read lastly have higher priority.

Here is the order in which the configuration values are read:

1. Hard-coded values in :file:`cgatcore/pipeline/parameters.py`.
2. Parameters stored in :file:`pipeline.yml` files in different locations.
3. Variables declared in the ruffus tasks calling ``P.run()``;
   e.g. ``job_memory=32G``
4.
:file:`.cgat.yml` file in the home directory -5. ``cluster_*`` options specified in the command line; - e.g. ``python pipeline_example.py --cluster-parallel=dedicated make full`` - - -This means that configuration values for the cluster provided as -command-line options will have the highest priority. Therefore:: - - python pipeline_example.py --cluster-parallel=dedicated make full - -will overwrite any ``cluster_parallel`` configuration values given -in :file:`pipeline.yml` files. Type:: - - python pipeline_example.py --help - -to check the full list of available command-line options. - -You are encouraged to include the following snippet at the beginning -of your pipeline script to setup proper configuration values for -your analyses:: - - # load options from the config file - from cgatcore import pipeline as P - # load options from the config file - P.get_parameters( - ["%s/pipeline.yml" % os.path.splitext(__file__)[0], - "../pipeline.yml", - "pipeline.yml"]) - -The method :meth:`pipeline.getParameters` reads parameters from -the :file:`pipeline.yml` located in the current :term:`working directory` -and updates :py:data:`PARAMS`, a global dictionary of parameter values. -It automatically guesses the type of parameters in the order of ``int()``, -``float()`` or ``str()``. If a configuration variable is empty (``var=``), -it will be set to ``None``. - -However, as explained above, there are other :file:`pipeline.yml` -files that are read by the pipeline at start up. In order to get the -priority of them all, you can run:: - - python pipeline_example.py printconfig - -to see a complete list of :file:`pipeline.yml` files and their priorities. - - -Using configuration values -========================== - -Configuration values are accessible via the :py:data:`PARAMS` -variable. The :py:data:`PARAMS` variable is a dictionary mapping -configuration parameters to values. Keys are in the format -``section_parameter``. For example, the key ``bowtie_threads`` will -provide the configuration value of:: - - bowtie: - threads: 4 - -In a script, the value can be accessed via -``PARAMS["bowtie_threads"]``. - -Undefined configuration values will throw a :class:`ValueError`. To -test if a configuration variable exists, use:: - - if 'bowtie_threads' in PARAMS: pass - -To test, if it is unset, use:: - - if 'bowie_threads' in PARAMS and not PARAMS['botwie_threads']: - pass - -Task specific parameters ------------------------- - -Task specific parameters can be set by creating a task specific section in -the :file:`pipeline.yml`. The task is identified by the output filename. -For example, given the following task:: - - @files( '*.fastq', suffix('.fastq'), '.bam') - def mapWithBowtie( infile, outfile ): - ... - -and the files :file:`data1.fastq` and :file:`data2.fastq` in the -:term:`working directory`, two output files :file:`data.bam` and -:file:`data2.bam` will be created on executing ``mapWithBowtie``. Both -will use the same parameters. To set parameters specific to the -execution of :file:`data1.fastq`, add the following to -:file:`pipeline.yml`:: - - data1.fastq: - bowtie_threads: 16 - -This will set the configuration value ``bowtie_threads`` to 16 when -using the command line substitution method in :meth:`pipeline.run`. 
To -get an task-specific parameter values in a python task, use:: - - @files( '*.fastq', suffix('.fastq'), '.bam') - def mytask( infile, outfile ): - MY_PARAMS = P.substitute_parameters( locals() ) - -Thus, task specific are implemented generically using the -:meth:`pipeline.run` mechanism, but pipeline authors need to -explicitely code for track specific parameters. - -Using different conda environments ----------------------------------- - -In addition to running a pipeline using your default conda environment, specifying `job_condaenv=""` to the -P.run() function allows you run the statement using a different conda environment. For example:: - - @follows(mkdir("MultiQC_report.dir")) - @originate("MultiQC_report.dir/multiqc_report.html") - def renderMultiqc(infile): - '''build mulitqc report''' - - statement = '''LANG=en_GB.UTF-8 multiqc . -f; - mv multiqc_report.html MultiQC_report.dir/''' - - P.run(statement, job_condaenv="multiqc") - -This can be extremely useful when you have python 2 only code but are running in a python 3 environment. Or -more importantly, when you have conflicting dependancies in software and you need to seperate them out into -two different environments.xs diff --git a/docs_readthedocs/defining_workflow/run_parameters.rst b/docs_readthedocs/defining_workflow/run_parameters.rst deleted file mode 100644 index e3b04f14..00000000 --- a/docs_readthedocs/defining_workflow/run_parameters.rst +++ /dev/null @@ -1,37 +0,0 @@ -.. _defining_workflow-run_parameters: - -====================== -Setting run parameters -====================== - -Our workflows are executed using defaults that specify parameters for -setting requirements for memory, threads, environment, e.c.t. Each of these -parameters can be modified within the pipeline. - -Modifiable run parameters -------------------------- - -- `job_memory`: Number of slots (threads/cores/CPU) to use for the task. Default: "4G" -- `job_total_memory`: Total memory to use for a job. -- `to_cluster`: Send the job to the cluster. Default: True -- `without_cluster`: When this is set to True the job is ran locally. Default: False -- `cluster_memory_ulimit`: Restrict virtual memory. Default: False -- `job_condaenv`: Name of the conda environment to use for each job. Default: will use the one specified in bashrc -- `job_array`: If set True, run statement as an array job. Job_array should be tuple with start, end, and increment. Default: False - -Specifying parameters to job ----------------------------- - -Parameters can be set within a pipeline task as follows:: - - @transform( '*.unsorted', suffix('.unsorted'), '.sorted') - def sortFile( infile, outfile ): - - statement = '''sort -t %(tmpdir)s %(infile)s > %(outfile)s''' - - P.run(statement, - job_condaenv="sort_environment", - job_memory=30G, - job_threads=2, - without_cluster = False, - job_total_memory = 50G) diff --git a/docs_readthedocs/function_doc/Core.rst b/docs_readthedocs/function_doc/Core.rst deleted file mode 100644 index 1c5c25fb..00000000 --- a/docs_readthedocs/function_doc/Core.rst +++ /dev/null @@ -1,15 +0,0 @@ - -=================== -Core helper modules -=================== - -Add links to the other core documentation - -.. 
toctree:: - - Core/Experiment.rst - Core/CSV.rst - Core/csv2db.rst - Core/Database.rst - Core/IOTools.rst - Core/Logfile.rst diff --git a/docs_readthedocs/function_doc/Core/CSV.rst b/docs_readthedocs/function_doc/Core/CSV.rst deleted file mode 100644 index b01f7bbb..00000000 --- a/docs_readthedocs/function_doc/Core/CSV.rst +++ /dev/null @@ -1,5 +0,0 @@ - -.. automodule:: cgatcore.csv - :members: - :show-inheritance: - diff --git a/docs_readthedocs/function_doc/Core/Database.rst b/docs_readthedocs/function_doc/Core/Database.rst deleted file mode 100644 index 6a500030..00000000 --- a/docs_readthedocs/function_doc/Core/Database.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.database - :members: - :show-inheritance: \ No newline at end of file diff --git a/docs_readthedocs/function_doc/Core/Experiment.rst b/docs_readthedocs/function_doc/Core/Experiment.rst deleted file mode 100644 index 7ee2d027..00000000 --- a/docs_readthedocs/function_doc/Core/Experiment.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.experiment - :members: - :show-inheritance: \ No newline at end of file diff --git a/docs_readthedocs/function_doc/Core/IOTools.rst b/docs_readthedocs/function_doc/Core/IOTools.rst deleted file mode 100644 index 6b45809d..00000000 --- a/docs_readthedocs/function_doc/Core/IOTools.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.iotools - :members: - :show-inheritance: \ No newline at end of file diff --git a/docs_readthedocs/function_doc/Core/Logfile.rst b/docs_readthedocs/function_doc/Core/Logfile.rst deleted file mode 100644 index 5c3f1c9c..00000000 --- a/docs_readthedocs/function_doc/Core/Logfile.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.Logfile - :members: - :show-inheritance: \ No newline at end of file diff --git a/docs_readthedocs/function_doc/Core/csv2db.rst b/docs_readthedocs/function_doc/Core/csv2db.rst deleted file mode 100644 index 80d547a7..00000000 --- a/docs_readthedocs/function_doc/Core/csv2db.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.csv2db - :members: - :show-inheritance: diff --git a/docs_readthedocs/function_doc/Overview.rst b/docs_readthedocs/function_doc/Overview.rst deleted file mode 100644 index 98d514f2..00000000 --- a/docs_readthedocs/function_doc/Overview.rst +++ /dev/null @@ -1,6 +0,0 @@ -.. function_doc-Overview - - -========================= -Overview of the functions -========================= diff --git a/docs_readthedocs/function_doc/Pipeline.rst b/docs_readthedocs/function_doc/Pipeline.rst deleted file mode 100644 index 87c00eff..00000000 --- a/docs_readthedocs/function_doc/Pipeline.rst +++ /dev/null @@ -1,8 +0,0 @@ - -================ -pipeline modules -================ - -.. automodule:: cgatcore.pipeline - :members: - :show-inheritance: diff --git a/docs_readthedocs/function_doc/Pipeline/Control.rst b/docs_readthedocs/function_doc/Pipeline/Control.rst deleted file mode 100644 index 75ebe0ed..00000000 --- a/docs_readthedocs/function_doc/Pipeline/Control.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.pipeline.control - :members: - :show-inheritance: diff --git a/docs_readthedocs/function_doc/Pipeline/Database.rst b/docs_readthedocs/function_doc/Pipeline/Database.rst deleted file mode 100644 index 40c20e2e..00000000 --- a/docs_readthedocs/function_doc/Pipeline/Database.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. 
automodule:: cgatcore.pipeline.database - :members: - :show-inheritance: diff --git a/docs_readthedocs/function_doc/Pipeline/Execution.rst b/docs_readthedocs/function_doc/Pipeline/Execution.rst deleted file mode 100644 index 8cb31a75..00000000 --- a/docs_readthedocs/function_doc/Pipeline/Execution.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.pipeline.execution - :members: - :show-inheritance: diff --git a/docs_readthedocs/function_doc/Pipeline/Files.rst b/docs_readthedocs/function_doc/Pipeline/Files.rst deleted file mode 100644 index 920d95b6..00000000 --- a/docs_readthedocs/function_doc/Pipeline/Files.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.pipeline.files - :members: - :show-inheritance: diff --git a/docs_readthedocs/function_doc/Pipeline/Utils.rst b/docs_readthedocs/function_doc/Pipeline/Utils.rst deleted file mode 100644 index 942da4e4..00000000 --- a/docs_readthedocs/function_doc/Pipeline/Utils.rst +++ /dev/null @@ -1,4 +0,0 @@ - -.. automodule:: cgatcore.pipeline.utils - :members: - :show-inheritance: diff --git a/docs_readthedocs/getting_started/Cluster_config.rst b/docs_readthedocs/getting_started/Cluster_config.rst deleted file mode 100644 index d7c10811..00000000 --- a/docs_readthedocs/getting_started/Cluster_config.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. _getting_started-Config: - - -===================== -Cluster configuration -===================== - -Currently SGE, SLURM, Torque and PBSPro workload managers are supported. The default cluster options for -cgatcore are set for SunGrid Engine (SGE). Therefore, if you would like to run an alternative workload manager -then you will need to configure your settings for your cluster. In order to do this you will need to -create a :file:`.cgat.yml` within the user`s home directory. - -This will allow you to overide the default configurations. To view the hardcoded parameters for cgatcore -please see the `parameters.py `_ -file. - -For an example of how to configure a PBSpro workload manager see this link to this `config example `_. - -The .cgat.yml is placed in your home directory and when a pipeline is executed it will automatically prioritise the -:file:`.cgat.yml` parameters over the cgatcore hard coded parameters. For example, adding the following to the -.cgat.yml file will implement cluster settings for PBSpro:: - - - memory_resource: mem - - options: -l walltime=00:10:00 -l select=1:ncpus=8:mem=1gb - - queue_manager: pbspro - - queue: NONE - - parallel_environment: "dedicated" - - - - diff --git a/docs_readthedocs/getting_started/Examples.rst b/docs_readthedocs/getting_started/Examples.rst deleted file mode 100644 index bf38d5f0..00000000 --- a/docs_readthedocs/getting_started/Examples.rst +++ /dev/null @@ -1,324 +0,0 @@ -.. _getting_started-Examples: - - -================== -Running a pipeline -================== - - -This section provides a tutorial-like introduction of how to run CGAT pipelines. As an example of how we build simple -computational pipelines please refer to `cgat-showcase `_. As an example of how we use cgatcore to -build more complex computational pipelines, please refer to the code detailed in our `cgat-flow repository `_. - -.. _getting_started-Intro: - -Introduction -============= - -A pipeline takes input data and performs a series of automated steps on it to produce some output data. - -Each pipeline is usually coupled with a report (usually MultiQC or Rmarkdown) document to -summarize and visualize the results. 
- -It really helps if you are familiar with: - - * the unix command line to run and debug the pipeline - * python_ in order to understand what happens in the pipeline - * ruffus_ in order to understand the pipeline code - * sge_ (or any other workflow manager) in order to monitor your jobs - * git_ in order to up-to-date code - -.. _getting_started-setting-up-pipeline: - -Setting up a pipeline -====================== - -**Step 1**: Install cgat-showcase (our toy example of a cgatcore pipeline): - -Check that your computing environment is appropriate and follow cgat-showcase installation instructions (see `Installation instructions `_). - -**Step2**: Clone the repository - -To inspect the code and the layout clone the repository:: - - git clone https://github.com/cgat-developers/cgat-showcase.git - -When inspecting the respoitory: -The source directory will contain the pipeline master script named -:file:`cgatshowcase/pipeline_.py` - -The default configuration files will be contained in the folder -:file:`cgatshowcase/pipeline/` - -All our pipelines are written to be lightweight. Therefore, a module file -assoaiated with the pipeline master script, typically named -:file:`cgatshowcase/Module.py`, is usually where code required to run the tasks -of the pipleine is located. - -**Step 3**: To run a pipeline you will need to create a working directory -and enter it. For example:: - - mkdir version1 - cd version1/ - -This is where the pipeline will be executed and files will be generated in this -directory. - -However, the cgat-showcase example comes with test data and this can be downloaded by running:: - - wget https://www.cgat.org/downloads/public/showcase/showcase_test_data.tar.gz - tar -zxvf showcase_test_data.tar.gz - cd showcase_test_data - -**Step 4**: Configure the cluster - -Running pipelines on a cluster required the drmaa API settings to be configures and passed -to cgatcore. The default cluster engine is SGE, however we also support SLURM and Torque/PBSpro. -In order to execute using a non SGE cluster you will need to setup a `.cgat.yml` file in your -home directory and specify parameters according to the `cluster configuration documentation `_. - -**Step 5**: Our pipelines are written with minimal hard coded options. Therefore, -to run a pipeline an initial configuration file needs to be -generated. A configuration file with all the default values can be obtained by -running:: - - cgatshowcase config - -For example, if you wanted to run the transdiffexprs pipeline you would run:: - - cgatshowcase transdiffexprs config - - -This will create a new :file:`pipeline.yml` file. **YOU MUST EDIT THIS -FILE**. The default values are unlikely to be configured correctly for your data. The -configuration file should be well documented and the format is -simple. The documenation for the `ConfigParser -`_ python module -contains the full specification. - -**Step 6**: Add the input files. The required input is specific for each -pipeline in the documentation string at the; read the pipeline documentation to find out exactly which -files are needed and where they should be put. Commonly, a pipeline -works from input files linked into the working directory and -named following pipeline specific conventions. - -**Step 7**: You can check if all the external dependencies to tools and -R packages are satisfied by running:: - - cgatshowcase check - -.. 
_getting_started-pipelineRunning: - -Running a pipeline -=================== - -pipelines are controlled by a single python script called -:file:`pipeline_.py` that lives in the source directory. Command line usage information is available by running:: - - cgatshowcase --help - -Alternatively, you can call the python script directly:: - - python /path/to/code/cgatshowcase/pipeline_.py --help - -The basic syntax for ``pipeline_.py`` is:: - - cgatshowcase [workflow options] [workflow arguments] - -For example, to run the readqc pipeline you would run the following:: - - cgatshowcase readqc make full - -``workflow options`` can be one of the following: - -make - - run all tasks required to build task - -show - - show tasks required to build task without executing them - -plot - - plot image of workflow (requires `inkscape `_) of - pipeline state for task - -touch - - touch files without running task or its pre-requisites. This sets the - timestamps for files in task and its pre-requisites such that they will - seem up-to-date to the pipeline. - -config - - write a new configuration file :file:`pipeline.ini` with - default values. An existing configuration file will not be - overwritten. - -clone - - clone a pipeline from :file:`srcdir` into the current - directory. Cloning attempts to conserve disk space by linking. - -In case you are running a long pipeline, make sure you start it -appropriately, for example:: - - nice -19 nohup cgatshowcase make full -v5 -c1 - -This will keep the pipeline running if you close the terminal. - -Fastq naming convention ------------------------ - -Most of our pipelines assume that input fastq files follows the following -naming convention (with the read inserted between the fastq and the gz. The reason -for this is so that regular expressions do not have to acount for the read within the name. -It is also more explicit:: - - sample1-condition.fastq.1.gz - sample1-condition.fastq.2.gz - - -Additional pipeline options ---------------------------- - -In addition to running the pipeline with default command line options, running a -pipeline with --help will allow you to see additional options for ``workflow arguments`` -when running the pipelines. These will modify the way the pipeline in ran. - -`- -no-cluster` - - This option allows the pipeline to run locally. - -`- -input-validation` - - This option will check the pipeline.ini file for missing values before the - pipeline starts. - -`- -debug` - - Add debugging information to the console and not the logfile - -`- -dry-run` - - Perform a dry run of the pipeline (do not execute shell commands) - -`- -exceptions` - - Echo exceptions immidietly as they occur. - -`-c - -checksums` - - Set the level of ruffus checksums. - -.. _getting_started-Building-reports: - -Building pipeline reports -================================ - -We always associate some for of reporting with our pipelines to display summary information as a set of nicely formatted -html pages. - -Currently in CGAT we have 3 preferred types of report generation. - - * MultiQC report (for general alignment and tool reporting) - * R markdown (for bespoke reporting) - * Jupyter notebook (for bespoke reporting) - -To determine which type of reporting is implimented for each pipeline, refer to -the specific pipeline documentation at the beginning of the script. 
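Under the hood, building the report is usually wired up as just another task in the workflow, which shells out to the reporting tool. The following is a minimal sketch of that pattern; it is illustrative only and is not the actual cgat-showcase implementation (the output directory name follows the convention used in the tutorial)::

    from ruffus import *
    from cgatcore import pipeline as P

    @follows(mkdir("MultiQC_report.dir"))
    def build_report():
        '''illustrative task: aggregate QC output from the working
        directory into an HTML report with MultiQC.'''
        statement = "multiqc . -o MultiQC_report.dir"
        P.run(statement)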
- -Reports are generated using the following command once a workflow has completed:: - - cgatshowcase make build_report - -MultiQC report --------------- - -MultiQC is a python framework for automating reporting and we have imliemnted it in the -majority of our workflows to generate QC stats for frequently used tools (mostly in our -generic workflows). - - -R markdown ----------- -R markdown report generation is very useful for generating bespoke reports that require user -defined reporting. We have implimented this in our bamstats workflow. - -Jupyter notebook ----------------- -Jupyter notebook is a second approach that we use to produce bespoke reports. An example is -also implimented in our bamstats workflow. - -.. _getting_started-Troubleshooting: - -Troubleshooting -=============== - -Many things can go wrong while running the pipeline. Look out for - - * bad input format. The pipeline does not perform sanity checks on the input format. If the input is bad, you might see wrong or missing results or an error message. - * pipeline disruptions. Problems with the cluster, the file system or the controlling terminal might all cause the pipeline to abort. - * bugs. The pipeline makes many implicit assumptions about the input files and the programs it runs. If program versions change or inputs change, the pipeline might not be able to deal with it. The result will be wrong or missing results or an error message. - -If the pipeline aborts, locate the step that caused the error by -reading the logfiles and the error messages on stderr -(:file:`nohup.out`). See if you can understand the error and guess the -likely problem (new program versions, badly formatted input, ...). If -you are able to fix the error, remove the output files of the step in -which the error occured and restart the pipeline. Processing should -resume at the appropriate point. - -.. note:: - - Look out for upstream errors. For example, the pipeline might build - a geneset filtering by a certain set of contigs. If the contig - names do not match, the geneset will be empty, but the geneset - building step might conclude successfully. However, you might get - an error in any of the downstream steps complaining that the gene - set is empty. To fix this, fix the error and delete the files - created by the geneset building step and not just the step that - threw the error. - -Common pipeline errors ----------------------- - -One of the most common errors when runnig the pipeline is:: - - GLOBAL_SESSION = drmaa.Session() - NameError: name 'drmaa' is not defined - -This error occurrs because you are not connected to the cluster. Alternatively -you can run the pipleine in local mode by adding `- -no-cluster` as a command line option. - -Updating to the latest code version ------------------------------------ - -To get the latest bugfixes, go into the source directory and type:: - - git pull - -The first command retrieves the latest changes from the master -repository and the second command updates your local version with -these changes. - - -Using qsub commands -------------------- - -We would always recommend using cgat-core to perform the job submission as this -is handled in the background without the need to use qsub commands. However, -if users wish to use qsub then it is perfectly simple to do so. Since our -statements to P.run() are essentially commandline scripts then you can write -the qsub as you would normally do when sending a script to the commandline. 
For example::
-
-   statement = "qsub [commands] echo 'This is where you would put the commands you want run' "
-
-   P.run(statement)
-
-When running the pipeline, make sure you specify `--no-cluster` as a command line option and you are
-good to go.
-
-.. _pipelineReporting:
diff --git a/docs_readthedocs/getting_started/Installation.rst b/docs_readthedocs/getting_started/Installation.rst
deleted file mode 100644
index c3f3271e..00000000
--- a/docs_readthedocs/getting_started/Installation.rst
+++ /dev/null
@@ -1,112 +0,0 @@
-.. _getting_started-Installation:
-
-
-============
-Installation
-============
-
-The following sections describe how to install the cgatcore framework.
-
-.. _getting_started-Conda:
-
-Conda installation
-------------------
-
-Our preferred method of installation is conda. If you don't have conda installed,
-please install it using `miniconda `_ or `anaconda `_.
-
-cgatcore is installed from the bioconda channel and the recipe can be found on `github `_. To install cgatcore::
-
-   conda install -c conda-forge -c bioconda cgatcore
-
-.. _getting_started-pip:
-
-Pip installation
-----------------
-
-We recommend installation through conda because it manages the dependencies. However, cgatcore is
-generally lightweight and can be installed easily with the pip package manager, although you may also have to
-install other dependencies manually::
-
-   pip install cgatcore
-
-.. _getting_started-Automated:
-
-Automated installation
-----------------------
-
-The preferred method to install cgatcore is conda, but we have also created a bash installation script,
-which uses `conda `_ under the hood.
-
-Here are the steps::
-
-   # download installation script:
-   curl -O https://raw.githubusercontent.com/cgat-developers/cgat-core/master/install.sh
-
-   # see help:
-   bash install.sh
-
-   # install the development version (recommended, no production version yet):
-   bash install.sh --devel [--location ]
-
-   # the code is downloaded in zip format by default. If you want to get a git clone, use:
-   --git # for an HTTPS clone
-   --git-ssh # for a SSH clone (you need to be a cgat-developer contributor on GitHub to do this)
-
-   # enable the conda environment as requested by the installation script
-   # NB: you probably want to automate this by adding the instructions below to your .bashrc
-   source /conda-install/etc/profile.d/conda.sh
-   conda activate base
-   conda activate cgat-c
-
-The installation script will put everything under the specified location.
-The aim of the script is to provide a portable installation that does not interfere with existing
-software. As a result, you will have a conda environment working with cgat-core which can be enabled
-on demand according to your needs.
-
-.. _getting_started-Manual:
-
-Manual installation
--------------------
-
-To obtain the latest code, check it out from the public git_ repository and install it::
-
-   git clone https://github.com/cgat-developers/cgat-core.git
-   cd cgat-core
-   python setup.py develop
-
-Once checked out, you can get the latest changes by pulling::
-
-   git pull
-
-
-.. _getting_started-Additional:
-
-Installing additional software
-------------------------------
-
-When building your own workflows we recommend using conda to install software into your environment where possible.
- -This can easily be performed by:: - - conda search - conda install - -Access libdrmaa shared library ------------------------------- - -You may also need access to the libdrmaa.so.1.0 C library, which can often be installed as part of the -libdrmaa-dev package on most Unixes. Once you have installed that, you may need to tell DRMAA Python -where it is installed by setting the DRMAA_LIBRARY_PATH environment variable, if it is not installed -in a location that Python usually looks for libraries. - -In order to set this correctly every time please add the following line to your bashrc, but set the library -path to the location of the libdrmaa.so.1.0:: - - export DRMAA_LIBRARY_PATH=/usr/lib/libdrmaa.so.1.0 - - - -.. _conda: https://conda.io diff --git a/docs_readthedocs/getting_started/Tutorial.rst b/docs_readthedocs/getting_started/Tutorial.rst deleted file mode 100644 index 13edd592..00000000 --- a/docs_readthedocs/getting_started/Tutorial.rst +++ /dev/null @@ -1,72 +0,0 @@ -.. _getting_started-Tutorial: - - -============================= -Running a pipeline - Tutorial -============================= - - -Before beginning this tutorial make sure you have the CGAT-core installed correctly, -please see here (see :ref:`getting_started-Installation`) for installation instructions. - -As a tutorial example of how to run a CGAT workflow we will run the cgat-showcase pipeline. Therefore, -you will also need to install the cgat-showcase (see `instructions `_) - -The aim of this pipeline is to perform pseaudoalignment using kallisto. The pipeline can be ran locally or -dirtributed accross a cluster. This tutorial will explain the steps required to run this pipeline. Further documentation -on cgat-showcase can be found `here `_. - -The cgat-showcase highlights some of the functionality of cgat-core. However, we also have our utility -pipelines contained in the cgat-flow repository which demonstrate our advanced pipelines for next-generation -sequencing analysis (see `cgat-flow `_). - -Tutorial start --------------- - - -**1.** First download the tutorial data:: - - mkdir showcase - cd showcase - wget https://www.cgat.org/downloads/public/showcase/showcase_test_data.tar.gz - tar -zxvf showcase_test_data.tar.gz - -**2.** Next we will generate a configuration yml file so the pipeline output can be modified:: - - cd showcase_test_data - cgatshowcase transdiffexpres config - -or you can alternatively call the workflow file directly:: - - python /path/to/file/pipeline_transdiffexpres.py config - -This will generate a **pipeline.yml** file containing the configuration parameters than can be used to modify -the output of the pipleine. However, for this tutorial you do not need to modify the parameters to run the -pipeline. In the :ref:`modify_config` section below I have detailed how you can modify the config file to -change the output of the pipeline. - -**3.** Next we will run the pipleine:: - - cgatshowcase transdiffexpres make full -v5 --no-cluster - -This ``--no-cluster`` will run the pipeline locally if you do not have access to a cluster. Alternatively if you have a -cluster remove the ``--no-cluster`` option and the pipleine will distribute your jobs accross the cluster. - -.. note:: - - There are many commandline options available to run the pipeline. To see available options please run :code:`cgatshowcase --help`. - -**4.** Generate a report - -The final step is to generate a report to display the output of the pipeline. 
We have a preference for using MultiQC -for generate bioinformatics tools (such as mappers and pseudoaligners) and Rmarkdown for generating custom reports. -In order to generate these run the command:: - - cgatshowcase transdiffexprs make build_report -v 5 --no-cluster - -This will generate a MultiQC report in the folder `MultiQC_report.dir/` and an Rmarkdown report in `R_report.dir/`. - - - -This completes the tutorial for running the transdiffexprs pipeline for cgat-showcase, hope you find it as useful as -we do for writing workflows within python. diff --git a/docs_readthedocs/img/CGAT_logo.png b/docs_readthedocs/img/CGAT_logo.png deleted file mode 100755 index de6a95e41e986f8b2144f9bd70173f3d5c38c06a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 702858 zcmeI536NdYedfDcy-O{%T56isVzhujj1gN5ky2x^Cyr%z94>H@iCLbT%|JbbaWzF8WwQL(|m_4UJ!IZ^E7Z zi(Z(H|8-pdTbJL|(9qX2`CsE7-Txv&H7xk-Z++||4Gmv8si9%;KQ}atPT>DL8XCTI zMnl8fS2i@PxDSD^yJyW$E@|ler|u73bpG#rxv^maff^?!U-?G@BtQZrKmsHnMPO2p z`9}gIKmsH{0wf?wfWj#Wior;L1W14cNMMoxg_G@o1W14cNPq+~5TI~ofW@FBKmsH{ z0wh4;WE&s>5+DH*Ab|`7D4ZE!F(?U;011!)2~arM21tMeNPq-LAOiskX9iddN&+N6 z0wh2J6i&7Q5+DH*AORA{K!C!T0TzRj011!)36KDVlWl+mNPq-LfCMrSXwDFpL1QFv zaP;LPM~6kphDV+)E#lbWt)t^xOYh*iv+a~AP18HuPVVYBXG&97>QlZY0TLjA=m~I> z_vov5vVcMn-+g@scg7BHD;LGAEFhw@x-OeN{W2+zHyLP7&*$g# zTw^i^rcDAQKmv{lP&ge|fMoU%%RL9yK_-P;DH+R2FbsLZ^>mUoGbI5MAOQ;mIIGqI zWc4hBSPH#VPsMJ`clJGm45^)VsWUecAOU*>D4h0KOXh@0Jy6Qs`yYjBN=ET57__sa z^%P7O^35>XLjoi~0{IEBt2RGFJt;shA(lhK51}(wPl~QnId%T`v0lZs70jOmNPq-1 z6QFQv2AVaw`1CR%fEl(Y&wU_y%L@`90TPIV0EN>{Jw~jTr9c<<-HII&O7HNR1W14c zLMK4sv|WSfj17AHG7~R!)uO{TQxpa!0TLhq&k1mHkfjQOi@9UZcQ9+l)dmHY%K6@G z#Nru_biS7g-;n?bgiV0LX}td6VnQ!{+o`~KvER)A#qVtU^1JW&e-a=85(u6Eh0|0O z!n1^n3E9ZMm%1qvt)yGnUkq00G7}OY0S5#qocf`oS9aU3Z;5>X^rs$LiqWjD2@O2$ zB>@sJMS#MolPtWeZM(inA1lQaK)CsPiV5G6011RkfWoP#?kJghI5HIyAORBao&be2 zZxvB8^Acoy5+DH*2#NrOQ@y$v*45~^9K-SVWkNCawYnLDMaOCn%6K^g~-4uoSZEhPaGFhYRpSqCe|G&j9{ zbHTR&s>7(}$K6vesQEcRBmoj2fxrk*IIC6|$26;=$?`$>)C;o&XIK&-0TS??0EM%1 z4Y7l^GM9{Z(OJulgEBH>a1tN^-w9AS(=~(*R?#=hZC2AqiLcQ&Ypf5M>kD?Wtt>F0zMFE zoS2yK!44-jVCwAVcW!f%b;Oxs{mF^__e5Mk7L5c*px^`w>TzNtpp83j=DxGqTR(hF z4sB|u$^=P(1iT_pMB&6{K(B4TmYeA5ZHU-h?|r>r<}_NpRGAtHkbs8-3MZVL2V|lZ z!qJ>R`^&LyOlyJ^(k zBw(08QG^q-XJ7xt7r0k@*5)*HxNqo4Un?7L+p7Ll5WXb= z5+H$S2^3;h?a=T;Z|%H|gPNtSSSaGi(P^8<7UTYwu|)?brv38b=;TGFzpmz{RsEfx zn%~vmJ|$B`1||U#Ac5cr#4ns!aKa;~;T^(FJa}{(E=G<{6=JEqu|hsCpFiyn7f#on zG7}^L5+DJ82*fO$g&owK^drX?Z8&r~{K(DYlijK|8{Cpp_fCQ`& zh*mgFZV5CvF<`BNGG!!29-)|UEG^4rxt;JozkOCO1+<(D?~niqL_i=;^@Mc3vh~X7 zM>gSJzA&=f6R}Kv8K|8pAbd*#BoGaO2!#{#fv`1DG&NLS5DZzqFtTF($Z|vDmE}3A z-Jy1>abqkJAOX(_#3q~(t0wgFsRI{7GM)Ft%D3q`?alx5oa3ON@=+L)1W14cJSPy9 zaBh10=6Ciz%I=7p%2rDeoq+nY}6F1^ER5+DH*2%11V!Wo%r!mV6CvSQuv>VW*q zYTtzKozusOLe=2#J_(ROa0DU|&X`madim7;$u8O88dEicKhi&SVCl?ipYlElkN^pU zMIdGqv@tq?W?2N2XEDi~fKrsmf>~Th*n%J!1(S569>XohbEy5TlMmno~ z%KIch0wfR+flxip&{Y$X2`_WQp=ANJl{}_y*gu{pI^&W636Ow41cDUKpjESIl9{yi zpY9nwySpuU%L@`90TKw5KzPC#u4)!mGR1dec>HLfUlN*40wh2Jr3Atg&eyhI8{BLl zO#k`Y-B%Z3EVHz2rLNIidtPrx>SnBOcRjsf*E1W2p58FBWoTsUP*Ti}xwAUv&6;)Y zlH)EqWBSs0$$egs00{&{Ah5|n!L8TCB9lktck6&inZC6V_bVGQB9jC+ad`01^m-KuE$FoN7Kgyc((*@gkEr2p7o2$Qs;m(io&-q16@g%cGc47_a3=a@JwLs~a+|yTv{|us z)$V&~%X6Q*x3o((E8FQy=bv@&)$l>HMr3#rAOZUX!Vu1Y&OBk?Y_&#=qphQyRf~?< z7yo!|wNlI9hwFLPx32CvcZqxyh9rS-2nH1ic)bE+wdPwfn%8PLeW67W|z{om~X^Jmu% zeKCU5rcJm0vuC!&bg-Cd)fCl1?KIsinLY_96Yx|xH@|b+@W`{urLqta`?O=BriF|m z$ReiA5A3L>sB~&)1dX3Xh?0Ph!ij0LJNA6XTiocLz4x7g;+$@hvm($*8_iM_o2n_W zQ#*4uKSn1369l{yPS2;&ZW&vA$J>9(1t*eiaZY>ljVH~J4D5L@`>H8`P&++u7&Nbd 
Vfi8N6K6;0K6Gjib+q_*w)*0x>W)JQG#q(+WT+dRH#@^x%6 z%7TsmM2S4qR5+BF`UAaO+1u`wb4lT>{Vl*#?94OIOsQ1uD0M%@GZD&!X)$=M`?Q5K zhuqh%Uk}N|U$w+5DiCJNDi7DfFevf$*I!>PN8E>D=&sE3!vd;Vds?eC;Y4wA;)070 zw^j1jMnV+~0dVcwwW#RiCN98#_wL=LcT8LtLO20#c;wmFwqFYe)C9!%(iF;Cv0Ouk z`-To559bwfd+_MAied8K#P}tY5;K0_W#&)KL<$tnx>WH*gLc*prW~M}d?H`rd`PBT z-s^_w%1)=q6fyvr$9$twdBd|F5-uEHN!uz0gVNOP47Vwq`Ej9`s7(;VuOcWPu2TgE zI{YCc{1QU_;ty|AVUoAcKmUAr3dyHhE{YucM;5a^LAS}I$6BIpD@cFN1e zS`<_`mqIN4pX@{7lxa@rKvNj`lSx@)81(Gnk)28qLuG1nc2enRe+ymNxN#$1Yo}~e zIAMubxbUk&ZJ@d~($Y_(og7qJNO9fTJonW3-;Zybgz}PFyCe2E!X~?z(5p6^7rV(~ zMTyuQ@39Zgx&5?RnD67iYFZ;4dPaDD;Y2CVOYy`LPn27TzSdG_u{?mQJNX9%O;`?G zrky-_vny7pd~}(R0iP`rApC!Ao1Ri4mJcH3%fMw3*VcVIEhBYUrSWbdc(7V6m5(gcBo`#Aa9F=g^jbAfs| z^x_RVH7F3q{FJBLm@Yd8%%G0tou9B7mScLbX#*tFhf4>0Bm>(I23+V}@S$h^*m&qB z*kcYtK_6tI+jdUR=gpBVX3B6q|L*hnXtnvVka&J(ktnzh%T_|UrG5PS0Ts9J^UqEt}f6!_-!T!RgU+FIu7qRDhX0IOy< zy?rxLkU)w6o~oGcSnk-Rf;o9xPWpAJ)qi~s}xGeSL_bW__wfz#>GkqSsPMRNr%=h|7jLi2yE ziY}ZWh%Id~V+dz@M!&ah{^lZ|0yb>o${%@L#R&iR-!*S{_CI4u$hqtc2^yQO{oy$#(^hb7=UX zd}%Otv;>4O_#mIqLP(U1ilf&Q+F5%$JuMZ6NlC~^#w0D-8A`+&u5$Y^9ANYZ2Zb}u zT#R~h!fl$1(tD0{Dq$;7Bq3UmPpFa6CtXFRc(Ow>Gl@5h#G(tRnxTOJVF@_{X7y2heQ)HDwzUcf%d9Z zYcc3-Xj9oI$Futs`qeuo5o)PRkJzv0cu#_!aSbZ zA!~msDi)i=<3caJ+WF#Bs%Kq94cMU%{f<1HtUjqqA#k8L9QaC~gnf_dX|4VsNO;1& z`R1EKnQ)E2YPmxq#WNHSQ-rB@$|OpydgK#2f_Qwf_yB&_?yS)Vws47<+7DjpIT z8oH-mAQLROFim}@HMCB(X7*%u{38Jp@Phz_(`-G!c{Gl2-Mc9a90qMDm*>6x{R zsy1JKKmsICLV&_q0>k{Ja6mDE2>;-TM&E^tBihVEb(RCVPc8JI+R4QkL}p9^0TSRQ z?~W@C`?%uxvfcaF;S3hXrs&QdKMT-Dtvgk}(mT52XV~rCs7)=Ge9eM98!ZA8CxOTa zP&j?5P@KC05#4j((b4g(zSxw{j)E-9IsC7Gapa{R_8cGLV{Z#nPgsvq+M5+DH= z1Sp&?sMpN%;ONVcPv|HvY6kTB!#O>lkGr4tSM_`NNnAhF4pa-Ea=&$I z>m^@noViFPF5{2@378~6^)yLWU)pd%g^~`BJmdM0XuQ%{t2#AnCsnf+Y<@-pjtNjW z9an%t_CiEQ#wMlIw^7kzs-~z!Xy^D(?#|zFi*8yBZqnCO_-Zf;3D_Y(;k3iqR4(v7 zg^uDCQtD7gvE1f;y^0*}UR}YH z6YN$qg)=G~(|2F@=@L{d*TjCb*vTgGV6k3 z021(o0EN?&ig05F+*6UU5La=106D^9ugn{5+DJu2~aq_ zt{xgk0wh2JB;XqX3a4*1LVHMn1W14cye2^5%uqf5{ntPHREFRTN&+N60wfSC0jg)L zHOq35011!)2^b+j;WUDkiIM;bkN^q9N`S%{Yt6D;BtQZrKmtYxP&kcXWuhcN0wh2J zu@azg##*y17YUF636Ovh0u)XoSeYmZkN^pgK&%8PoUzs{%S8etKmsISgg|3M!^Fhn bJ2w9K_hwH2^T`}Obn!`_, we have deomonstrated the functionality of our -flexible workflow management system using a simple RNA-seq pipeline in `cgat-showcase `_. - -CGAT-core is open-sourced, powerful and user-friendly, and has been continually developed -as a Next Generation Sequencing (NGS) workflow management system over the past 10 years. - -For more advanced examples of cgatcore utilities please refer to our `cgat-flow `_ repository, however -please be aware that this is in constant development and has many software dependancies. - - -.. _manual-quick_example: - --------- -Citation --------- - -Our workflow management system is published in F1000 Research: - -Cribbs AP, Luna-Valero S, George C et al. CGAT-core: a python framework for building scalable, reproducible computational biology workflows [version 1; peer review: 1 approved, 1 approved with reservations]. F1000Research 2019, 8:377 -(`https://doi.org/10.12688/f1000research.18674.1 `_) - -.. _manual-support: - -------- -Support -------- - -- Please refer to our :ref:`FAQ` section -- For bugs and issues, please raise an issue on `github `_ -- For contributions, please refer to our contributor section and `github `_ source code. - --------- -Examples --------- - -**cgat-showcase** - This is a toy example of how to develop a simple workflow. Please refer to the `github page `_ and the `documentation `_. 
-**cgat-flow** - As an example of the flexibility and functionality of CGAT-core, we have developed a set of fully tested production pipelines for automating the analysis of our NGS data. Please refer to the `github `_ page for information on how to install and use our code. -**Single cell RNA-seq** - The cribbs lab use CGAT-core to develop pseudoalignment pipelines for single cell `dropseq methods `_ - The sansom lab use the CGAT-core workflow engine to develop single cell `sequencing analysis workflows `_. - - -------------------------------------- -Selected publications using CGAT-core -------------------------------------- - -CGAT-core has been developed over the past 10 years and as such has been used in many previously published articles - -For a non-comprehensive list of citations please see our :citing and :ref:`project_info-citations` - - - - -.. toctree:: - :caption: Getting started - :name: getting_started - :maxdepth: 1 - :hidden: - - getting_started/Installation.rst - getting_started/Cluster_config.rst - getting_started/Examples.rst - getting_started/Tutorial.rst - -.. toctree:: - :caption: Build a workflow - :name: build - :maxdepth: 1 - :hidden: - - defining_workflow/Writing_workflow.rst - defining_workflow/run_parameters.rst - defining_workflow/Tutorial.rst - -.. toctree:: - :caption: Working with remote files - :name: build - :maxdepth: 1 - :hidden: - - remote/S3.rst - remote/GC.rst - remote/Azure.rst - -.. toctree:: - :caption: cgatcore functions - :name: function_doc - :maxdepth: 1 - :hidden: - - function_doc/Pipeline.rst - function_doc/Core.rst - -.. toctree:: - :caption: Project Info - :name: project-info - :maxdepth: 1 - :hidden: - - project_info/Contributing.rst - project_info/how_to_contribute.rst - project_info/citations.rst - project_info/FAQ.rst - project_info/Licence.rst diff --git a/docs_readthedocs/project_info/Contributing.rst b/docs_readthedocs/project_info/Contributing.rst deleted file mode 100644 index 607831ef..00000000 --- a/docs_readthedocs/project_info/Contributing.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. project_info-Contributing - -========== -Developers -========== - -The following individuals are the main developers of the cgatcore - -Andreas Heger - -`Adam Cribbs `_ - -Sebastian Luna Valero - -Hania Pavlou - -David Sims - -Charlotte George - -Tom Smith - -Ian Sudbery - -Jakub Scaber - -Mike Morgan - -Katy Brown - -Nick Ilott - -Jethro Johnson - -Katherine Fawcett - -Steven Sansom - -Antonio Berlanga diff --git a/docs_readthedocs/project_info/FAQ.rst b/docs_readthedocs/project_info/FAQ.rst deleted file mode 100644 index fa0af778..00000000 --- a/docs_readthedocs/project_info/FAQ.rst +++ /dev/null @@ -1,9 +0,0 @@ -.. _FAQ: - -==== -FAQs -==== - -As our workflow develops we will add frequently asked questions here. - -In the meantime please add issues to the `github page `_ diff --git a/docs_readthedocs/project_info/Licence.rst b/docs_readthedocs/project_info/Licence.rst deleted file mode 100644 index 423a15ee..00000000 --- a/docs_readthedocs/project_info/Licence.rst +++ /dev/null @@ -1,36 +0,0 @@ -.. project_info-Licence - - -======= -Licence -======= - -CGAT-core is an open-source project and we have made the cgat-developers repositor available under the open source permissive free MIT software licence, allowing free and full use of the code for both commercial and non-commercial purposes. 
A copy of the licence is shown below: - -MIT License ------------ - -Copyright (c) 2018 cgat-developers - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - -Contributions by @andreashegergenomics are provided under the -MIT licence and are Copyright (c) 2018 GENOMICS plc. diff --git a/docs_readthedocs/project_info/citations.rst b/docs_readthedocs/project_info/citations.rst deleted file mode 100644 index 31111f78..00000000 --- a/docs_readthedocs/project_info/citations.rst +++ /dev/null @@ -1,59 +0,0 @@ -.. _project_info-citations: - -==================== -Citing and Citations -==================== - -cgatcore has been developed over the past 10 years and as such has been used in a number of previously published scientific artciles. - -Citing cgatcore ---------------- - -When using cgatcore for a publication, **please cite the following article** in you paper: - -ADD CITATION HERE - -More references ---------------- - -A number of publications that have used CGAT-developer tools are listed below, however this list is not an exhastive list: - -**A ChIP-seq defined genome-wide map of vitamin D receptor binding: associations with disease and evolution** SV Ramagopalan, A Heger, AJ Berlanga, NJ Maugeri, MR Lincoln, ... Genome research 20 (10), 1352-1360 2010 - -**Sequencing depth and coverage: key considerations in genomic analyses** D Sims, I Sudbery, NE Ilott, A Heger, CP Ponting Nature Reviews Genetics 15 (2), 121 2014 - -**KDM2B links the Polycomb Repressive Complex 1 (PRC1) to recognition of CpG islands** AM Farcas, NP Blackledge, I Sudbery, HK Long, JF McGouran, NR Rose, ... elife 2012 - -**Targeting polycomb to pericentric heterochromatin in embryonic stem cells reveals a role for H2AK119u1 in PRC2 recruitment** S Cooper, M Dienstbier, R Hassan, L Schermelleh, J Sharif, ... Cell reports 7 (5), 1456-1470 2014 - -**Long non-coding RNAs and enhancer RNAs regulate the lipopolysaccharide-induced inflammatory response in human monocytes** NE Ilott, JA Heward, B Roux, E Tsitsiou, PS Fenwick, L Lenzi, I Goodhead, ... Nature communications 5, 3979 2014 - -**Population and single-cell genomics reveal the Aire dependency, relief from Polycomb silencing, and distribution of self-antigen expression in thymic epithelia** SN Sansom, N Shikama-Dorn, S Zhanybekova, G Nusspaumer, ... Genome research 24 (12), 1918-1931 2014 - -**Epigenetic conservation at gene regulatory elements revealed by non-methylated DNA profiling in seven vertebrates** HK Long, D Sims, A Heger, NP Blackledge, C Kutter, ML Wright, ... 
Elife 2 2013 - -**The long non‐coding RNA Paupar regulates the expression of both local and distal genes** KW Vance, SN Sansom, S Lee, V Chalei, L Kong, SE Cooper, PL Oliver, ... The EMBO journal 33 (4), 296-311 2014 - -**A genome-wide association study implicates the APOE locus in nonpathological cognitive ageing** G Davies, SE Harris, CA Reynolds, A Payton, HM Knight, DC Liewald, ... Molecular Psychiatry 19 (1), 76 2014 - -**Predicting long non-coding RNAs using RNA sequencing** NE Ilott, CP Ponting Methods 63 (1), 50-59 2013 - -**Next-generation sequencing of advanced prostate cancer treated with androgen-deprivation therapy** P Rajan, IM Sudbery, MEM Villasevil, E Mui, J Fleming, M Davis, I Ahmad, ... European urology 66 (1), 32-39 2014 - -**The long non-coding RNA Dali is an epigenetic regulator of neural differentiation** V Chalei, SN Sansom, L Kong, S Lee, JF Montiel, KW Vance, CP Ponting Elife 3 2014 - -**GAT: a simulation framework for testing the association of genomic intervals** A Heger, C Webber, M Goodson, CP Ponting, G Lunter Bioinformatics 29 (16), 2046-2048 2013 - -**De novo point mutations in patients diagnosed with ataxic cerebral palsy** R Parolin Schnekenberg, EM Perkins, JW Miller, WIL Davies, ... Brain 138 (7), 1817-1832 2015 - -**SPG7 mutations are a common cause of undiagnosed ataxia** G Pfeffer, A Pyle, H Griffin, J Miller, V Wilson, L Turnbull, K Fawcett, ... Neurology 84 (11), 1174-1176 2015 - -**CDK9 inhibitors define elongation checkpoints at both ends of RNA polymerase II–transcribed genes** C Laitem, J Zaborowska, NF Isa, J Kufs, M Dienstbier, S Murphy Nature Structural and Molecular Biology 22 (5), 396 2015 - -**IRF5: RelA interaction targets inflammatory genes in macrophages** DG Saliba, A Heger, HL Eames, S Oikonomopoulos, A Teixeira, K Blazek, ... Cell reports 8 (5), 1308-1317 2014 - -**UMI-tools: modeling sequencing errors in Unique Molecular Identifiers to improve quantification accuracy** T Smith, A Heger, I Sudbery Genome research 27 (3), 491-499 2017 - -**Long noncoding RNAs in B-cell development and activation** TF Brazão, JS Johnson, J Müller, A Heger, CP Ponting, VLJ Tybulewicz Blood 128 (7), e10-e19 2016 - -**CGAT: computational genomics analysis toolkit** D Sims, NE Ilott, SN Sansom, IM Sudbery, JS Johnson, KA Fawcett, ... Bioinformatics 30 (9), 1290-1291 2014 diff --git a/docs_readthedocs/project_info/how_to_contribute.rst b/docs_readthedocs/project_info/how_to_contribute.rst deleted file mode 100644 index cc0ff263..00000000 --- a/docs_readthedocs/project_info/how_to_contribute.rst +++ /dev/null @@ -1,93 +0,0 @@ -.. _contribute: - -============ -Contributing -============ - -Contributions are very much encouraged and we greatly appreciate the time and effort people make to help maintain and support out tools. Every contribution helps, please dont be shy, we dont bite. - -You can contribute to the development of our software in a number of different ways: - -------------------- -Reporting bug fixes -------------------- - -Bugs are annoying and reporting them will help us to fix your issue. - -Bugs can be reported using the issue section in `github `_ - -When reporting issues, please include: - -- Steps in your code/command that led to the bug so it can be reproduced. -- The error message from the log message. -- Any other helpful info, such as the system/cluster engine or version information. 
- ------------------------------------ -Proposing a new feature/enhancement ------------------------------------ - -If you wish to contribute a new feature to the CGAT-core repository then the best way is to raise this as an issue and label it as an enhancement in `github `_ - -If you propose a new feature then please: - -- Explain how your enhancement will work -- Describe as best as you can how you plan to implement this. -- If you dont think you have the necessary skills to implement this on your own then please say and we will try our best to help (or implement this for you). However, please be aware that this is a community developed software and our volunteers have other jobs. Therefore, we may not be able to work as fast as you hoped. - ------------------------ -Pull Request Guidelines ------------------------ - -Why not contribute to our project, its a great way of making the project better, your help is always welcome. We follow the fork/pull request `model `_. To update our documentation, fix bugs or add extra enhancements you will need to create a pull request through github. - -To create a pull request perform these steps: - -1. Create a github account. - -2. Create a personal fork of the project on github. - -3. Clone the fork onto your local machine. Your remote repo on github - is called ``origin``. - -4. Add the orginal repository as a remote called ``upstream``. - -5. If you made the fork a while ago then please make sure you ``git - pull upstream`` to keep your repository up to date - -6. Create a new branch to work on! We usually name our branches with - capital first and last followed by a dash and something unique. For - example: ``git checkout -b AC-new_doc``. - -7. Impliment your fix/enhancement and make sure your code is - effectively documented. - -8. Our code has tests and these will be ran when a pull request is - submitted, however you can run our tests before you make the pull - request, we have a number written in the ``tests/`` directory. To - run all tests, type ``pytest --pep8 tests`` - -9. Add or change our documentation in the ``docs/`` directory. - -10. Squash all of your commits into a single commit with gits - `interactive rebase - `_. - -11. Push your branch to your fork on github ``git push origin`` - -12. From your fork in github.com, open a pull request in the correct - branch. - -13. ... This is where someone will review your changes and modify them - or approve them ... - -14. Once the pull request is approved and merged you can pull the - changes from the ``upstream`` to your local repo and delete your - branch. - -.. note:: - - Always write your commit messages in the present tense. Your commit - messages should describe what the commit does to the code and not - what you did to the code. - - diff --git a/docs_readthedocs/release.rst b/docs_readthedocs/release.rst deleted file mode 100644 index cdcf6146..00000000 --- a/docs_readthedocs/release.rst +++ /dev/null @@ -1,24 +0,0 @@ -============= -Release Notes -============= - -Notes on each release are below. 
- -Release 0.4.0 -============= - -* contributions by Genomics PLC ; https://github.com/cgat-developers/cgat-core/pull/1 -* added installation script and conda environments ; https://github.com/cgat-developers/cgat-core/pull/2 -* make pytest pass on both Linux and OSX ; https://github.com/cgat-developers/cgat-core/pull/4 -* snakefood (py2) does not parse Control.py correctly ; https://github.com/cgat-developers/cgat-core/pull/15 -* do not run P.load in the cluster ; https://github.com/cgat-developers/cgat-core/pull/22 -* fix os.path.relpath cache function ; https://github.com/cgat-developers/cgat-core/pull/24 ; https://github.com/cgat-developers/cgat-core/pull/28 -* migrating https://github.com/CGATOxford/CGATPipelines/pull/406 ; https://github.com/cgat-developers/cgat-core/pull/26 ; https://github.com/cgat-developers/cgat-core/pull/31 -* migrating https://github.com/CGATOxford/CGATPipelines/pull/411 ; https://github.com/cgat-developers/cgat-core/pull/16 -* make enforcing memory restrictions via ulimit optional ; https://github.com/cgat-developers/cgat-core/pull/27 -* change printconfig to work with yml files ; https://github.com/cgat-developers/cgat-core/pull/32 -* changes to work only with yml configuration files (ini files are no longer in use) ; https://github.com/cgat-developers/cgat-core/pull/25 -* update conda from 4.3 to 4.5 (solving "CXXABI_1.3.9 not found" error ; https://github.com/ContinuumIO/anaconda-issues/issues/5191) ; https://github.com/cgat-developers/cgat-core/commit/b940e3e1e10e29ad65ce00c346881e05584bfc9b -* migrating https://github.com/CGATOxford/CGATPipelines/pull/399 ; migrating https://github.com/cgat-developers/cgat-core/pull/34 -* new way of activating conda environments ; https://github.com/cgat-developers/cgat-core/pull/35 - diff --git a/docs_readthedocs/remote/Azure.rst b/docs_readthedocs/remote/Azure.rst deleted file mode 100644 index aa45d2c0..00000000 --- a/docs_readthedocs/remote/Azure.rst +++ /dev/null @@ -1,108 +0,0 @@ -.. _Azure: - -======================= -Microsoft Azure Storage -======================= - -This section describes how to interact with Microsoft Azure cloud storage. In order to interact with the -Azure cloud storage resource we use the `azure `_ SDK. - -Like all of our remote connection functionality, this is a work in progress and we are currently in the -process of adding extra features. If you have bug reports or comments then please raise them as an issue -on `github `_ - - -Setting up credentials ----------------------- - -Unlike other remote access providers, the credentials are set up by passing them directly into the initial class -as variables as follows:: - - Azure = AzureRemoteObject(account_name = "firstaccount", account_key = "jbiuebcjubncjklncjkln........") - -These access keys can be found in the Azure portal and locating the storage account. In the settings of the storage account -there is a selection "Access keys". The account name and access keys are listed here. - -Download from Azure -------------------- - -Using remote files with Azure can be achieved easily by using `download`, `upload` and `delete_file` functions that are written into a RemoteClass. 
- -Firstly you will need to initiate the class as follows:: - - from cgatcore.remote.azure import * - Azure = AzureRemoteObject(account_name = "firstaccount", account_key = "jbiuebcjubncjklncjkln........") - -In order to download a file and use it within the decorator you can follows the example:: - - @transform(Azure.download('test-azure',"pipeline.yml", "./pipeline.yml"), - regex("(.*)\.(.*)"), - r"\1.counts") - -This will download the file `pipeline.yml` in the Azure container `test-azure` locally to `./pipeline.yml` -and it will be picked up by the decoratory function as normal. - -Upload to Azure ---------------- - -In order to upload files to Azure you simply need to run:: - - Azure.upload('test-azure',"pipeline2.yml", "./pipeline.yml") - -This will upload to the `test-azure` Azure container the `./pipeline.yml` file and it will be saved as -`pipeline2.yml` in that bucket. - -Delete file from Azure ----------------------- - -In order to delete a file from the Azure container then you simply run:: - - Azure.delete_file('test-azure',"pipeline2.yml") - -This will delete the `pipeline2.yml` file from the `test-azure` container. - - -Functional example ------------------- - -As a simple example, the following one function pipeline demonstrates the way you can interact with AWS S3:: - - from ruffus import * - import sys - import os - import cgatcore.experiment as E - from cgatcore import pipeline as P - from cgatcore.remote.azure import * - - # load options from the config file - PARAMS = P.get_parameters( - ["%s/pipeline.yml" % os.path.splitext(__file__)[0], - "../pipeline.yml", - "pipeline.yml"]) - - Azure = AzureRemoteObject(account_name = "firstaccount", account_key = "jbiuebcjubncjklncjkln........") - - - @transform(Azure.download('test-azure',"pipeline.yml", "./pipeline.yml"), - regex("(.*)\.(.*)"), - r"\1.counts") - def countWords(infile, outfile): - '''count the number of words in the pipeline configuration files.''' - - # Upload file to Azure - Azure.upload('test-azure',"pipeline2.yml", "/ifs/projects/adam/test_remote/data/pipeline.yml") - - # the command line statement we want to execute - statement = '''awk 'BEGIN { printf("word\\tfreq\\n"); } - {for (i = 1; i <= NF; i++) freq[$i]++} - END { for (word in freq) printf "%%s\\t%%d\\n", word, freq[word] }' - < %(infile)s > %(outfile)s''' - - P.run(statement) - - # Delete file from Azure - Azure.delete_file('test-azure',"pipeline2.yml") - - @follows(countWords) - def full(): - pass diff --git a/docs_readthedocs/remote/GC.rst b/docs_readthedocs/remote/GC.rst deleted file mode 100644 index fd1377ac..00000000 --- a/docs_readthedocs/remote/GC.rst +++ /dev/null @@ -1,127 +0,0 @@ -.. _GC: - -============== -Google storage -============== - -This section describes how to interact with the google cloud storage -bucket and blob (files). In order to interact with the cloud -resource we use the `google.cloud` API for python. - -This is a work in progress and we would really like feedback for extra features or if there -are any bugs then please report them as `issues on github `_. - -Setting up credentials ----------------------- - -In order to use google cloud storage feature you will need to conigure -your credentials. This is quite easy with the `gcloud` tool. 
-This tool is run before executing a workflow in the following way::
-
-   gcloud auth application-default login
-
-This sets up a JSON file with all of the credentials in your home
-folder, usually in the file `.config/gcloud/application_default_credentials.json`
-
-Next you will also need to tell the API which project you are using.
-Projects are usually set in the google console and all have a unique
-ID. This ID needs to be passed into cgat-core.
-
-This can be achieved in the following ways:
-
-* passing project_id into the JSON file::
-
-    {
-      "client_id": "764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com",
-      "client_secret": "d-FL95Q19q7MQmFpd7hHD0Ty",
-      "refresh_token": "1/d8JxxulX84r3jiJVlt-xMrpDLcIp3RHuxLHtieDu8uA",
-      "type": "authorized_user",
-      "project_id": "extended-cache-163811"
-    }
-
-* setting project_id in your `bashrc`::
-
-    export GCLOUD_PROJECT=extended-cache-163811
-
-Download from google storage
-----------------------------
-
-Using remote files with google cloud can be achieved easily by using the `download`, `upload` and `delete_file` functions that are written into a RemoteClass.
-
-Firstly you will need to initiate the class as follows::
-
-   from cgatcore.remote.google_cloud import *
-   GC = GCRemoteObject()
-
-In order to download a file and use it within the decorator you can follow this example::
-
-   @transform(GC.download('gc-test',"pipeline.yml", "./pipeline.yml"),
-              regex("(.*)\.(.*)"),
-              r"\1.counts")
-
-This will download the file `pipeline.yml` from the google cloud bucket `gc-test` locally to `./pipeline.yml`
-and it will be picked up by the decorator function as normal.
-
-Upload to google cloud
-----------------------
-
-In order to upload files to google cloud you simply need to run::
-
-   GC.upload('gc-test',"pipeline2.yml", "./pipeline.yml")
-
-This will upload the `./pipeline.yml` file to the `gc-test` google cloud bucket, where it will be saved as
-`pipeline2.yml`.
-
-Delete file from google cloud
------------------------------
-
-In order to delete a file from a google cloud bucket you simply run::
-
-   GC.delete_file('gc-test',"pipeline2.yml")
-
-This will delete the `pipeline2.yml` file from the `gc-test` bucket.
- -Functional example ------------------- - -As a simple example, the following one function pipeline demonstrates the way you can interact with the google cloud:: - - from ruffus import * - import sys - import os - import cgatcore.experiment as E - from cgatcore import pipeline as P - from cgatcore.remote.google_cloud import * - - # load options from the config file - PARAMS = P.get_parameters( - ["%s/pipeline.yml" % os.path.splitext(__file__)[0], - "../pipeline.yml", - "pipeline.yml"]) - - GC = GCRemoteObject() - - - @transform(GC.download('gc-test',"pipeline.yml", "./pipeline.yml"), - regex("(.*)\.(.*)"), - r"\1.counts") - def countWords(infile, outfile): - '''count the number of words in the pipeline configuration files.''' - - # Upload file to google cloud - GC.upload('gc-test',"pipeline2.yml", "/ifs/projects/adam/test_remote/data/pipeline.yml") - - # the command line statement we want to execute - statement = '''awk 'BEGIN { printf("word\\tfreq\\n"); } - {for (i = 1; i <= NF; i++) freq[$i]++} - END { for (word in freq) printf "%%s\\t%%d\\n", word, freq[word] }' - < %(infile)s > %(outfile)s''' - - P.run(statement) - - # Delete file from google cloud - GC.delete_file('gc-test',"pipeline2.yml") - - @follows(countWords) - def full(): - pass diff --git a/docs_readthedocs/remote/S3.rst b/docs_readthedocs/remote/S3.rst deleted file mode 100644 index 91a8704f..00000000 --- a/docs_readthedocs/remote/S3.rst +++ /dev/null @@ -1,126 +0,0 @@ -.. _S3: - -============== -AWS S3 Storage -============== - -This section described how to interact with amazon cloud simple -remote storage (S3). In order to interact with the S3 resource we -use the `boto3 `_ SDK. - -This is a work in progress and we would really like feedback for extra features or if there -are any bugs then please report them as `issues on github `_. - - -Setting up credentials ----------------------- - -In order to use the AWS remote feature you will need to configure -your credentials (The access key and secret key). You can set up -these credentials by adding the keys as environment variables in -a file `~/.aws/credentials` as detailed in the `boto3 configuration page `_. In brief you will need to add the keys as follows:: - - [default] - aws_access_key_id = YOUR_ACCESS_KEY - aws_secret_access_key = YOUR_SECRET_KEY - -These access keys can be found within your S3 AWS console and you can access them by following these steps: -* `Log in to your AWS Management Console. `_ -* Click on your user name at the top right of the page. -* Click My Security Credentials. -* Click Users in left hand menu and select a user. -* Click the Security credentials tab. -* YOUR_ACCESS_KEY is located in the Access key section - -If you have lost YOUR_SECRET_KEY then you will need to Create a new access key, please see `AWS documentation `_ on how to do this. Please not that every 90 days AWS will rotate your access keys. - -In additon, you may also want to configure the default region:: - - [default] - region=us-east-1 - -Once configuration variables have been created then you are ready to interact -with the S3 storage. - -Download from AWS S3 --------------------- - -Using remote files with AWS can be acieved easily by using `download`, `upload` and `delete_file` functions that are written into a RemoteClass. 
To use remote files within a pipeline, you will first need to initiate the class as follows::

    from cgatcore.remote.aws import *
    S3 = S3RemoteObject()

In order to download a file and use it within the decorator you can follow this example::

    @transform(S3.download('aws-test-boto', "pipeline.yml", "./pipeline.yml"),
               regex("(.*)\.(.*)"),
               r"\1.counts")

This will download the file `pipeline.yml` from the AWS bucket `aws-test-boto` locally to `./pipeline.yml`,
where it will be picked up by the decorated function as normal.

Upload to AWS S3
----------------

In order to upload files to AWS S3 you simply need to run::

    S3.upload('aws-test-boto', "pipeline2.yml", "./pipeline.yml")

This will upload the `./pipeline.yml` file to the `aws-test-boto` S3 bucket, where it will be saved as
`pipeline2.yml`.

Delete file from AWS S3
-----------------------

In order to delete a file from an AWS S3 bucket you simply run::

    S3.delete_file('aws-test-boto', "pipeline2.yml")

This will delete the `pipeline2.yml` file from the `aws-test-boto` bucket.

Functional example
------------------

As a simple example, the following one-function pipeline demonstrates the way you can interact with AWS S3::

    from ruffus import *
    import sys
    import os
    import cgatcore.experiment as E
    from cgatcore import pipeline as P
    from cgatcore.remote.aws import *

    # load options from the config file
    PARAMS = P.get_parameters(
        ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
         "../pipeline.yml",
         "pipeline.yml"])

    S3 = S3RemoteObject()


    @transform(S3.download('aws-test-boto', "pipeline.yml", "./pipeline.yml"),
               regex("(.*)\.(.*)"),
               r"\1.counts")
    def countWords(infile, outfile):
        '''count the number of words in the pipeline configuration files.'''

        # Upload file to S3
        S3.upload('aws-test-boto', "pipeline2.yml", "/ifs/projects/adam/test_remote/data/pipeline.yml")

        # the command line statement we want to execute
        statement = '''awk 'BEGIN { printf("word\\tfreq\\n"); }
        {for (i = 1; i <= NF; i++) freq[$i]++}
        END { for (word in freq) printf "%%s\\t%%d\\n", word, freq[word] }'
        < %(infile)s > %(outfile)s'''

        P.run(statement)

        # Delete file from S3
        S3.delete_file('aws-test-boto', "pipeline2.yml")

    @follows(countWords)
    def full():
        pass

diff --git a/docs_readthedocs/remote/SFTP.rst b/docs_readthedocs/remote/SFTP.rst
deleted file mode 100644
index da9c8d98..00000000
--- a/docs_readthedocs/remote/SFTP.rst
+++ /dev/null
@@ -1,53 +0,0 @@

.. _SFTP:


========================
File transfer using SFTP
========================

Cgat-core can access files on a remote server via SFTP. This functionality is provided
by the `pysftp` python library.

Given that you have already set up your SSH key pairs correctly for your server,
accessing the server is easy::

    from cgatcore.remote.sftp import *
    sftp = SFTPRemoteObject()


Download from SFTP
------------------

Using remote files with SFTP can be achieved easily by using the `download` function that
is written into a RemoteClass.

In order to download a file and use it within the decorator you can follow this example::

    from cgatcore.remote.sftp import *
    sftp = SFTPRemoteObject()

    @transform(sftp.download('example.com/path/to/file.txt'),
               regex("(.*)\.txt"),
               r"\1.counts")


The remote address must be specified with the host (domain or IP address) and the absolute
path to the file on the remote server.
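For illustration only (the host and path below are hypothetical), a file stored at `/data/project/reads.txt` on the server `example.com` would therefore be addressed as::

    from cgatcore.remote.sftp import *
    sftp = SFTPRemoteObject()

    # hypothetical host followed by the absolute path on that host
    sftp.download('example.com/data/project/reads.txt')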
A port may be specified if the SSH daemon on the server
is listening on a port other than 22::

    from cgatcore.remote.sftp import *
    sftp = SFTPRemoteObject(port=4040)

    @transform(sftp.download('example.com/path/to/file.txt'),
               regex("(.*)\.txt"),
               r"\1.counts")

You can specify the standard arguments used by `pysftp`. For
example::

    from cgatcore.remote.sftp import *
    sftp = SFTPRemoteObject(username="cgatuser", password="cgatpassword")

    @transform(sftp.download('example.com/path/to/file.txt'),
               regex("(.*)\.txt"),
               r"\1.counts")

diff --git a/readthedocs.yml b/readthedocs.yml
deleted file mode 100644
index 8dd73203..00000000
--- a/readthedocs.yml
+++ /dev/null
@@ -1,2 +0,0 @@

conda:
  file: conda/environments/readthedocs.yml