autopep8 scripts (#75)

mccalluc · pkerpedjiev · commit 5d0341beee18 · 2019-05-17T12:00:13.000-07:00
diff --git a/.flake8 b/.flake8
@@ -20,11 +20,6 @@ exclude =
   notebooks/ENSEMBL annotations and RNAseq.ipynb
   notebooks/h37rv gene annotations.ipynb
   pyprof.sh
-  scripts/exonU.py
-  scripts/gff_to_chromsizes.py
-  scripts/gff_to_genepred.py
-  scripts/replace_importances.py
-  scripts/tsv_to_mrmatrix.py
   setup.py
 
 ignore =
diff --git a/scripts/exonU.py b/scripts/exonU.py
@@ -8,10 +8,12 @@
 import sys
 import argparse
 
+
 class GeneInfo:
     def __init__(self):
         pass
 
+
 def merge_gene_info(gene_infos, gene_info):
     '''
     Add a new gene_info. If it's txStart and txEnd overlap with a previous entry for this
@@ -20,15 +22,15 @@ def merge_gene_info(gene_infos, gene_info):
     merged = False
 
     for existing_gene_info in gene_infos[gene_info.geneId]:
-        if (existing_gene_info.chrName == gene_info.chrName and 
+        if (existing_gene_info.chrName == gene_info.chrName and
                 existing_gene_info.txEnd > gene_info.txStart and
                 gene_info.txEnd > existing_gene_info.txStart):
 
             # overlapping genes, merge the exons of the second into the first
             existing_gene_info.txStart = min(existing_gene_info.txStart,
                                              gene_info.txStart)
             existing_gene_info.txEnd = max(existing_gene_info.txEnd,
-                                             gene_info.txEnd)
+                                           gene_info.txEnd)
 
             for (exon_start, exon_end) in gene_info.exonUnions:
                 existing_gene_info.exonUnions.add((exon_start, exon_end))
@@ -51,9 +53,9 @@ def main():
 """)
 
     parser.add_argument('transcript_bed')
-    #parser.add_argument('-o', '--options', default='yo',
+    # parser.add_argument('-o', '--options', default='yo',
     #					 help="Some option", type='str')
-    #parser.add_argument('-u', '--useless', action='store_true', 
+    # parser.add_argument('-u', '--useless', action='store_true',
     #					 help='Another useless option')
     args = parser.parse_args()
 
@@ -85,29 +87,26 @@ def main():
             print("ERROR: line:", line, file=sys.stderr)
             continue
 
-
         # for some reason, exon starts and ends have trailing commas
         gene_info.exonStartParts = gene_info.exonStarts.strip(",").split(',')
         gene_info.exonEndParts = gene_info.exonEnds.strip(",").split(',')
-        gene_info.exonUnions = set([(int(s), int(e)) for (s,e) in zip(gene_info.exonStartParts, gene_info.exonEndParts)])
+        gene_info.exonUnions = set([(int(s), int(e)) for (s, e) in zip(
+            gene_info.exonStartParts, gene_info.exonEndParts)])
 
         # add this gene info by checking whether it overlaps with any existing ones
         gene_infos = merge_gene_info(gene_infos, gene_info)
 
     for gene_id in gene_infos:
         for contig in gene_infos[gene_id]:
             output = "\t".join(map(str, [contig.chrName, contig.txStart, contig.txEnd,
-                                contig.geneName, contig.score, contig.strand,
-                                'union_' + gene_id, gene_id, contig.geneType, contig.geneDesc,
-                                contig.cdsStart, contig.cdsEnd, 
-                                ",".join([str(e[0]) for e in sorted(contig.exonUnions)]),
-                                ",".join([str(e[1]) for e in sorted(contig.exonUnions)])]))
+                                         contig.geneName, contig.score, contig.strand,
+                                         'union_' + gene_id, gene_id, contig.geneType, contig.geneDesc,
+                                         contig.cdsStart, contig.cdsEnd,
+                                         ",".join(
+                                             [str(e[0]) for e in sorted(contig.exonUnions)]),
+                                         ",".join([str(e[1]) for e in sorted(contig.exonUnions)])]))
             print(output)
 
 
-
 if __name__ == '__main__':
     main()
-
-
-
diff --git a/scripts/gff_to_chromsizes.py b/scripts/gff_to_chromsizes.py
@@ -3,22 +3,21 @@
 import sys
 import argparse
 
+
 def main():
     parser = argparse.ArgumentParser(description="""
     
     python gff_to_chromsizes.py
 """)
 
     #parser.add_argument('argument', nargs=1)
-    #parser.add_argument('-o', '--options', default='yo',
+    # parser.add_argument('-o', '--options', default='yo',
     #                     help="Some option", type='str')
-    #parser.add_argument('-u', '--useless', action='store_true', 
+    # parser.add_argument('-u', '--useless', action='store_true',
     #                     help='Another useless option')
 
     args = parser.parse_args()
-    
+
 
 if __name__ == '__main__':
     main()
-
-
diff --git a/scripts/gff_to_genepred.py b/scripts/gff_to_genepred.py
@@ -5,36 +5,39 @@
 import sys
 import argparse
 
-def dump_transcript(gene_name, 
-        gene_id,
-        gene_type,
-        gene_description,
-        gene_importance,
-        gene_start,
-        gene_end,
-        transcript_id,
-        chrom, start, end, strand,cdss, exons):
+
+def dump_transcript(gene_name,
+                    gene_id,
+                    gene_type,
+                    gene_description,
+                    gene_importance,
+                    gene_start,
+                    gene_end,
+                    transcript_id,
+                    chrom, start, end, strand, cdss, exons):
     '''
     Print out a set of transcripts for this gene
     '''
     if int(end) < int(start):
-        print("WARNING: end < start:", transcript_id, start, end, file=sys.stderr)
+        print("WARNING: end < start:", transcript_id,
+              start, end, file=sys.stderr)
 
     print('{chrom}\t{start}\t{end}\t{gene_name}\t{importance}\t{strand}\t{transcript_id}\t{gene_id}\t{gene_type}\t{gene_description}\t{cds_start}\t{cds_end}\t{exon_starts}\t{exon_ends}'.format(
-            chrom=chrom,
-            start=gene_start,
-            end=gene_end,
-            gene_name=gene_name,
-            importance=gene_importance,
-            strand=strand,
-            transcript_id=transcript_id,
-            gene_id=gene_id,
-            gene_type=gene_type,
-            gene_description=gene_description,
-            cds_start=start,
-            cds_end=end,
-            exon_starts=','.join([str(e[1]) for e in exons]),
-            exon_ends=','.join([str(e[2]) for e in exons])))
+        chrom=chrom,
+        start=gene_start,
+        end=gene_end,
+        gene_name=gene_name,
+        importance=gene_importance,
+        strand=strand,
+        transcript_id=transcript_id,
+        gene_id=gene_id,
+        gene_type=gene_type,
+        gene_description=gene_description,
+        cds_start=start,
+        cds_end=end,
+        exon_starts=','.join([str(e[1]) for e in exons]),
+        exon_ends=','.join([str(e[2]) for e in exons])))
+
 
 def main():
     parser = argparse.ArgumentParser(description="""
@@ -44,11 +47,11 @@ def main():
 
     parser.add_argument('gff_file')
     parser.add_argument('--save-chromsizes', default=None,
-            help='Store the chromsizes in a separate file',
-            type=str)
-    #parser.add_argument('-o', '--options', default='yo',
+                        help='Store the chromsizes in a separate file',
+                        type=str)
+    # parser.add_argument('-o', '--options', default='yo',
     #					 help="Some option", type='str')
-    #parser.add_argument('-u', '--useless', action='store_true', 
+    # parser.add_argument('-u', '--useless', action='store_true',
     #					 help='Another useless option')
 
     args = parser.parse_args()
@@ -57,7 +60,7 @@ def main():
     with open(args.gff_file, 'r') as f:
         transcript_id = None
         chromsizes = []
-        
+
         for line in f:
             counter += 1
             if line.strip()[0] == '#':
@@ -114,7 +117,8 @@ def main():
                     x_split = x.split('=')
                     attrs[x_split[0]] = x_split[1]
                 except IndexError as ve:
-                    print("WARNING: Strange Parts:", to_split, ve, file=sys.stderr)
+                    print("WARNING: Strange Parts:",
+                          to_split, ve, file=sys.stderr)
 
             if annotation_type == 'chromosome':
                 id_parts = attrs['ID'].split(':')
@@ -123,23 +127,22 @@ def main():
 
                 chromsizes += [(chromname, chromsize)]
 
-
             if annotation_type == 'gene' or annotation_type == 'tRNA_gene':
                 if transcript_id is not None:
                     dump_transcript(gene_name,
-                            gene_id,
-                            gene_type,
-                            gene_description,
-                            gene_importance,
-                            gene_start,
-                            gene_end,
-                            transcript_id,
-                            transcript_chrom,
-                            transcript_start,
-                            transcript_end,
-                            transcript_strand,
-                            transcript_cdss,
-                            transcript_exons)
+                                    gene_id,
+                                    gene_type,
+                                    gene_description,
+                                    gene_importance,
+                                    gene_start,
+                                    gene_end,
+                                    transcript_id,
+                                    transcript_chrom,
+                                    transcript_start,
+                                    transcript_end,
+                                    transcript_strand,
+                                    transcript_cdss,
+                                    transcript_exons)
 
                 split_id = attrs['ID'].split(':')
                 gene_id = attrs['ID']
@@ -149,20 +152,23 @@ def main():
                 elif 'Name' in attrs:
                     split_name = attrs['Name'].split(':')
                     print("split_name", split_name, file=sys.stderr)
-                    gene_name = split_name[0] if len(split_name) == 1 else split_name[1]
+                    gene_name = split_name[0] if len(
+                        split_name) == 1 else split_name[1]
                 else:
-                    gene_name = split_id[0] if len(split_id) == 1 else split_id[1]
+                    gene_name = split_id[0] if len(
+                        split_id) == 1 else split_id[1]
                     print("WARNING: no gene name:", to_split, file=sys.stderr)
 
                 if 'GENE_TYPE' in attrs:
                     gene_type = attrs['GENE_TYPE']
                 elif 'biotype' in attrs:
                     gene_type = attrs['biotype']
                 else:
-                    print("WARNING: no gene type (GENE_TYPE or biotype attribute)", to_split, file=sys.stderr)
+                    print("WARNING: no gene type (GENE_TYPE or biotype attribute)",
+                          to_split, file=sys.stderr)
 
                 gene_description = attrs['description'] if 'description' in attrs else '-'
-                gene_importance = random.randint(0,10000)
+                gene_importance = random.randint(0, 10000)
                 gene_start = start_pos
                 gene_end = end_pos
 
@@ -181,19 +187,19 @@ def main():
             if annotation_type == 'transcript' or annotation_type == 'mRNA':
                 if transcript_id is not None:
                     dump_transcript(gene_name,
-                            gene_id,
-                            gene_type,
-                            gene_description,
-                            gene_importance,
-                            gene_start,
-                            gene_end,
-                            transcript_id,
-                            transcript_chrom,
-                            transcript_start,
-                            transcript_end,
-                            transcript_strand,
-                            transcript_cdss,
-                            transcript_exons)
+                                    gene_id,
+                                    gene_type,
+                                    gene_description,
+                                    gene_importance,
+                                    gene_start,
+                                    gene_end,
+                                    transcript_id,
+                                    transcript_chrom,
+                                    transcript_start,
+                                    transcript_end,
+                                    transcript_strand,
+                                    transcript_cdss,
+                                    transcript_exons)
 
                 transcript_exons = []
                 transcript_id = attrs['ID']
@@ -208,33 +214,29 @@ def main():
                 parent_id = attrs['Parent']
                 if parent_id != transcript_id:
                     print("Exon parent doesn't match transcript_id",
-                            parent_id, transcript_id, file=sys.stderr)
+                          parent_id, transcript_id, file=sys.stderr)
                 transcript_exons += [(chrom, start_pos, end_pos)]
 
     dump_transcript(gene_name,
-            gene_id,
-            gene_type,
-            gene_description,
-            gene_importance,
-            gene_start,
-            gene_end,
-            transcript_id,
-            transcript_chrom,
-            transcript_start,
-            transcript_end,
-            transcript_strand,
-            transcript_cdss,
-            transcript_exons)
+                    gene_id,
+                    gene_type,
+                    gene_description,
+                    gene_importance,
+                    gene_start,
+                    gene_end,
+                    transcript_id,
+                    transcript_chrom,
+                    transcript_start,
+                    transcript_end,
+                    transcript_strand,
+                    transcript_cdss,
+                    transcript_exons)
 
     if args.save_chromsizes:
         with open(args.save_chromsizes, 'w') as f:
             for (name, size) in chromsizes:
                 f.write("{}\t{}\n".format(name, size))
 
 
-
-
 if __name__ == '__main__':
     main()
-
-
diff --git a/scripts/replace_importances.py b/scripts/replace_importances.py
diff --git a/scripts/tsv_to_mrmatrix.py b/scripts/tsv_to_mrmatrix.py