Skip to content

Commit d14421c

Browse files
author
eric
committed
Initial commit.
0 parents  commit d14421c

29 files changed

+6972
-0
lines changed

LISCENCE

+675
Large diffs are not rendered by default.

Makefile

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# MAKEFILE SOURCE:
# Date : 2011-08-10

# Project name (the executable is generated with this name).
TARGET   = pollux

CC       = gcc
# Compiling flags go here.
CFLAGS   = -std=c99 -I.

LINKER   = gcc -o
# Linking flags go here.
LFLAGS   = -Wall -I. -lm

# Change these to set the directories where each kind of file should be.
SRCDIR   = source
OBJDIR   = source
BINDIR   = .

SOURCES  := $(wildcard $(SRCDIR)/*.c)
INCLUDES := $(wildcard $(SRCDIR)/*.h)
OBJECTS  := $(SOURCES:$(SRCDIR)/%.c=$(OBJDIR)/%.o)
rm       = rm -f

# Link step. The object files are listed BEFORE $(LFLAGS) because the linker
# resolves libraries (-lm) only against objects it has already seen; with the
# library first, math symbols can end up undefined on some toolchains.
$(BINDIR)/$(TARGET): $(OBJECTS)
	@$(LINKER) $@ $(OBJECTS) $(LFLAGS)
	@echo "Linking complete!"

# Compile step. Every object also depends on the project headers so that a
# header change triggers a rebuild; $< is still the .c file because it is the
# first prerequisite.
$(OBJECTS): $(OBJDIR)/%.o : $(SRCDIR)/%.c $(INCLUDES)
	@$(CC) $(CFLAGS) -c $< -o $@
	@echo "Compiled "$<" successfully!"

# .PHONY (not .PHONEY) is the special target make recognises; without it a
# file named "clean" or "remove" would stop these rules from running.
.PHONY: clean
clean:
	@$(rm) $(OBJECTS)
	@echo "Cleanup complete!"

.PHONY: remove
remove: clean
	@$(rm) $(BINDIR)/$(TARGET)
	@echo "Executable removed!"
44+
45+

README

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
======================================================================
2+
Pollux
3+
Copyright (C) 2014 Eric Marinier
4+
5+
This program is free software: you can redistribute it and/or modify
6+
it under the terms of the GNU General Public License as published by
7+
the Free Software Foundation, either version 3 of the License, or
8+
(at your option) any later version.
9+
10+
This program is distributed in the hope that it will be useful,
11+
but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
GNU General Public License for more details.
14+
15+
You should have received a copy of the GNU General Public License
16+
along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
18+
======================================================================
19+
20+
-- Description --
21+
22+
Pollux is a platform-independent error corrector which targets errors
23+
produced by second-generation sequencing technologies.
24+
25+
-- Release --
26+
27+
Pollux 1.00
28+
4 November 2014
29+
30+
This is the initial release of Pollux.
31+
32+
-- Requirements --
33+
34+
Pollux requires a 64-bit Unix-based operating system.
35+
36+
-- Installation --
37+
38+
make
39+
40+
-- Running Pollux --
41+
42+
Pollux's command line arguments can be found by running:
43+
./pollux
44+
45+
Simple correction:
46+
./pollux -i <fastq_reads>
47+
48+
-- Contact --
49+
50+
Brendan McConkey: [email protected]
51+
Eric Marinier: [email protected]
52+
53+
-- Credits --
54+
55+
The Makefile is derived from one by Yanick Rochon (2010-11-05).
56+
(http://stackoverflow.com/users/320700/yanick-rochon)
57+
58+
The source makes use of data structures provided by Simon Howard.
59+
(http://c-algorithms.sourceforge.net/)
60+
61+

source/Correction.c

+82
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
3+
Pollux
4+
Copyright (C) 2014 Eric Marinier
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU General Public License for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
*/
20+
21+
#include <stdlib.h>
22+
#include "Correction.h"
23+
24+
Correction* createCorrection(Reads** reads, unsigned int numReadSets,
25+
KMerHashTable* kmers, unsigned int kmerSize, unsigned int lowKMerThreshold,
26+
char* outputDirectory, CorrectionFunction correctionFunction)
27+
{
28+
Correction* correction = (Correction*)malloc(sizeof(Correction));
29+
30+
correction->reads = reads;
31+
correction->numReadSets = numReadSets;
32+
33+
correction->kmers = kmers;
34+
correction->kmerSize = kmerSize;
35+
correction->lowKMerThreshold = lowKMerThreshold;
36+
37+
correction->substitutions = true;
38+
correction->insertions = true;
39+
correction->deletions = true;
40+
correction->homopolymers = true;
41+
42+
correction->outputDirectory = outputDirectory;
43+
44+
correction->correctionFunction = correctionFunction;
45+
46+
return correction;
47+
}
48+
49+
/**
 * Accessor for the read sets.
 *
 * @param correction The correction structure to query.
 *
 * @return The reads pointer stored in the structure.
 */
Reads** correctionGetReads(Correction* correction)
{
    Reads** readSets = correction->reads;

    return readSets;
}
53+
54+
/**
 * Accessor for the number of read sets.
 *
 * @param correction The correction structure to query.
 *
 * @return The numReadSets value stored in the structure.
 */
unsigned int correctionGetNumReadSets(Correction* correction)
{
    unsigned int setCount = correction->numReadSets;

    return setCount;
}
58+
59+
/**
 * Accessor for the k-mer hash table.
 *
 * @param correction The correction structure to query.
 *
 * @return The kmers pointer stored in the structure.
 */
KMerHashTable* correctionGetKMers(Correction* correction)
{
    KMerHashTable* table = correction->kmers;

    return table;
}
63+
64+
/**
 * Accessor for the k-mer size.
 *
 * @param correction The correction structure to query.
 *
 * @return The kmerSize value stored in the structure.
 */
unsigned int correctionGetKMerSize(Correction* correction)
{
    unsigned int size = correction->kmerSize;

    return size;
}
68+
69+
/**
 * Accessor for the low k-mer threshold.
 *
 * @param correction The correction structure to query.
 *
 * @return The lowKMerThreshold value stored in the structure.
 */
unsigned int correctionGetLowThreshold(Correction* correction)
{
    unsigned int threshold = correction->lowKMerThreshold;

    return threshold;
}
73+
74+
/**
 * Accessor for the correction function pointer.
 *
 * @param correction The correction structure to query.
 *
 * @return The correctionFunction pointer stored in the structure.
 */
CorrectionFunction correctionGetFunction(Correction* correction)
{
    CorrectionFunction function = correction->correctionFunction;

    return function;
}
78+
79+
/**
 * Accessor for the output directory.
 *
 * @param correction The correction structure to query.
 *
 * @return The outputDirectory pointer stored in the structure (not a copy).
 */
char* correctionGetOutputDirectory(Correction* correction)
{
    char* directory = correction->outputDirectory;

    return directory;
}

source/Correction.h

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
3+
Pollux
4+
Copyright (C) 2014 Eric Marinier
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU General Public License for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
*/
20+
21+
#include "Reads.h"
22+
#include "KMerHashTable.h"
23+
#include "Utility.h"
24+
25+
#ifndef CORRECTION_H
26+
#define CORRECTION_H
27+
28+
#ifdef __cplusplus
29+
extern "C" {
30+
#endif
31+
32+
typedef struct Correction Correction;
33+
typedef bool (*CorrectionFunction)(struct read* read, Correction* correction);
34+
35+
struct Correction
36+
{
37+
Reads** reads;
38+
unsigned int numReadSets;
39+
40+
KMerHashTable* kmers;
41+
unsigned int kmerSize;
42+
unsigned int lowKMerThreshold;
43+
44+
// Enabled Corrections:
45+
bool substitutions;
46+
bool insertions;
47+
bool deletions;
48+
bool homopolymers;
49+
50+
bool filtering;
51+
bool qualityUpdating;
52+
53+
char* outputDirectory;
54+
55+
CorrectionFunction correctionFunction; // Correction function pointer.
56+
57+
};
58+
59+
Correction* createCorrection(Reads** reads, unsigned int numReadSets,
60+
KMerHashTable* kmers, unsigned int kmerSize, unsigned int lowKMerThreshold,
61+
char* outputDirectory, CorrectionFunction correctionFunction);
62+
63+
Reads** correctionGetReads(Correction* correction);
64+
unsigned int correctionGetNumReadSets(Correction* correction);
65+
KMerHashTable* correctionGetKMers(Correction* correction);
66+
unsigned int correctionGetKMerSize(Correction* correction);
67+
unsigned int correctionGetLowThreshold(Correction* correction);
68+
CorrectionFunction correctionGetFunction(Correction* correction);
69+
char* correctionGetOutputDirectory(Correction* correction);
70+
71+
#ifdef __cplusplus
72+
}
73+
#endif
74+
75+
#endif /* CORRECTION_H */
76+

source/Counting.c

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*
2+
3+
Pollux
4+
Copyright (C) 2014 Eric Marinier
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU General Public License for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
*/
20+
21+
#include "Counting.h"
22+
#include "Utility.h"
23+
24+
void getKMerCounts(unsigned long long int* sequence, unsigned int length,
25+
KMerHashTable* kmers, unsigned int kmerSize, unsigned int* counts)
26+
{
27+
// Variables:
28+
unsigned long long int kmer;
29+
int total = length - kmerSize + 1;
30+
31+
// Iterate over all k-mers within the read:
32+
for(int i = 0; i < total; i++)
33+
{
34+
// Get the next k-mer:
35+
kmer = getKMer(sequence, i, i + kmerSize);
36+
37+
// Get the count:
38+
counts[i] = KMerTableLookup(kmers, kmer);
39+
}
40+
}
41+
42+
/**
 * Determines whether every entry of counts in the half-open index range
 * [start, end) is less than or equal to the threshold.
 *
 * An empty range (start >= end) trivially satisfies the condition.
 *
 * @param counts The array to examine.
 * @param start The first index to examine (inclusive).
 * @param end The index to stop at (exclusive).
 * @param THRESHOLD The threshold. Less than or equal to the threshold!
 *
 * @return 1 if no examined entry exceeds the threshold, 0 otherwise.
 */
unsigned int areCountsBelowThreshold(unsigned int* counts, unsigned int start,
        unsigned int end, const unsigned int THRESHOLD)
{
    // The index is unsigned like the bounds, so the comparison and the array
    // subscript never involve a signed/unsigned conversion.
    for(unsigned int i = start; i < end; i++)
    {
        if(counts[i] > THRESHOLD)
        {
            return 0;
        }
    }

    return 1;
}

source/Counting.h

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
3+
Pollux
4+
Copyright (C) 2014 Eric Marinier
5+
6+
This program is free software: you can redistribute it and/or modify
7+
it under the terms of the GNU General Public License as published by
8+
the Free Software Foundation, either version 3 of the License, or
9+
(at your option) any later version.
10+
11+
This program is distributed in the hope that it will be useful,
12+
but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
GNU General Public License for more details.
15+
16+
You should have received a copy of the GNU General Public License
17+
along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+
*/
20+
21+
#include "KMerHashTable.h"
22+
23+
#ifndef COUNTING_H
24+
#define COUNTING_H
25+
26+
#ifdef __cplusplus
27+
extern "C" {
28+
#endif
29+
30+
/**
31+
* This function fills the passed array with the number of the occurances of a
32+
* given k-mer over the entire length of the sequence.
33+
*
34+
* @param sequence The sequence to get the k-mer counts for.
35+
* @param length The length of the sequence.
36+
* @param kmers The k-mer hash table data structure.
37+
* @param kmerSize The length of the kmers.
38+
* @param counts The counts array to fill. There will be (length - kmerSize + 1)
39+
* entries expected to be filled.
40+
*/
41+
void getKMerCounts(unsigned long long int* sequence, unsigned int length,
42+
KMerHashTable* kmers, unsigned int kmerSize, unsigned int* counts);
43+
44+
/**
 * Checks whether all the entries of the passed array within the index range
 * [start, end) are less than or equal to the given threshold.
 *
 * @param counts The array to examine.
 * @param start The starting index (inclusive).
 * @param end The ending index (exclusive).
 * @param THRESHOLD The threshold. Less than or equal to the threshold!
 *
 * @return 1 if every examined value is less than or equal to the threshold,
 *      0 otherwise.
 */
unsigned int areCountsBelowThreshold(
        unsigned int* counts,
        unsigned int start,
        unsigned int end,
        const unsigned int THRESHOLD);
57+
58+
59+
#ifdef __cplusplus
60+
}
61+
#endif
62+
63+
#endif /* COUNTING_H */
64+

0 commit comments

Comments
 (0)