@@ -2,15 +2,26 @@ General guide to R scripts for analyzing your transcriptome data
2
2
3
3
## Gene expression analysis
4
4
5
- ### Compare gene expression using DESeq2
5
+ ### Normalize gene expression (DESeq2)
6
6
```
7
7
library(DESeq2)
8
- counts<-read.delim(‘counts.txt,row.names=1)
9
- cds<-DESeqDataSetFromMatrix(counts.txt,meta,~cluster)
10
- cds<-DESeq(cds)
11
- res<-results(cds)
12
- sig<-res[which(res$padj<0.05),]
13
- write.table(sig,file=‘DEcontigs.txt’,quote=F,sep=‘\t')
8
+ #get merged_counts.txt from get-bam-counts.sh script
9
+ counts<-read.delim("merged_counts.txt")
10
+
11
+ #make a meta file with samples in the same order as how they are listed in your directory
12
+ meta<-read.delim("meta.txt")
13
+
14
+ #make contig labels into row names, drop that column, then label the remaining columns with sample names
15
+ rownames(counts)<-counts[,1]
16
+ counts<-as.matrix(counts[,-1])
17
+ colnames(counts)<-meta$sample
18
+
19
+ #normalize read counts and filter for high counts (i.e. more than 10 reads/site)
20
+ normCounts<-t(counts)/estimateSizeFactorsForMatrix(counts)
21
+ #keep contigs whose mean normalized count exceeds 10; drop=FALSE keeps a matrix even if only one contig passes
+ normCounts_10<-normCounts[,colMeans(normCounts)>10,drop=FALSE]
22
+
23
+ transposed<-t(normCounts_10)
24
+ write.table(transposed,file="normCounts_10.txt", quote=FALSE,row.names=TRUE,col.names=TRUE,eol="\n")
14
25
```
15
26
16
27
### WGCNA
@@ -26,29 +37,41 @@ write.table(sig,file=‘DEcontigs.txt’,quote=F,sep=‘\t')
26
37
27
38
### A simple way to format your 012 SNP matrix
28
39
```
40
+ #create 012 files from your vcf with vcftools-012genotype-matrix.sh
29
41
#vcftools writes an individual index as the first column of the .012 file; drop it so the columns line up with file.012.pos
snps<-read.delim('file.012', header=FALSE)[,-1]
30
42
pos<-read.delim('file.012.pos',header=F)
31
43
indv<-read.delim('file.012.indv',header=FALSE)
32
44
45
+ #the order of samples in your meta file should match how they are listed in your computer's directory. this example has a meta file with: sample, pop
46
+ meta<-read.delim('meta.txt', header=TRUE)
47
+ rownames(snps)<-meta$sample
33
48
colnames(snps)<-paste(pos[,1],pos[,2],sep='-')
34
- rownames(snps)<-indv[,1]
35
- snps<-as.matrix(snps)
36
49
37
- #PCA of SNPs
50
+ #create a PCA of SNPs
38
51
pc.out<-prcomp(snps)
39
52
summary(pc.out)
40
- plot(pc.out$x[,1],pc.out$x[,2]) #PC1 v PC2
41
53
54
+ #plot with samples colored by population
55
+ #convert pop to a factor so each population maps to a palette color (read.delim returns character columns in R >= 4.0, which plot() would try to read as color names)
+ pop<-factor(meta$pop)
+ plot(pc.out$x[,1],pc.out$x[,2], col=as.integer(pop),main="Title of your PCA", xlab="PC1", ylab="PC2", pch=16, cex=1.5)
56
+
57
+ #add sample IDs to points
58
+ text(pc.out$x[,1],pc.out$x[,2],labels=meta$sample,pos=4,cex=0.7, offset=0.1)
59
+
60
+ #add a legend box
61
+ legend(x="topright",legend=levels(pop),fill=seq_along(levels(pop)))
42
62
```
43
63
44
- ### Add meta data to your SNP matrix
45
- - make a meta data file with info about individuals (location, date, etc.)
46
- - make sure your meta file is ordered the same as your vcfs! (i.e. ls your samples in the terminal to see their order)
47
- - script TBD
64
+
65
+ ### Detect outliers with Bayescan
66
+ - this is an FST based outlier detection method
67
+ - warning: we do not use the FST outputs from this program
68
+
69
+
70
+ ### Detect outliers with PCAdapt
71
+ - this detects outliers based on the first and second principal components
72
+ - it may be useful to use both Bayescan and PCAdapt to remove potential loci under selection to create a neutral SNP dataset
48
73
49
74
50
- ### Allow for missing SNP data with SNPrelate
51
- -
52
75
53
76
### Look at ancestry with admixture
54
77
- Make a plink file from your VCF
0 commit comments