Skip to content

Commit 6cbe215

Browse files
authored
Merge pull request #73 from STRIDES/drafts
Publishing Amazon Q tutorial
2 parents e04f30c + b6f9286 commit 6cbe215

21 files changed

+706
-0
lines changed

docs/images/Q-IAM-role.png

126 KB
Loading

docs/images/Q-R-script.png

71.3 KB
Loading

docs/images/Q-amazon-q-jup.png

90.1 KB
Loading

docs/images/Q-code-completion-1.png

36.4 KB
Loading

docs/images/Q-code-completion.png

44.8 KB
Loading

docs/images/Q-domain-name.png

27.5 KB
Loading

docs/images/Q-explain.png

83.1 KB
Loading

docs/images/Q-fix.png

75.3 KB
Loading

docs/images/Q-iam-policy-review.png

99.8 KB
Loading

docs/images/Q-jupy-lab.png

83.6 KB
Loading

docs/images/Q-optimize-script.png

65.6 KB
Loading

docs/images/Q-optimize.png

74.8 KB
Loading

docs/images/Q-parallel-processing.png

99.4 KB
Loading

docs/images/Q-role-policy.png

79.7 KB
Loading
103 KB
Loading

docs/images/Q-snakemake-cloud.png

58.2 KB
Loading

docs/images/Q-snakemake-cluod.png

58.2 KB
Loading

docs/images/Q-snakemake-wf.png

51.7 KB
Loading

notebooks/GenAI/AWS_Amazon_Q_Developer.ipynb

+547
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import os
import shlex
import subprocess

import pandas as pd

# RNA-seq QC and alignment driver: FastQC -> MultiQC -> STAR -> samtools index.
#
# The pipeline is driven by 'samplesheet.csv'; the only columns read below are
# 'file_path' (input reads) and 'sample_id' (used to name the STAR outputs).
#
# Every external tool is started from an argument list (no shell) with
# check=True, so values coming from the sample sheet are never interpreted as
# shell syntax and a failing step stops the pipeline instead of letting later
# steps run on missing inputs.

GENOME_DIR = "/path/to/genome"  # placeholder: point this at the STAR genome index directory
FASTQC_DIR = "./fastqc_results/"
MULTIQC_DIR = "./multiqc_report/"
STAR_DIR = "./star_results/"


def run_tool(args):
    """Run one external command without a shell.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero status.
        FileNotFoundError: if the executable is not on PATH.
    """
    subprocess.run(args, check=True)


def read_files(row):
    """Split a sample sheet 'file_path' cell into individual path arguments.

    shlex.split keeps the whitespace word-splitting the previous shell-based
    invocation performed (so a cell holding several space-separated files
    still yields several arguments, and a quoted path stays intact) without
    executing shell metacharacters such as ';' or '$(...)'.
    """
    return shlex.split(str(row['file_path']))


# Step 1: Read the sample sheet
sample_sheet = pd.read_csv('samplesheet.csv')

# Create the directories the tools write into before any tool runs.
os.makedirs(FASTQC_DIR, exist_ok=True)
os.makedirs(STAR_DIR, exist_ok=True)

# Step 2: Run FastQC
for _, row in sample_sheet.iterrows():
    run_tool(["fastqc", *read_files(row), "-o", FASTQC_DIR])

# Step 3: Run MultiQC
run_tool(["multiqc", FASTQC_DIR, "-o", MULTIQC_DIR])

# Step 4: Run STAR aligner
for _, row in sample_sheet.iterrows():
    star_prefix = f"{STAR_DIR}{row['sample_id']}"
    run_tool([
        "STAR",
        "--genomeDir", GENOME_DIR,
        "--readFilesIn", *read_files(row),
        # Request a coordinate-sorted BAM: samtools index (step 5) needs one,
        # and without this option STAR writes an unsorted SAM file instead.
        "--outSAMtype", "BAM", "SortedByCoordinate",
        "--outFileNamePrefix", star_prefix,
    ])

# Step 5: Index BAM files with Samtools
for _, row in sample_sheet.iterrows():
    # STAR appends this fixed suffix to --outFileNamePrefix for sorted BAM output.
    bam_file = f"{STAR_DIR}{row['sample_id']}Aligned.sortedByCoord.out.bam"
    run_tool(["samtools", "index", bam_file])
26+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "bed9c0c9-4756-4161-b4be-e32ce3a58bff",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"#Cell 1\n",
11+
"#import libraries\n",
12+
"\n",
13+
"import pandas as pd\n",
14+
"import numpy as np"
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": 2,
20+
"id": "64f8d006-28f3-4d83-ae3a-9e23cccff5d7",
21+
"metadata": {},
22+
"outputs": [
23+
{
24+
"name": "stdout",
25+
"output_type": "stream",
26+
"text": [
27+
" Gene Expression_Level Sample_ID Condition\n",
28+
"0 GeneA 12.5 S1 Control\n",
29+
"1 GeneB 8.3 S2 Treated\n",
30+
"2 GeneC 15.2 S3 Control\n",
31+
"3 GeneD 7.8 S4 Treated\n"
32+
]
33+
}
34+
],
35+
"source": [
36+
"#Cell 2\n",
37+
"\n",
38+
"# Initialize data of lists\n",
39+
"data = {\n",
40+
" 'Gene': ['GeneA', 'GeneB', 'GeneC', 'GeneD'],\n",
41+
" 'Expression_Level': [12.5, 8.3, 15.2, 7.8],\n",
42+
" 'Sample_ID': ['S1', 'S2', 'S3', 'S4'],\n",
43+
" 'Condition': ['Control', 'Treated', 'Control', 'Treated']\n",
44+
"}\n",
45+
"\n",
46+
"# Create DataFrame\n",
47+
"df = pd.DataFrame(data)\n",
48+
"\n",
49+
"# Display the DataFrame\n",
50+
"print(df)"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 5,
56+
"id": "8ca19300-1635-4a8a-9ef8-f9554bc1baac",
57+
"metadata": {},
58+
"outputs": [
59+
{
60+
"ename": "NameError",
61+
"evalue": "name 'describe' is not defined",
62+
"output_type": "error",
63+
"traceback": [
64+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
65+
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
66+
"Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# View summary statistics\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mdescribe\u001b[49m()\u001b[38;5;241m.\u001b[39mdf\n",
67+
"\u001b[0;31mNameError\u001b[0m: name 'describe' is not defined"
68+
]
69+
}
70+
],
71+
"source": [
72+
"#Cell 3\n",
73+
"\n",
74+
"# Error debugging test /fix\n",
75+
"# View summary statistics\n",
76+
"describe().df"
77+
]
78+
},
79+
{
80+
"cell_type": "code",
81+
"execution_count": null,
82+
"id": "550a402e-66bd-4890-a063-e3d82679c0a8",
83+
"metadata": {},
84+
"outputs": [],
85+
"source": [
86+
"#Cell 4\n",
87+
"\n",
88+
"# Optimize selection test /optimize\n",
89+
"# Add additional data to the dataframe\n",
90+
"df['Sample_Type'] = ['Tissue1', 'Tissue2', 'Tissue1', 'Tissue2']\n",
91+
"df['P_Value'] = [0.05, 0.01, 0.03, 0.07]"
92+
]
93+
},
94+
{
95+
"cell_type": "code",
96+
"execution_count": null,
97+
"id": "0d45c1aa-2075-4c8a-9ecc-94fb03a71f78",
98+
"metadata": {},
99+
"outputs": [],
100+
"source": [
101+
"#Cell 5\n",
102+
"\n",
103+
"#Explain selection test /explain\n",
104+
"#Plot results\n",
105+
"import matplotlib.pyplot as plt\n",
106+
"\n",
107+
"df.plot(x='Sample_ID', y='Expression_Level', kind='line')\n",
108+
"plt.show()"
109+
]
110+
}
111+
],
112+
"metadata": {
113+
"kernelspec": {
114+
"display_name": "Python 3 (ipykernel)",
115+
"language": "python",
116+
"name": "python3"
117+
},
118+
"language_info": {
119+
"codemirror_mode": {
120+
"name": "ipython",
121+
"version": 3
122+
},
123+
"file_extension": ".py",
124+
"mimetype": "text/x-python",
125+
"name": "python",
126+
"nbconvert_exporter": "python",
127+
"pygments_lexer": "ipython3",
128+
"version": "3.11.10"
129+
}
130+
},
131+
"nbformat": 4,
132+
"nbformat_minor": 5
133+
}

0 commit comments

Comments
 (0)