Skip to content

Commit 64ca61a

Browse files
committed
Added task 3475
1 parent 07ecc0e commit 64ca61a

File tree

3 files changed

+261
-0
lines changed

3 files changed

+261
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
3475\. DNA Pattern Recognition
2+
3+
Medium
4+
5+
Table: `Samples`
6+
7+
+----------------+---------+
8+
| Column Name | Type |
9+
+----------------+---------+
10+
| sample_id | int |
11+
| dna_sequence | varchar |
12+
| species | varchar |
13+
+----------------+---------+
14+
sample_id is the unique key for this table.
15+
Each row contains a DNA sequence represented as a string of characters (A, T, G, C) and the species it was collected from.
16+
17+
Biologists are studying basic patterns in DNA sequences. Write a solution to identify `sample_id` with the following patterns:
18+
19+
* Sequences that **start** with **ATG** (a common **start codon**)
20+
* Sequences that **end** with either **TAA**, **TAG**, or **TGA** (**stop codons**)
21+
* Sequences containing the motif **ATAT** (a simple repeated pattern)
22+
* Sequences that have **at least** `3` **consecutive** **G** (like **GGG** or **GGGG**)
23+
24+
Return _the result table ordered by_ `sample_id` _in **ascending** order_.
25+
26+
The result format is in the following example.
27+
28+
**Example:**
29+
30+
**Input:**
31+
32+
Samples table:
33+
34+
+-----------+------------------+-----------+
35+
| sample_id | dna_sequence | species |
36+
+-----------+------------------+-----------+
37+
| 1 | ATGCTAGCTAGCTAA | Human |
38+
| 2 | GGGTCAATCATC | Human |
39+
| 3 | ATATATCGTAGCTA | Human |
40+
| 4 | ATGGGGTCATCATAA | Mouse |
41+
| 5 | TCAGTCAGTCAG | Mouse |
42+
| 6 | ATATCGCGCTAG | Zebrafish |
43+
| 7 | CGTATGCGTCGTA | Zebrafish |
44+
+-----------+------------------+-----------+
45+
46+
**Output:**
47+
48+
+-----------+------------------+-------------+-------------+------------+------------+------------+
49+
| sample_id | dna_sequence | species | has_start | has_stop | has_atat | has_ggg |
50+
+-----------+------------------+-------------+-------------+------------+------------+------------+
51+
| 1 | ATGCTAGCTAGCTAA | Human | 1 | 1 | 0 | 0 |
52+
| 2 | GGGTCAATCATC | Human | 0 | 0 | 0 | 1 |
53+
| 3 | ATATATCGTAGCTA | Human | 0 | 0 | 1 | 0 |
54+
| 4 | ATGGGGTCATCATAA | Mouse | 1 | 1 | 0 | 1 |
55+
| 5 | TCAGTCAGTCAG | Mouse | 0 | 0 | 0 | 0 |
56+
| 6 | ATATCGCGCTAG | Zebrafish | 0 | 1 | 1 | 0 |
57+
| 7 | CGTATGCGTCGTA | Zebrafish | 0 | 0 | 0 | 0 |
58+
+-----------+------------------+-------------+-------------+------------+------------+------------+
59+
60+
**Explanation:**
61+
62+
* Sample 1 (ATGCTAGCTAGCTAA):
63+
* Starts with ATG (has\_start = 1)
64+
* Ends with TAA (has\_stop = 1)
65+
* Does not contain ATAT (has\_atat = 0)
66+
* Does not contain at least 3 consecutive 'G's (has\_ggg = 0)
67+
* Sample 2 (GGGTCAATCATC):
68+
* Does not start with ATG (has\_start = 0)
69+
* Does not end with TAA, TAG, or TGA (has\_stop = 0)
70+
* Does not contain ATAT (has\_atat = 0)
71+
* Contains GGG (has\_ggg = 1)
72+
* Sample 3 (ATATATCGTAGCTA):
73+
* Does not start with ATG (has\_start = 0)
74+
* Does not end with TAA, TAG, or TGA (has\_stop = 0)
75+
* Contains ATAT (has\_atat = 1)
76+
* Does not contain at least 3 consecutive 'G's (has\_ggg = 0)
77+
* Sample 4 (ATGGGGTCATCATAA):
78+
* Starts with ATG (has\_start = 1)
79+
* Ends with TAA (has\_stop = 1)
80+
* Does not contain ATAT (has\_atat = 0)
81+
* Contains GGGG (has\_ggg = 1)
82+
* Sample 5 (TCAGTCAGTCAG):
83+
* Does not match any patterns (all fields = 0)
84+
* Sample 6 (ATATCGCGCTAG):
85+
* Does not start with ATG (has\_start = 0)
86+
* Ends with TAG (has\_stop = 1)
87+
* Contains ATAT, at the very start of the sequence (has\_atat = 1)
88+
* Does not contain at least 3 consecutive 'G's (has\_ggg = 0)
89+
* Sample 7 (CGTATGCGTCGTA):
90+
* Does not start with ATG (has\_start = 0)
91+
* Does not end with TAA, TAG, or TGA (has\_stop = 0)
92+
* Does not contain ATAT (has\_atat = 0)
93+
* Does not contain at least 3 consecutive 'G's (has\_ggg = 0)
94+
95+
**Note:**
96+
97+
* The result is ordered by sample\_id in ascending order
98+
* For each pattern, 1 indicates the pattern is present and 0 indicates it is not present
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Write your MySQL query statement below
# #Medium #2025_03_04_Time_645_ms_(100.00%)_Space_0.0_MB_(100.00%)
# Returns every sample together with four pattern flags, ordered by sample_id.
WITH SampleAnalysisCte AS (
    SELECT sample_id, dna_sequence, species,
        # Start codon: the sequence begins with ATG.
        dna_sequence REGEXP '^ATG' AS has_start,
        # Stop codon: the sequence ends with TAA, TAG or TGA.
        dna_sequence REGEXP '(TAA|TAG|TGA)$' AS has_stop,
        # REGEXP matches anywhere in the string, so no leading/trailing wildcard is needed.
        dna_sequence REGEXP 'ATAT' AS has_atat,
        # Three consecutive G also covers longer runs such as GGGG.
        dna_sequence REGEXP 'GGG' AS has_ggg
    FROM Samples
)

SELECT * FROM SampleAnalysisCte
ORDER BY sample_id;
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
package g3401_3500.s3475_dna_pattern_recognition
2+
3+
import org.hamcrest.CoreMatchers.equalTo
4+
import org.hamcrest.MatcherAssert.assertThat
5+
import org.junit.jupiter.api.Test
6+
import org.zapodot.junit.db.annotations.EmbeddedDatabase
7+
import org.zapodot.junit.db.annotations.EmbeddedDatabaseTest
8+
import org.zapodot.junit.db.common.CompatibilityMode
9+
import java.io.BufferedReader
10+
import java.io.FileNotFoundException
11+
import java.io.FileReader
12+
import java.sql.SQLException
13+
import java.util.stream.Collectors
14+
import javax.sql.DataSource
15+
16+
/**
 * Runs `script.sql` for task 3475 against an embedded database in MySQL
 * compatibility mode and checks the result set row by row.
 *
 * The fixture mirrors the example from the problem statement: seven samples,
 * with samples 4 and 5 belonging to the `Mouse` species.
 */
@EmbeddedDatabaseTest(
    compatibilityMode = CompatibilityMode.MySQL,
    initialSqls = [
        (
            " CREATE TABLE Samples (" +
                "    sample_id INT," +
                "    dna_sequence VARCHAR(100)," +
                "    species VARCHAR(100)" +
                ");" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(1, 'ATGCTAGCTAGCTAA', 'Human');" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(2, 'GGGTCAATCATC', 'Human');" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(3, 'ATATATCGTAGCTA', 'Human');" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(4, 'ATGGGGTCATCATAA', 'Mouse');" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(5, 'TCAGTCAGTCAG', 'Mouse');" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(6, 'ATATCGCGCTAG', 'Zebrafish');" +
                "insert into Samples (sample_id, dna_sequence, species) values " +
                "(7, 'CGTATGCGTCGTA', 'Zebrafish');"
            ),
    ],
)
internal class MysqlTest {
    /**
     * Executes the solution script and asserts that it yields exactly the
     * expected rows, in `sample_id` order, and nothing after them.
     *
     * @param dataSource embedded database pre-populated by [EmbeddedDatabaseTest]
     * @throws SQLException if the query cannot be executed or read
     * @throws FileNotFoundException if the script file is missing
     */
    @Test
    @Throws(SQLException::class, FileNotFoundException::class)
    fun testScript(@EmbeddedDatabase dataSource: DataSource) {
        // Columns: sample_id, dna_sequence, species, has_start, has_stop, has_atat, has_ggg.
        // Every column is read as a string; the flag columns are expected as TRUE/FALSE.
        val expectedRows = listOf(
            listOf("1", "ATGCTAGCTAGCTAA", "Human", "TRUE", "TRUE", "FALSE", "FALSE"),
            listOf("2", "GGGTCAATCATC", "Human", "FALSE", "FALSE", "FALSE", "TRUE"),
            listOf("3", "ATATATCGTAGCTA", "Human", "FALSE", "FALSE", "TRUE", "FALSE"),
            listOf("4", "ATGGGGTCATCATAA", "Mouse", "TRUE", "TRUE", "FALSE", "TRUE"),
            listOf("5", "TCAGTCAGTCAG", "Mouse", "FALSE", "FALSE", "FALSE", "FALSE"),
            listOf("6", "ATATCGCGCTAG", "Zebrafish", "FALSE", "TRUE", "TRUE", "FALSE"),
            listOf("7", "CGTATGCGTCGTA", "Zebrafish", "FALSE", "FALSE", "FALSE", "FALSE"),
        )

        // Read the whole script and close the reader before touching the database.
        val scriptText =
            BufferedReader(
                FileReader(
                    (
                        "src/main/kotlin/g3401_3500/" +
                            "s3475_dna_pattern_recognition/" +
                            "script.sql"
                        ),
                ),
            ).use { reader ->
                reader.lines().collect(Collectors.joining("\n"))
            }
        // Drop '#' comments (up to and including the line break) before executing.
        val query = scriptText.replace("#.*?\\r?\\n".toRegex(), "")

        dataSource.connection.use { connection ->
            connection.createStatement().use { statement ->
                statement.executeQuery(query).use { resultSet ->
                    for (expectedRow in expectedRows) {
                        assertThat<Boolean>(resultSet.next(), equalTo<Boolean>(true))
                        expectedRow.forEachIndexed { index, expectedValue ->
                            // JDBC column indices are 1-based.
                            assertThat<String>(
                                resultSet.getNString(index + 1),
                                equalTo<String>(expectedValue),
                            )
                        }
                    }
                    // No rows may follow the last expected one.
                    assertThat<Boolean>(resultSet.next(), equalTo<Boolean>(false))
                }
            }
        }
    }
}

0 commit comments

Comments
 (0)