Skip to content

Commit ae63436

Browse files
committed
standardized fracture functions
1 parent 176fb35 commit ae63436

File tree

3 files changed

+65
-27
lines changed

3 files changed

+65
-27
lines changed

rogtk/__init__.py

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -41,14 +41,14 @@ def nn(expr: IntoExpr, base: int=33) -> pl.Expr:
4141
#@pl.api.register_expr_namespace("fracture")
4242
def assemble_sequences(
4343
expr: IntoExpr,
44-
k: int = 31,
45-
min_coverage: int = 1,
46-
method: str = 'compression', # Changed from enum to str
47-
start_anchor: str | None = None, # New parameter
48-
end_anchor: str | None = None, # New parameter
44+
k: int = 10,
45+
min_coverage: int = 5,
46+
method: str = 'shortest_path',
47+
start_anchor: str | None = None,
48+
end_anchor: str | None = None,
49+
min_length: int | None = None,
4950
export_graphs: bool = False,
5051
only_largest: bool = False,
51-
min_length: int | None = None,
5252
auto_k: bool = False,
5353
prefix: str | None = None
5454
) -> pl.Expr:
@@ -83,9 +83,9 @@ def assemble_sequences(
8383
"method": method,
8484
"start_anchor": start_anchor,
8585
"end_anchor": end_anchor,
86+
"min_length": min_length,
8687
"export_graphs": export_graphs,
8788
"only_largest": only_largest,
88-
"min_length": min_length,
8989
"auto_k": auto_k,
9090
"prefix": prefix
9191
},
@@ -101,11 +101,13 @@ def sweep_assembly_params(
101101
cov_start: int = 1,
102102
cov_end: int = 150,
103103
cov_step: int = 1,
104-
method: str = 'compression', # Changed from enum to str
105-
start_anchor: str | None = None, # New parameter
106-
end_anchor: str | None = None, # New parameter
104+
method: str = 'shortest_path',
105+
start_anchor: str | None = None,
106+
end_anchor: str | None = None,
107+
min_length: int | None = None,
107108
export_graphs: bool = False,
108-
prefix: str | None = None
109+
prefix: str | None = None,
110+
auto_k: bool = False,
109111
) -> pl.Expr:
110112
"""
111113
Run sequence assembly across ranges of k-mer size and minimum coverage parameters.
@@ -135,32 +137,44 @@ def sweep_assembly_params(
135137
"method": method,
136138
"start_anchor": start_anchor,
137139
"end_anchor": end_anchor,
140+
"min_length": min_length,
138141
"export_graphs": export_graphs,
139-
"prefix": prefix
142+
"prefix": prefix,
143+
"auto_k": auto_k
140144
},
141145
returns_scalar=True,
142146
is_elementwise=False,
143147
)
144148

145149
def optimize_assembly(
146150
expr: IntoExpr,
147-
start_k: int,
148-
start_min_coverage: int,
149-
start_anchor: str,
150-
end_anchor: str,
151+
method: str = 'shortest_path',
152+
start_anchor: str | None = None,
153+
end_anchor: str | None = None,
154+
start_k: int = 31,
155+
start_min_coverage: int = 1,
156+
min_length: int | None = None,
157+
export_graphs: bool = False,
158+
prefix: str | None = None,
151159
max_iterations: int | None = None,
152160
explore_k: bool | None = None,
153161
prioritize_length: bool | None = None,
154162
) -> pl.Expr:
163+
if start_anchor is None or end_anchor is None:
164+
raise ValueError("Both start_anchor and end_anchor are required")
155165
return register_plugin_function(
156166
plugin_path=Path(__file__).parent,
157167
function_name="optimize_assembly_expr",
158168
args=expr,
159169
kwargs={
160-
"start_k": start_k,
161-
"start_min_coverage": start_min_coverage,
170+
"method": method,
162171
"start_anchor": start_anchor,
163172
"end_anchor": end_anchor,
173+
"start_k": start_k,
174+
"start_min_coverage": start_min_coverage,
175+
"min_length": min_length,
176+
"export_graphs": export_graphs,
177+
"prefix": prefix,
164178
"max_iterations": max_iterations,
165179
"explore_k": explore_k,
166180
"prioritize_length": prioritize_length

src/expressions.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -231,12 +231,18 @@ struct AssemblyKwargs {
231231
k: usize,
232232
min_coverage: usize,
233233
method: String,
234+
// optional parameters
234235
start_anchor: Option<String>,
235236
end_anchor: Option<String>,
236-
export_graphs: Option<bool>,
237237
min_length: Option<usize>,
238+
export_graphs: Option<bool>,
239+
only_largest: Option<bool>,
238240
auto_k: Option<bool>,
239241
prefix: Option<String>,
242+
// optimization related
243+
max_iterations: Option<bool>,
244+
explore_k: Option<bool>,
245+
prioritize_length: Option<bool>,
240246
}
241247

242248
// Default string output type for the expression
@@ -321,8 +327,10 @@ struct SweepParams {
321327
method: String,
322328
start_anchor: Option<String>,
323329
end_anchor: Option<String>,
330+
min_length: Option<usize>,
324331
export_graphs: Option<bool>,
325-
prefix: Option<String>
332+
prefix: Option<String>,
333+
auto_k: Option<bool>,
326334
}
327335

328336
fn struct_output_type(input_fields: &[Field]) -> PolarsResult<Field> {

src/fracture_opt.rs

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@ use log::*;
99

1010
#[derive(Deserialize)]
1111
pub struct OptimizeParams {
12+
pub start_anchor: Option<String>,
13+
pub end_anchor: Option<String>,
14+
15+
pub method: String,
16+
pub min_length: Option<usize>,
17+
pub export_graphs: Option<bool>,
18+
pub prefix: Option<String>,
19+
1220
pub start_k: usize,
1321
pub start_min_coverage: usize,
14-
pub start_anchor: String,
15-
pub end_anchor: String,
1622
pub max_iterations: Option<usize>,
1723
pub explore_k: Option<bool>,
1824
pub prioritize_length: Option<bool>,
19-
pub method: AssemblyMethod,
20-
2125
}
2226

2327
mod types {
@@ -289,6 +293,18 @@ pub fn optimize_assembly_expr(inputs: &[Series], kwargs: OptimizeParams) -> Pola
289293
let explore_k = kwargs.explore_k.unwrap_or(false);
290294
let prioritize_length = kwargs.prioritize_length.unwrap_or(false);
291295

296+
let start_anchor = kwargs.start_anchor.ok_or_else(||
297+
PolarsError::ComputeError("start_anchor is required".into()))?;
298+
299+
let end_anchor = kwargs.end_anchor.ok_or_else(||
300+
PolarsError::ComputeError("end_anchor is required".into()))?;
301+
302+
let assembly_method = AssemblyMethod::from_str(
303+
&kwargs.method,
304+
Some(start_anchor.clone()),
305+
Some(end_anchor.clone())
306+
).map_err(|e| PolarsError::ComputeError(e.into()))?;
307+
292308
let start_params = ParamPoint {
293309
k: kwargs.start_k,
294310
min_coverage: kwargs.start_min_coverage,
@@ -300,12 +316,12 @@ pub fn optimize_assembly_expr(inputs: &[Series], kwargs: OptimizeParams) -> Pola
300316
match optimize_assembly(
301317
&sequences,
302318
start_params,
303-
&kwargs.start_anchor,
304-
&kwargs.end_anchor,
319+
&start_anchor,
320+
&end_anchor,
305321
max_iterations,
306322
explore_k,
307323
prioritize_length,
308-
kwargs.method,
324+
assembly_method,
309325
) {
310326
Ok(Some(result)) => {
311327
info!("Optimization successful");

0 commit comments

Comments
 (0)