Skip to content

Commit

Permalink
Merge pull request #738 from LCSB-BioCore/develop
Browse files Browse the repository at this point in the history
Develop → master merge for 1.4.4
  • Loading branch information
exaexa authored Feb 13, 2023
2 parents 57e0c38 + 28ddfbd commit 4927191
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 193 deletions.
17 changes: 0 additions & 17 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,6 @@ variables:
- Invoke-Expression $Env:ARTENOLIS_SOFT_PATH"\julia\"$Env:JULIA_VER"\bin\julia --inline=yes --check-bounds=yes --color=yes --project=@. -e 'import Pkg; Pkg.test(; coverage = true)'"
- exit $LASTEXITCODE

.global_env_win8: &global_env_win8
tags:
- windows8
<<: *global_env_win

.global_env_win10: &global_env_win10
tags:
- windows10
Expand Down Expand Up @@ -153,12 +148,6 @@ linux:julia1.6:
# Additional platform&environment compatibility tests
#

windows8:julia1.8:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia18
<<: *global_env_win8

windows10:julia1.8:
stage: test-compat
<<: *global_trigger_compat_tests
Expand All @@ -171,12 +160,6 @@ mac:julia1.8:
<<: *global_julia18
<<: *global_env_mac

windows8:julia1.6:
stage: test-compat
<<: *global_trigger_compat_tests
<<: *global_julia16
<<: *global_env_win8

windows10:julia1.6:
stage: test-compat
<<: *global_trigger_compat_tests
Expand Down
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "COBREXA"
uuid = "babc4406-5200-4a30-9033-bf5ae714c842"
authors = ["The developers of COBREXA.jl"]
version = "1.4.3"
version = "1.4.4"

[deps]
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
Expand All @@ -14,6 +14,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
PikaParser = "3bbf5609-3e7b-44cd-8549-7c69f321e792"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SBML = "e5567a89-2604-4b09-9718-f5f78e97c3bb"
Expand All @@ -32,7 +33,8 @@ JuMP = "1"
MAT = "0.10"
MacroTools = "0.5.6"
OrderedCollections = "1.4"
SBML = "~1.3"
PikaParser = "0.5"
SBML = "~1.3, ~1.4"
StableRNGs = "1.0"
Tulip = "0.7.0, 0.8.0, 0.9.2"
julia = "1.5"
Expand Down
5 changes: 3 additions & 2 deletions src/COBREXA.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,21 +27,22 @@ module COBREXA

using Distributed
using DistributedData
using DocStringExtensions
using HDF5
using JSON
using JuMP
using LinearAlgebra
using MAT
using MacroTools
using MAT
using OrderedCollections
using Random
using Serialization
using SparseArrays
using StableRNGs
using Statistics
using DocStringExtensions

import Base: findfirst, getindex, show
import PikaParser as PP
import Pkg
import SBML # conflict with Reaction struct name

Expand Down
2 changes: 1 addition & 1 deletion src/base/types/SBMLModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ function bounds(model::SBMLModel)::Tuple{Vector{Float64},Vector{Float64}}
if unit != common_unit
throw(
DomainError(
units_in_sbml,
unit,
"The SBML file uses multiple units; loading would need conversion",
),
)
Expand Down
2 changes: 1 addition & 1 deletion src/base/types/StandardModel.jl
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ Return the gene reaction rule in string format for reaction with `id` in `model`
Return `nothing` if not available.
"""
reaction_gene_association(model::StandardModel, id::String)::Maybe{GeneAssociation} =
_maybemap(identity, model.reactions[id].grr)
model.reactions[id].grr

"""
$(TYPEDSIGNATURES)
Expand Down
225 changes: 150 additions & 75 deletions src/base/utils/gene_associations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,56 @@
"""
$(TYPEDSIGNATURES)
Parse `SBML.GeneProductAssociation` structure to the simpler GeneAssociation.
The input must be (implicitly) in a positive DNF.
A helper for producing predictable unique sequences: the input is collected,
sorted, and duplicates are dropped. It might be faster if the compaction were
done directly in `sort()`.
"""
function _sortunique(x)
    # Work on a private, sorted copy so that equal elements become neighbours.
    items = sort!(collect(x))
    isempty(items) && return items

    # `tail` is the last index of the compacted, duplicate-free prefix.
    tail = firstindex(items)
    for idx in Iterators.drop(eachindex(items), 1)
        # an element equal to the last kept one is a duplicate
        items[idx] == items[tail] && continue
        tail = nextind(items, tail)
        # move the element down only if a gap has opened up
        tail == idx || (items[tail] = items[idx])
    end
    return items[begin:tail]
end

"""
$(TYPEDSIGNATURES)
Parse `SBML.GeneProductAssociation` structure and convert it to a strictly
positive DNF [`GeneAssociation`](@ref). Negation (`SBML.GPANot`) is not
supported.
"""
function _parse_grr(gpa::SBML.GeneProductAssociation)::GeneAssociation
parse_ref(x) =
typeof(x) == SBML.GPARef ? [x.gene_product] :
begin
@_models_log @warn "Could not parse a part of gene association, ignoring: $x"
String[]

function fold_and(dnfs::Vector{Vector{Vector{String}}})::Vector{Vector{String}}
if isempty(dnfs)
[String[]]
else
_sortunique(
_sortunique(String[l; r]) for l in dnfs[1] for r in fold_and(dnfs[2:end])
)
end
parse_and(x) =
typeof(x) == SBML.GPAAnd ? vcat([parse_and(i) for i in x.terms]...) : parse_ref(x)
parse_or(x) =
typeof(x) == SBML.GPAOr ? vcat([parse_or(i) for i in x.terms]...) : [parse_and(x)]
return parse_or(gpa)
end

dnf(x::SBML.GPARef) = [[x.gene_product]]
dnf(x::SBML.GPAOr) = _sortunique(vcat(dnf.(x.terms)...))
dnf(x::SBML.GPAAnd) = fold_and(dnf.(x.terms))
dnf(x) = throw(
DomainError(
x,
"unsupported gene product association contents of type $(typeof(x))",
),
)
return dnf(gpa)
end

"""
Expand Down Expand Up @@ -49,69 +84,109 @@ julia> _parse_grr("(YIL010W and YLR043C) or (YIL010W and YGR209C)")
function _parse_grr(s::String)::Maybe{GeneAssociation}
    # First turn the string into the SBML structure (which may be `nothing`),
    # then hand it to `_parse_grr` again through `_maybemap`.
    sbml_gpa = _parse_grr_to_sbml(s)
    return _maybemap(_parse_grr, sbml_gpa)
end

"""
$(TYPEDSIGNATURES)
Internal helper for parsing the string GRRs into SBML data structures. More
general than [`_parse_grr`](@ref).
PikaParser grammar for stringy GRR expressions.
"""
function _parse_grr_to_sbml(str::String)::Maybe{SBML.GeneProductAssociation}
s = str
toks = String[]
m = Nothing
while !isnothing(
begin
m = match(r"( +|[a-zA-Z0-9_-]+|[^ a-zA-Z0-9_()-]+|[(]|[)])(.*)", s)
end,
)
tok = strip(m.captures[1])
!isempty(tok) && push!(toks, tok)
s = m.captures[2]
const _grr_grammar = begin
# Characters that typically form the identifiers: letters, digits, and a few
# punctuation marks. The '\x03' entry is a very ugly exception for badly parsed
# MAT files.
isident(x::Char) =
    isletter(x) || isdigit(x) || x in ('_', '-', ':', '.', '\'', '[', ']', '\x03')

# scanner helpers
# `eat(p)` builds a function that walks `m` from its start and returns the
# index of the last item of the leading run satisfying `p`, or 0 if the very
# first item already fails (or `m` is empty).
eat(p) = function (m)
    stop = 0
    for idx in eachindex(m)
        if p(m[idx])
            stop = idx
        else
            break
        end
    end
    return stop
end

fail() = throw(DomainError(str, "Could not parse GRR"))

# shunting yard
ops = Symbol[]
vals = SBML.GeneProductAssociation[]
fold(sym, op) =
while !isempty(ops) && last(ops) == sym
r = pop!(vals)
l = pop!(vals)
pop!(ops)
push!(vals, op([l, r]))
end
for tok in toks
if tok in ["and", "AND", "&", "&&"]
push!(ops, :and)
elseif tok in ["or", "OR", "|", "||"]
fold(:and, SBML.GPAAnd)
push!(ops, :or)
elseif tok == "("
push!(ops, :paren)
elseif tok == ")"
fold(:and, SBML.GPAAnd)
fold(:or, SBML.GPAOr)
if isempty(ops) || last(ops) != :paren
fail()
else
pop!(ops)
end
else
push!(vals, SBML.GPARef(tok))
end
# eat one of keywords
# `kws(w...)` builds a function that takes the leading identifier-like run of
# `m` and returns its last index only if that run is one of the keywords `w`;
# otherwise it returns 0.
kws(w...) = function (m)
    stop = eat(isident)(m)
    return m[begin:stop] in w ? stop : 0
end

fold(:and, SBML.GPAAnd)
fold(:or, SBML.GPAOr)
# Build the grammar; `:expr` is the only rule named in the first argument
# (presumably the list of starting rules -- confirm against PikaParser docs).
PP.make_grammar(
[:expr],
# NOTE(review): `PP.flatten` presumably expands the nested `name => clause`
# pairs below into separate rules over `Char` input -- confirm.
PP.flatten(
Dict(
# whitespace run scanned with `isspace`, with `PP.epsilon` as the alternative
:space => PP.first(PP.scan(eat(isspace)), PP.epsilon),
# identifier: a run of characters accepted by `isident`
:id => PP.scan(eat(isident)),
# "or" operator spellings: `||`, `|`, `OR`, `or`
:orop =>
PP.first(PP.tokens("||"), PP.token('|'), PP.scan(kws("OR", "or"))),
# "and" operator spellings: `&&`, `&`, `AND`, `and`
:andop => PP.first(
PP.tokens("&&"),
PP.token('&'),
PP.scan(kws("AND", "and")),
),
# whole input: an or-expression with surrounding space, then end of input
:expr => PP.seq(:space, :orexpr, :space, PP.end_of_input),
# `:or` refers back to `:orexpr` on its right-hand side only
:orexpr => PP.first(
:or => PP.seq(:andexpr, :space, :orop, :space, :orexpr),
:andexpr,
),
# `:andexpr` is nested below `:orexpr`, so "and" groups before "or"
:andexpr => PP.first(
:and => PP.seq(:baseexpr, :space, :andop, :space, :andexpr),
:baseexpr,
),
# atoms: a plain identifier or a parenthesized or-expression
:baseexpr => PP.first(
:id,
:parenexpr => PP.seq(
PP.token('('),
:space,
:orexpr,
:space,
PP.token(')'),
),
),
),
Char,
),
)
end

if !isempty(ops) || length(vals) > 1
fail()
end
# Choose which submatches of match `m` should be descended into, as a sequence
# of booleans selected by `m.rule`. The fixed-length masks appear to line up
# with the items of the corresponding `PP.seq` clauses in `_grr_grammar`
# (TODO confirm); any other rule opens none of its submatches.
function _grr_grammar_open(m, _)
    rule = m.rule
    if rule == :expr
        return Bool[0, 1, 0, 0]
    elseif rule == :parenexpr
        return Bool[0, 0, 1, 0, 0]
    elseif rule in [:or, :and]
        return Bool[1, 0, 0, 0, 1]
    elseif rule in [:andexpr, :orexpr, :notexpr, :baseexpr]
        return Bool[1]
    else
        return (false for _ in m.submatches)
    end
end

# Combine the already-folded values of the submatches (`subvals`) of match `m`
# into one value, chosen by `m.rule`:
# - `:id` wraps the matched text (`m.view`) in `SBML.GPARef`,
# - `:and` / `:or` join the 1st and 5th subvalue in `SBML.GPAAnd` / `SBML.GPAOr`,
# - `:parenexpr` and `:expr` pass through their 3rd and 2nd subvalue,
# - any other rule passes through its first subvalue, or `nothing` if it has none.
function _grr_grammar_fold(m, _, subvals)
    rule = m.rule
    rule == :id && return SBML.GPARef(m.view)
    rule == :and && return SBML.GPAAnd([subvals[1], subvals[5]])
    rule == :or && return SBML.GPAOr([subvals[1], subvals[5]])
    rule == :parenexpr && return subvals[3]
    rule == :expr && return subvals[2]
    return isempty(subvals) ? nothing : subvals[1]
end

if isempty(vals)
nothing
"""
$(TYPEDSIGNATURES)
Internal helper for parsing the string GRRs into SBML data structures. More
general than [`_parse_grr`](@ref).
"""
function _parse_grr_to_sbml(str::String)::Maybe{SBML.GeneProductAssociation}
all(isspace, str) && return nothing
tree = PP.parse_lex(_grr_grammar, str)
match = PP.find_match_at!(tree, :expr, 1)
if match > 0
return PP.traverse_match(
tree,
match,
open = _grr_grammar_open,
fold = _grr_grammar_fold,
)
else
first(vals)
throw(DomainError(str, "cannot parse GRR"))
end
end

Expand All @@ -124,14 +199,14 @@ string.
# Example
```
julia> _unparse_grr(String, [["YIL010W", "YLR043C"], ["YIL010W", "YGR209C"]])
"(YIL010W and YLR043C) or (YIL010W and YGR209C)"
"(YIL010W && YLR043C) || (YIL010W && YGR209C)"
```
"""
function _unparse_grr(::Type{String}, grr::GeneAssociation)::String
grr_strings = String[]
for gr in grr
push!(grr_strings, "(" * join([g for g in gr], " and ") * ")")
end
grr_string = join(grr_strings, " or ")
return grr_string
function _unparse_grr(
::Type{String},
grr::GeneAssociation;
and = " && ",
or = " || ",
)::String
return join(("(" * join(gr, and) * ")" for gr in grr), or)
end
4 changes: 2 additions & 2 deletions test/analysis/gecko.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
haskey(ecoli_core_reaction_kcats, rid) ?
collect(
Isozyme(
Dict(grr .=> ecoli_core_protein_stoichiometry[rid][i]),
Dict(grr .=> fill(1.0, size(grr))),
ecoli_core_reaction_kcats[rid][i]...,
) for (i, grr) in enumerate(reaction_gene_association(model, rid))
) : Isozyme[]
Expand Down Expand Up @@ -41,7 +41,7 @@

@test isapprox(
rxn_fluxes["BIOMASS_Ecoli_core_w_GAM"],
0.812827846796761,
0.8129179015245396,
atol = TEST_TOLERANCE,
)

Expand Down
Loading

2 comments on commit 4927191

@exaexa
Copy link
Collaborator Author

@exaexa exaexa commented on 4927191 Feb 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/77564

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.4.4 -m "<description of version>" 492719128ae39f806c90e05dd9f1febda5526917
git push origin v1.4.4

Please sign in to comment.