|
1 | 1 | # TODO 1: a using MLJModelInterface or import MLJModelInterface statement
|
# Package-level constants exposed to the MLJ interface.

# One-line package description; passed as `descr` in the model metadata.
const ParallelKMeans_Desc = "Parallel & lightning fast implementation of all variants of the KMeans clustering algorithm in native Julia."

# Available algorithm variants, keyed by the symbol accepted as the `algo` hyperparameter.
const MLJDICT = Dict(
    :Lloyd => Lloyd(),
    :Hamerly => Hamerly(),
    :LightElkan => LightElkan(),
)
2 | 9 |
|
3 | 10 | ####
|
4 | 11 | #### MODEL DEFINITION
|
5 | 12 | ####
|
6 | 13 | # TODO 2: MLJ-compatible model types and constructors
|
7 |
| -@mlj_model mutable struct KMeans <: MLJModelInterface.Unsupervised |
8 |
| - # Hyperparameters of the model |
9 |
| - algo::Symbol = :Lloyd::(_ in (:Lloyd, :Hamerly, :LightElkan)) |
10 |
| - k_init::String = "k-means++"::(_ in ("k-means++", String)) # allow user seeding? |
11 |
| - k::Int = 3::(_ > 0) |
12 |
| - tol::Float64 = 1e-6::(_ < 1) |
13 |
| - max_iters::Int = 300::(_ > 0) |
14 |
| - copy::Bool = true |
15 |
| - threads::Int = Threads.nthreads()::(_ > 0) |
16 |
| - verbosity::Int = 0::(_ in (0, 1)) # Temp fix. Do we need to follow mlj verbosity style? |
17 |
| - init = nothing |
| 14 | + |
"""
    KMeans <: MLJModelInterface.Unsupervised

MLJ model type holding the hyperparameters of the KMeans clustering algorithm.

Instances are normally created through the keyword constructor `KMeans(; ...)`, which
validates the supplied values via `MLJModelInterface.clean!`.
"""
mutable struct KMeans <: MLJModelInterface.Unsupervised
    # Algorithm variant; must be one of the keys of `MLJDICT`.
    algo::Symbol
    # Name of the seeding algorithm (e.g. "k-means++").
    k_init::String
    # Number of clusters; must be greater than 0.
    k::Int
    # Tolerance level; must be less than 1.
    tol::Float64
    # Number of permitted iterations; must be greater than 0.
    max_iters::Int
    # When `false`, the input table is transposed without copying in `fit`.
    copy::Bool
    # Number of threads to use; must be at least 1.
    threads::Int
    # 0 (no info) or 1 (info requested).
    verbosity::Int
    # NOTE(review): presumably user-supplied initial centroids, `nothing` when
    # unset -- confirm against `fit`.
    init::Any
end
|
19 | 26 |
|
20 | 27 |
|
21 |
| -# Expose all instances of user specified structs and package artifcats. |
22 |
| -const ParallelKMeans_Desc = "Parallel & lightning fast implementation of all variants of the KMeans clustering algorithm in native Julia." |
"""
    KMeans(; algo=:Lloyd, k_init="k-means++", k=3, tol=1e-6, max_iters=300,
           copy=true, threads=Threads.nthreads(), verbosity=0, init=nothing)

Keyword constructor for the `KMeans` model.

Builds the model from the supplied hyperparameters, passes it through
`MLJModelInterface.clean!`, and emits the returned message as a warning when it is
not empty. Returns the (possibly corrected) model.
"""
function KMeans(;
    algo=:Lloyd,
    k_init="k-means++",
    k=3,
    tol=1e-6,
    max_iters=300,
    copy=true,
    threads=Threads.nthreads(),
    verbosity=0,
    init=nothing,
)
    model = KMeans(algo, k_init, k, tol, max_iters, copy, threads, verbosity, init)

    # `clean!` may mutate `model`; a non-empty result describes what was changed.
    message = MLJModelInterface.clean!(model)
    if !isempty(message)
        @warn message
    end

    return model
end
| 37 | + |
| 38 | + |
"""
    MLJModelInterface.clean!(m::KMeans)

Validate the hyperparameters of `m`, resetting every invalid one to its default in place.

Each hyperparameter is checked independently, so several invalid values are all
corrected (and all reported) by a single call.

# Returns
A `String` containing one sentence per correction, separated by single spaces. The
string is empty when no hyperparameter had to be changed.
"""
function MLJModelInterface.clean!(m::KMeans)
    warnings = String[]

    if !haskey(MLJDICT, m.algo)
        push!(
            warnings,
            "Unsupported algorithm supplied. Defaulting to the Lloyd algorithm.",
        )
        m.algo = :Lloyd
    end

    # "random" is itself a supported seeding choice; only other values are replaced.
    if !(m.k_init in ("k-means++", "random"))
        push!(
            warnings,
            "Only `k-means++` or random seeding algorithms are supported. " *
            "Defaulting to random seeding.",
        )
        m.k_init = "random"
    end

    if m.k < 1
        push!(
            warnings,
            "Number of clusters must be greater than 0. Defaulting to 3 clusters.",
        )
        m.k = 3
    end

    # Written as a negation so that a NaN tolerance is also rejected.
    if !(m.tol < 1.0)
        push!(warnings, "Tolerance level must be less than 1. Defaulting to tol of 1e-6.")
        m.tol = 1e-6
    end

    if !(m.max_iters > 0)
        push!(
            warnings,
            "Number of permitted iterations must be greater than 0. " *
            "Defaulting to 300 iterations.",
        )
        m.max_iters = 300
    end

    if !(m.threads > 0)
        push!(
            warnings,
            "Number of threads must be at least 1. Defaulting to all threads available.",
        )
        m.threads = Threads.nthreads()
    end

    if !(m.verbosity in (0, 1))
        push!(
            warnings,
            "Verbosity must be either 0 (no info) or 1 (info requested). Defaulting to 0.",
        )
        m.verbosity = 0
    end

    return join(warnings, " ")
end
23 | 72 |
|
24 |
| -# availalbe variants for reference |
25 |
| -const MLJDICT = Dict(:Lloyd => Lloyd(), |
26 |
| - :Hamerly => Hamerly(), |
27 |
| - :LightElkan => LightElkan()) |
28 | 73 |
|
29 | 74 | # TODO 3: implementation of fit, predict, and fitted_params of the model
|
30 | 75 | ####
|
31 | 76 | #### FIT FUNCTION
|
32 | 77 | ####
|
33 | 78 | """
|
34 | 79 | TODO 3.1: Docs
|
| 80 | + # fit the specified struct as a ParaKMeans model |
35 | 81 |
|
36 | 82 | See also the [package documentation](https://pydatablog.github.io/ParallelKMeans.jl/stable).
|
37 | 83 | """
|
38 | 84 | function MLJModelInterface.fit(m::KMeans, X)
|
39 |
| - # fit the specified struct as a ParaKMeans model |
40 |
| - |
41 | 85 | # convert tabular input data into the matrix model expects. Column assumed as features so input data is permuted
|
42 | 86 | if !m.copy
|
43 | 87 | # transpose input table without copying and pass to model
|
@@ -123,4 +167,4 @@ metadata_model(KMeans,
|
123 | 167 | output = MLJModelInterface.Table(MLJModelInterface.Count),
|
124 | 168 | weights = false,
|
125 | 169 | descr = ParallelKMeans_Desc,
|
126 |
| - path = "ParallelKMeans.src.mlj_interface.KMeans") |
| 170 | + path = "ParallelKMeans.KMeans") |
0 commit comments