Skip to content

Commit 9aa6b56

Browse files
committed
moving sklearn code
1 parent eb81411 commit 9aa6b56

File tree

2 files changed

+87
-86
lines changed

2 files changed

+87
-86
lines changed

src/SyntheticDatasets.jl

+1-86
Original file line numberDiff line numberDiff line change
@@ -10,92 +10,7 @@ function __init__()
1010
copy!(datasets, pyimport("sklearn.datasets"))
1111
end
1212

13-
"""
    generate_moons(; n_samples::Union{Tuple{Int, Int}, Int} = 100,
                     shuffle::Bool = true,
                     noise::Union{Nothing, Float64} = nothing,
                     random_state::Union{Int, Nothing} = nothing)::DataFrame

Generate two interleaving half circles. Thin wrapper around `make_moons` from
`sklearn.datasets`; the result is handed to `convert` and returned as a `DataFrame`.

# Arguments
- `n_samples::Union{Tuple{Int, Int}, Int} = 100`: If an integer, the total number of points generated. If a two-element tuple, the number of points in each of the two moons.
- `shuffle::Bool = true`: Whether to shuffle the samples.
- `noise::Union{Nothing, Float64} = nothing`: Standard deviation of the Gaussian noise added to the data.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset shuffling and noise.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html)
"""
function generate_moons(; n_samples::Union{Tuple{Int, Int}, Int} = 100,
                          shuffle::Bool = true,
                          noise::Union{Nothing, Float64} = nothing,
                          random_state::Union{Int, Nothing} = nothing)::DataFrame
    # Every keyword is forwarded to sklearn unchanged.
    sampled = datasets.make_moons(n_samples = n_samples,
                                  shuffle = shuffle,
                                  noise = noise,
                                  random_state = random_state)

    # First element: features; second element: labels.
    points, classes = sampled
    return convert(points, classes)
end
38-
39-
"""
    generate_blobs(; n_samples::Union{Int, Array{Int, 1}} = 100,
                     n_features::Int = 2,
                     centers::Union{Int, Union{Nothing, Array{Float64, 2}}} = nothing,
                     cluster_std::Union{Float64, Array{Float64, 1}} = 1.0,
                     center_box::Tuple{Float64, Float64} = (-10.0, 10.0),
                     shuffle::Bool = true,
                     random_state::Union{Int, Nothing} = nothing)::DataFrame

Generate isotropic Gaussian blobs for clustering. Thin wrapper around `make_blobs` from
`sklearn.datasets`; the result is handed to `convert` and returned as a `DataFrame`.

# Arguments
- `n_samples::Union{Int, Array{Int, 1}} = 100`: If an integer, the total number of points, equally divided among clusters. If an array, each element gives the number of samples for one cluster.
- `n_features::Int = 2`: The number of features for each sample.
- `centers::Union{Int, Union{Nothing, Array{Float64, 2}}} = nothing`: The number of centers to generate, or the fixed center locations. If `n_samples` is an integer and `centers` is `nothing`, 3 centers are generated. If `n_samples` is an array, `centers` must be either `nothing` or an array whose length equals the length of `n_samples`.
- `cluster_std::Union{Float64, Array{Float64, 1}} = 1.0`: The standard deviation of the clusters.
- `center_box::Tuple{Float64, Float64} = (-10.0, 10.0)`: The bounding box for each cluster center when centers are generated at random.
- `shuffle::Bool = true`: Whether to shuffle the samples.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset shuffling and noise.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html)
"""
function generate_blobs(; n_samples::Union{Int, Array{Int, 1}} = 100,
                          n_features::Int = 2,
                          centers::Union{Int, Union{Nothing, Array{Float64, 2}}} = nothing,
                          cluster_std::Union{Float64, Array{Float64, 1}} = 1.0,
                          center_box::Tuple{Float64, Float64} = (-10.0, 10.0),
                          shuffle::Bool = true,
                          random_state::Union{Int, Nothing} = nothing)::DataFrame
    # `return_centers = false` is passed explicitly so only the features and
    # labels are requested from sklearn.
    sampled = datasets.make_blobs(n_samples = n_samples,
                                  n_features = n_features,
                                  centers = centers,
                                  cluster_std = cluster_std,
                                  center_box = center_box,
                                  shuffle = shuffle,
                                  random_state = random_state,
                                  return_centers = false)

    # First element: features; second element: labels.
    points, classes = sampled
    return convert(points, classes)
end
77-
78-
"""
    generate_s_curve(; n_samples::Int = 100,
                       noise::Float64 = 0.0,
                       random_state::Union{Int, Nothing} = nothing)::DataFrame

Generate an S curve dataset. Thin wrapper around `make_s_curve` from
`sklearn.datasets`; the result is handed to `convert` and returned as a `DataFrame`.

# Arguments
- `n_samples::Int = 100`: The number of sample points on the S curve.
- `noise::Float64 = 0.0`: Standard deviation of the Gaussian noise added to the data. Unlike `generate_moons`, this keyword does not accept `nothing`; use `0.0` for no noise.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset creation. Pass an integer for reproducible output across multiple function calls.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_s_curve.html)
"""
function generate_s_curve(; n_samples::Int = 100,
                            noise::Float64 = 0.0,
                            random_state::Union{Int, Nothing} = nothing)::DataFrame
    # Every keyword is forwarded to sklearn unchanged.
    (features, labels) = datasets.make_s_curve(n_samples = n_samples,
                                               noise = noise,
                                               random_state = random_state)

    return convert(features, labels)
end
13+
include("sklearn.jl")
9914

10015
function convert(features::Array{T, 2}, labels::Array{D, 1})::DataFrame where {T <: Number, D <: Number}
10116
df = DataFrame()

src/sklearn.jl

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""
    generate_moons(; n_samples::Union{Tuple{Int, Int}, Int} = 100,
                     shuffle::Bool = true,
                     noise::Union{Nothing, Float64} = nothing,
                     random_state::Union{Int, Nothing} = nothing)::DataFrame

Generate two interleaving half circles. Thin wrapper around `make_moons` from
`sklearn.datasets`; the result is handed to `convert` and returned as a `DataFrame`.

# Arguments
- `n_samples::Union{Tuple{Int, Int}, Int} = 100`: If an integer, the total number of points generated. If a two-element tuple, the number of points in each of the two moons.
- `shuffle::Bool = true`: Whether to shuffle the samples.
- `noise::Union{Nothing, Float64} = nothing`: Standard deviation of the Gaussian noise added to the data.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset shuffling and noise.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html)
"""
function generate_moons(; n_samples::Union{Tuple{Int, Int}, Int} = 100,
                          shuffle::Bool = true,
                          noise::Union{Nothing, Float64} = nothing,
                          random_state::Union{Int, Nothing} = nothing)::DataFrame
    # Every keyword is forwarded to sklearn unchanged.
    sampled = datasets.make_moons(n_samples = n_samples,
                                  shuffle = shuffle,
                                  noise = noise,
                                  random_state = random_state)

    # First element: features; second element: labels.
    points, classes = sampled
    return convert(points, classes)
end
26+
27+
"""
    generate_blobs(; n_samples::Union{Int, Array{Int, 1}} = 100,
                     n_features::Int = 2,
                     centers::Union{Int, Union{Nothing, Array{Float64, 2}}} = nothing,
                     cluster_std::Union{Float64, Array{Float64, 1}} = 1.0,
                     center_box::Tuple{Float64, Float64} = (-10.0, 10.0),
                     shuffle::Bool = true,
                     random_state::Union{Int, Nothing} = nothing)::DataFrame

Generate isotropic Gaussian blobs for clustering. Thin wrapper around `make_blobs` from
`sklearn.datasets`; the result is handed to `convert` and returned as a `DataFrame`.

# Arguments
- `n_samples::Union{Int, Array{Int, 1}} = 100`: If an integer, the total number of points, equally divided among clusters. If an array, each element gives the number of samples for one cluster.
- `n_features::Int = 2`: The number of features for each sample.
- `centers::Union{Int, Union{Nothing, Array{Float64, 2}}} = nothing`: The number of centers to generate, or the fixed center locations. If `n_samples` is an integer and `centers` is `nothing`, 3 centers are generated. If `n_samples` is an array, `centers` must be either `nothing` or an array whose length equals the length of `n_samples`.
- `cluster_std::Union{Float64, Array{Float64, 1}} = 1.0`: The standard deviation of the clusters.
- `center_box::Tuple{Float64, Float64} = (-10.0, 10.0)`: The bounding box for each cluster center when centers are generated at random.
- `shuffle::Bool = true`: Whether to shuffle the samples.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset shuffling and noise.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html)
"""
function generate_blobs(; n_samples::Union{Int, Array{Int, 1}} = 100,
                          n_features::Int = 2,
                          centers::Union{Int, Union{Nothing, Array{Float64, 2}}} = nothing,
                          cluster_std::Union{Float64, Array{Float64, 1}} = 1.0,
                          center_box::Tuple{Float64, Float64} = (-10.0, 10.0),
                          shuffle::Bool = true,
                          random_state::Union{Int, Nothing} = nothing)::DataFrame
    # `return_centers = false` is passed explicitly so only the features and
    # labels are requested from sklearn.
    sampled = datasets.make_blobs(n_samples = n_samples,
                                  n_features = n_features,
                                  centers = centers,
                                  cluster_std = cluster_std,
                                  center_box = center_box,
                                  shuffle = shuffle,
                                  random_state = random_state,
                                  return_centers = false)

    # First element: features; second element: labels.
    points, classes = sampled
    return convert(points, classes)
end
65+
66+
"""
    generate_s_curve(; n_samples::Int = 100,
                       noise::Float64 = 0.0,
                       random_state::Union{Int, Nothing} = nothing)::DataFrame

Generate an S curve dataset. Thin wrapper around `make_s_curve` from
`sklearn.datasets`; the result is handed to `convert` and returned as a `DataFrame`.

# Arguments
- `n_samples::Int = 100`: The number of sample points on the S curve.
- `noise::Float64 = 0.0`: Standard deviation of the Gaussian noise added to the data. Unlike `generate_moons`, this keyword does not accept `nothing`; use `0.0` for no noise.
- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset creation. Pass an integer for reproducible output across multiple function calls.

Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_s_curve.html)
"""
function generate_s_curve(; n_samples::Int = 100,
                            noise::Float64 = 0.0,
                            random_state::Union{Int, Nothing} = nothing)::DataFrame
    # Every keyword is forwarded to sklearn unchanged.
    (features, labels) = datasets.make_s_curve(n_samples = n_samples,
                                               noise = noise,
                                               random_state = random_state)

    return convert(features, labels)
end

0 commit comments

Comments
 (0)