From 5d0d10a05b4acca51a1d86985458477bc6128c70 Mon Sep 17 00:00:00 2001 From: Pedro Date: Tue, 1 Sep 2020 17:32:30 -0300 Subject: [PATCH 1/4] Adding generate circles function --- src/sklearn.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/sklearn.jl b/src/sklearn.jl index 4a65409..b15e79e 100644 --- a/src/sklearn.jl +++ b/src/sklearn.jl @@ -83,4 +83,19 @@ function generate_s_curve(; n_samples::Int = 100, random_state = random_state) return convert(features, labels) +end + +function generate_circles(; n_samples::Union{Int, Tuple{Int, Int}} = 100, + shuffle::Bool = true, + noise::Union{Nothing, Float64} = nothing, + random_state::Union{Int, Nothing} = nothing, + factor::Float64 = 0.8)::DataFrame + +(features, labels) = datasets.make_circles( n_samples = n_samples, + shuffle = shuffle, + noise = noise, + random_state = random_state, + factor = factor) + +return convert(features, labels) end \ No newline at end of file From d51c3e8e19acee3f7f8ed9957d6b50896d2a081c Mon Sep 17 00:00:00 2001 From: Pedro Date: Tue, 1 Sep 2020 17:33:00 -0300 Subject: [PATCH 2/4] Adding generate circles docstring --- src/sklearn.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/sklearn.jl b/src/sklearn.jl index b15e79e..c39e566 100644 --- a/src/sklearn.jl +++ b/src/sklearn.jl @@ -85,6 +85,22 @@ function generate_s_curve(; n_samples::Int = 100, return convert(features, labels) end +""" + function generate_circles(; n_samples::Union{Int, Tuple{Int, Int}} = 100, + shuffle::Bool = true, + noise::Union{Nothing, Float64} = nothing, + random_state::Union{Int, Nothing} = nothing, + factor::Float64 = 0.8)::DataFrame +Make a large circle containing a smaller circle in 2d. Sklearn interface to make_circles. +# Arguments +- `n_samples::Union{Int, Tuple{Int, Int}} = 100`: If int, it is the total number of points generated. For odd numbers, the inner circle will have one point more than the outer circle. If two-element tuple, number of points in outer circle and inner circle. 
+- `shuffle::Bool = true`: Whether to shuffle the samples. +- `noise::Union{Nothing, Float64} = nothing`: Standard deviation of Gaussian noise added to the data. +- `random_state::Union{Int, Nothing} = nothing`: Determines random number generation for dataset shuffling and noise. Pass an int for reproducible output across multiple function calls. +- `factor::Float64 = 0.8`: Scale factor between inner and outer circle. +Reference: [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_circles.html) + +""" function generate_circles(; n_samples::Union{Int, Tuple{Int, Int}} = 100, shuffle::Bool = true, noise::Union{Nothing, Float64} = nothing, From 6d3cfef5ffce9e47bd64e69811787c30af70c330 Mon Sep 17 00:00:00 2001 From: Pedro Date: Tue, 1 Sep 2020 17:37:16 -0300 Subject: [PATCH 3/4] Adding generate circles in the readme --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 39d7ada..3187588 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Dataset | Title make_blobs | Generate isotropic Gaussian blobs for clustering. | [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_moons.html) make_moons | Make two interleaving half circles | [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_blobs.html) make_s_curve | Generate an S curve dataset. 
| [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_s_curve.html) +make_circles | Make a large circle containing a smaller circle in 2d | [link](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_circles.html) **Disclaimer**: SyntheticDatasets.jl borrows code and documentation from [scikit-learn](https://scikit-learn.org/stable/modules/classes.html#samples-generator) in the dataset module, but *it is not an official part From 33be56d941ce58b51e2690f9d625af4735bbe569 Mon Sep 17 00:00:00 2001 From: Pedro Date: Wed, 2 Sep 2020 23:18:55 -0300 Subject: [PATCH 4/4] Adding tests for the generate circles --- test/runtests.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 9f8278d..271a054 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -25,4 +25,9 @@ using Test @test size(data)[1] == samples @test size(data)[2] == 4 + data = SyntheticDatasets.generate_circles(n_samples = samples) + + @test size(data)[1] == samples + @test size(data)[2] == 3 + end \ No newline at end of file