Skip to content

Commit e9eaa89

Browse files
committed
Merge branch 'master' into feature/generate_gaussian_quantiles
2 parents 56a079c + e09eaa2 commit e9eaa89

File tree

4 files changed

+52
-0
lines changed

4 files changed

+52
-0
lines changed

README.md

+6
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ make_gaussian_quantiles | Generate a swiss roll dataset.
4646
[scikit-learn](https://scikit-learn.org/stable/modules/classes.html#samples-generator) in the dataset module, but *it is not an official part
4747
of that project*. It is licensed under [MIT](LICENSE).
4848

49+
### Other Functions
50+
51+
Dataset | Title | Reference
52+
---------------------|-------------------------------------------------------------------------|--------------------------------------------------
53+
generate_twospirals | Generate a two-spirals dataset. | [link](https://la.mathworks.com/matlabcentral/fileexchange/41459-6-functions-for-generating-artificial-datasets)
54+
4955
[travis-img]: https://travis-ci.com/ATISLabs/SyntheticDatasets.jl.svg?branch=master
5056
[travis-url]: https://travis-ci.com/ATISLabs/SyntheticDatasets.jl
5157

src/SyntheticDatasets.jl

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ function __init__()
1111
end
1212

1313
include("sklearn.jl")
14+
include("matlab.jl")
1415

1516
function convert(features::Array{T, 2}, labels::Array{D, 1})::DataFrame where {T <: Number, D <: Number}
1617
df = DataFrame()

src/matlab.jl

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
"""
    generate_twospirals(; n_samples::Int = 2000,
                          start_degrees::Int = 90,
                          total_degrees::Int = 570,
                          noise::Float64 = 0.2)

Generate a two-spirals dataset: two interleaved spiral arms, the second being the first
reflected through the origin.

The sampled points are handed to this module's `convert(features, labels)`, which returns
a `DataFrame`; each sample contributes its two coordinates and its class label
(`0` for the first spiral, `1` for the second).

# Arguments
- `n_samples::Int = 2000`: The total number of points generated. When odd, the second
  spiral receives the extra point.
- `start_degrees::Int = 90`: Starting angle of the spirals in degrees. Because the radius
  of a point equals its angle (in radians), this also determines how far from the origin
  the spirals start.
- `total_degrees::Int = 570`: Angular sweep in degrees; controls the length of the spirals.
- `noise::Float64 = 0.2`: Scale of the noise. Each coordinate is shifted by an independent
  uniform draw from `[0, noise)`.

# Throws
- `ArgumentError`: if `n_samples` is negative.

Reference: [link](https://la.mathworks.com/matlabcentral/fileexchange/41459-6-functions-for-generating-artificial-datasets)
"""
function generate_twospirals(; n_samples::Int = 2000,
                               start_degrees::Int = 90,
                               total_degrees::Int = 570,
                               noise::Float64 = 0.2)
    n_samples >= 0 ||
        throw(ArgumentError("n_samples must be non-negative, got $n_samples"))

    start_angle = deg2rad(start_degrees)
    sweep = deg2rad(total_degrees)

    # Split the samples between the two spirals; the second takes the odd one out.
    n_first = fld(n_samples, 2)
    n_second = n_samples - n_first

    # sqrt of a uniform draw places relatively more points at larger angles,
    # where the spiral arm is longer.
    theta = start_angle .+ sqrt.(rand(n_first, 1)) .* sweep
    x_first = -cos.(theta) .* theta .+ rand(n_first, 1) .* noise
    y_first = sin.(theta) .* theta .+ rand(n_first, 1) .* noise

    theta = start_angle .+ sqrt.(rand(n_second, 1)) .* sweep
    x_second = cos.(theta) .* theta .+ rand(n_second, 1) .* noise
    y_second = -sin.(theta) .* theta .+ rand(n_second, 1) .* noise

    features = vcat(hcat(x_first, y_first), hcat(x_second, y_second))
    # One label per row of `features`: the second group must have n_second entries
    # (not n_first), otherwise odd sample counts leave the last point unlabeled.
    labels = vcat(zeros(Int, n_first), ones(Int, n_second))

    return convert(features, labels)
end

test/runtests.jl

+12
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ using Test
4747
@test size(data)[1] == samples
4848
@test size(data)[2] == features + 1
4949

50+
@test size(data)[1] == samples
51+
@test size(data)[2] == features + 1
52+
5053
data = SyntheticDatasets.generate_friedman1(n_samples = samples,
5154
n_features = features)
5255

@@ -86,3 +89,12 @@ using Test
8689
@test size(data)[1] == samples
8790
@test size(data)[2] == features + 1
8891
end
92+
93+
@testset "Matlab Generators" begin
    samples = 20000

    data = SyntheticDatasets.generate_twospirals(n_samples = samples,
                                                 noise = 2.2)

    # One row per requested sample.
    @test size(data)[1] == samples
    # Two coordinates plus the class label, mirroring the `features + 1`
    # column checks used for the other generators.
    @test size(data)[2] == 3
end

0 commit comments

Comments
 (0)