Skip to content

Commit cdb1c04

Browse files
authored
Batch of tests added (#45)
* renamed a few test files. tested the dropna function * indexer tests * subtraction * Subtract, divide, and equals * DataFrame operators - equality * added pop test * Sample test added. Added support for sample with replace.
1 parent 0560fb4 commit cdb1c04

File tree

4 files changed

+166
-11
lines changed

4 files changed

+166
-11
lines changed

src/Pandas.NET/DataFrames/DataFrame.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ public DataFrame(List<Series> data, Series index = null, List<Column> columns =
4545
index.size,
4646
columns.Count
4747
};
48+
49+
foreach (var s in _data)
50+
{
51+
s.SetIndex(_index);
52+
}
53+
4854
}
4955
}
5056
}
Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,76 @@
11
using System;
2+
using System.Collections;
23
using System.Collections.Generic;
34
using System.Linq;
5+
using System.Reflection.PortableExecutable;
6+
using Tensorflow.Util;
47

58
namespace PandasNet;
69

710
public partial class DataFrame
811
{
9-
/// <summary>
/// Returns a new DataFrame built from randomly selected rows of this one.
/// Exactly one of <paramref name="n"/> or <paramref name="frac"/> must be supplied.
/// </summary>
/// <param name="n">Number of rows to draw. Leave at 0 when using <paramref name="frac"/>.</param>
/// <param name="frac">
/// Fraction of the rows to draw; the row count is rounded up.
/// Leave at 0 when using <paramref name="n"/>.
/// </param>
/// <param name="random_state">Seed for the random number generator, so results are repeatable.</param>
/// <param name="replace">
/// When true, the same row may be drawn more than once and the sample may be
/// larger than the frame. When false, every row is drawn at most once.
/// </param>
/// <returns>
/// A DataFrame holding the sampled rows; its index contains the positions of
/// the rows that were drawn from this frame.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when neither or both of <paramref name="n"/> and <paramref name="frac"/> are given,
/// when either is negative (or frac is NaN), when more rows are requested than exist
/// and <paramref name="replace"/> is false, or when rows are requested from an empty frame.
/// </exception>
public DataFrame sample(int n = 0, float frac = 0, int random_state = 0, bool replace = false)
{
    if (n < 0 || frac < 0 || float.IsNaN(frac))
    {
        throw new ArgumentException("n and frac should not be negative");
    }
    if (n == 0 && frac == 0)
    {
        throw new ArgumentException("Either n or frac should be greater than 0");
    }
    if (n != 0 && frac != 0)
    {
        throw new ArgumentException("Only one of n or frac should be greater than 0");
    }

    int rowCount = _index.size;
    if (frac > 0)
    {
        n = (int)Math.Ceiling(frac * rowCount);
    }

    // Without replacement every row can be used only once, so the sample cannot
    // exceed the frame. With replacement oversampling is legitimate.
    if (!replace && n > rowCount)
    {
        throw new ArgumentException("n should not be greater than the size of the DataFrame unless replace is true");
    }
    if (replace && n > 0 && rowCount == 0)
    {
        throw new ArgumentException("Cannot sample rows from an empty DataFrame");
    }

    // treat axis as 0 for now. support for axis=1 should be added in the future
    var rnd = new Random(random_state);

    List<int> sampleIndex;
    if (!replace)
    {
        // Shuffle the row positions and keep the first n: no duplicates.
        sampleIndex = Enumerable
            .Range(0, rowCount)
            .OrderBy(arg => rnd.Next())
            .Take(n)
            .ToList();
    }
    else
    {
        // Draw each position independently, so duplicates are allowed.
        // The upper bound of Random.Next is exclusive, hence rowCount and not rowCount - 1.
        sampleIndex = new List<int>(n);
        for (int i = 0; i < n; i++)
        {
            sampleIndex.Add(rnd.Next(0, rowCount));
        }
    }

    // Copy the sampled rows column by column, keeping the original column order.
    var data = new List<Series>();
    foreach (var s in _data)
    {
        var values = new ArrayList(sampleIndex.Count);
        foreach (int row in sampleIndex)
        {
            values.Add(s.data.GetValue(row));
        }
        // ToArray(Type) produces a typed array matching the column's declared dtype.
        data.Add(new Series(values.ToArray(s.column.DType), s.column));
    }

    return new DataFrame(data, index: new Series(sampleIndex.ToArray()));
}
2876
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using PandasNet;
5+
using Tensorflow;
6+
7+
namespace Pandas.Test;
8+
9+
/// <summary>
/// Tests for <c>DataFrame.pop</c>.
/// </summary>
public class DataFramePopTest
{
    /// <summary>
    /// Popping a column returns its values and removes it from the frame,
    /// leaving the remaining column in place.
    /// </summary>
    [Fact]
    public void TestPopMethod()
    {
        // Arrange
        var firstColumn = new Series(new float[] { 1, 2, 3, 4, 5 }, new Column("column1", typeof(float)));
        var secondColumn = new Series(new float[] { 6, 7, 8, 9, 10 }, new Column("column2", typeof(float)));
        var dataFrame = new DataFrame(new List<Series> { firstColumn, secondColumn });

        // Act
        var poppedSeries = dataFrame.pop("column1");

        // Assert
        float[] expectedValues = { 1, 2, 3, 4, 5 };
        Assert.True(poppedSeries.array<float>().SequenceEqual(expectedValues));
        Assert.False(dataFrame.columns.Any(c => c.Name == "column1"));
        Assert.True(dataFrame.columns.Any(c => c.Name == "column2"));
    }
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
using PandasNet;
2+
using System;
3+
using System.Collections.Generic;
4+
using System.Linq;
5+
6+
namespace Pandas.Test;
7+
/// <summary>
/// Tests for <c>DataFrame.sample</c>.
/// </summary>
public class DataFrameSampleTests
{
    /// <summary>
    /// Builds the two-column, five-row frame shared by every test in this class.
    /// </summary>
    private static DataFrame CreateDataFrame()
    {
        var column1 = new Series(new double[] { 1, 2, 3, 4, 5 }, new Column { Name = "column1", DType = typeof(double) });
        var column2 = new Series(new double[] { 6, 7, 8, 9, 10 }, new Column { Name = "column2", DType = typeof(double) });
        return new DataFrame(new List<Series> { column1, column2 });
    }

    /// <summary>
    /// Sampling by row count returns that many rows and keeps every column.
    /// </summary>
    [Fact]
    public void TestSampleMethod()
    {
        // Arrange
        var dataFrame = CreateDataFrame();

        // Act
        var sampledDataFrame = dataFrame.sample(n: 3, random_state: 1);

        // Assert
        Assert.Equal(3, sampledDataFrame.index.size);
        Assert.True(sampledDataFrame.columns.Any(x => x.Name == "column1"));
        Assert.True(sampledDataFrame.columns.Any(x => x.Name == "column2"));
    }

    /// <summary>
    /// Sampling by fraction rounds the row count up and is repeatable for a given seed.
    /// </summary>
    [Fact]
    public void TestSampleMethodWithFrac()
    {
        // Arrange
        var dataFrame = CreateDataFrame();

        // Act
        var sampledDataFrame = dataFrame.sample(frac: 0.4f, random_state: 1);
        var sampledDataFrame2 = dataFrame.sample(frac: 0.4f, random_state: 2);

        // Assert
        Assert.Equal(2, sampledDataFrame.index.size); // 40% of 5 is 2
        Assert.True(sampledDataFrame.columns.Any(x => x.Name == "column1"));
        Assert.True(sampledDataFrame.columns.Any(x => x.Name == "column2"));
        Assert.Equal(2.0D, sampledDataFrame["column1"].GetValue(0));
        Assert.Equal(7.0D, sampledDataFrame["column2"].GetValue(0));

        // assert for other state
        Assert.Equal(2, sampledDataFrame2.index.size); // 40% of 5 is 2
        Assert.Equal(5.0D, sampledDataFrame2["column1"].GetValue(0));
        Assert.Equal(10.0D, sampledDataFrame2["column2"].GetValue(0));
    }

    /// <summary>
    /// Invalid argument combinations are rejected: neither n nor frac,
    /// both n and frac, and n larger than the frame.
    /// </summary>
    [Fact]
    public void TestSampleMethodThrowsException()
    {
        // Arrange
        var dataFrame = CreateDataFrame();

        // Act & Assert
        Assert.Throws<ArgumentException>(() => dataFrame.sample());
        Assert.Throws<ArgumentException>(() => dataFrame.sample(n: 3, frac: 0.4f));
        Assert.Throws<ArgumentException>(() => dataFrame.sample(n: 6));
    }
}

0 commit comments

Comments
 (0)