@@ -4,7 +4,8 @@ import io.tarantool.driver.api.conditions.Conditions
4
4
import io .tarantool .driver .api .tuple .{DefaultTarantoolTupleFactory , TarantoolTuple }
5
5
import io .tarantool .driver .mappers .DefaultMessagePackMapperFactory
6
6
import io .tarantool .spark .connector .toSparkContextFunctions
7
- import org .apache .spark .sql .{Encoders , Row }
7
+ import org .apache .spark .SparkException
8
+ import org .apache .spark .sql .{Encoders , Row , SaveMode }
8
9
import org .scalatest .funsuite .AnyFunSuite
9
10
import org .scalatest .matchers .should .Matchers
10
11
import org .scalatest .{BeforeAndAfterAll , BeforeAndAfterEach }
@@ -27,24 +28,25 @@ class TarantoolSparkWriteClusterTest
27
28
28
29
private val orderSchema = Encoders .product[Order ].schema
29
30
30
- test(" should write a list of objects to the space" ) {
31
+ test(" should write a dataset of objects to the specified space with different modes " ) {
31
32
32
33
val orders = Range (1 , 10 ).map(i => Order (i))
33
34
34
- val df = spark.createDataFrame(
35
+ var df = spark.createDataFrame(
35
36
spark.sparkContext.parallelize(orders.map(order => order.asRow())),
36
37
orderSchema
37
38
)
38
39
40
+ // Insert, the partition is empty at first
39
41
df.write
40
42
.format(" org.apache.spark.sql.tarantool" )
41
- .mode(" overwrite " )
43
+ .mode(SaveMode . Append )
42
44
.option(" tarantool.space" , SPACE_NAME )
43
45
.save()
44
46
45
- val actual = spark.sparkContext.tarantoolSpace(SPACE_NAME , Conditions .any()).collect()
46
-
47
+ var actual = spark.sparkContext.tarantoolSpace(SPACE_NAME , Conditions .any()).collect()
47
48
actual.length should be > 0
49
+
48
50
val sorted = actual.sorted[TarantoolTuple ](new Ordering [TarantoolTuple ]() {
49
51
override def compare (x : TarantoolTuple , y : TarantoolTuple ): Int =
50
52
x.getInteger(" id" ).compareTo(y.getInteger(" id" ))
@@ -70,20 +72,139 @@ class TarantoolSparkWriteClusterTest
70
72
)
71
73
actualItem.getBoolean(" cleared" ) should equal(expectedItem.getBoolean(6 ))
72
74
}
75
+
76
+ // Replace
77
+ df = spark.createDataFrame(
78
+ spark.sparkContext.parallelize(
79
+ orders
80
+ .map(order => order.changeOrderType(order.orderType + " 222" ))
81
+ .map(order => order.asRow())
82
+ ),
83
+ orderSchema
84
+ )
85
+
86
+ df.write
87
+ .format(" org.apache.spark.sql.tarantool" )
88
+ .mode(SaveMode .Overwrite )
89
+ .option(" tarantool.space" , SPACE_NAME )
90
+ .save()
91
+
92
+ actual = spark.sparkContext.tarantoolSpace(SPACE_NAME , Conditions .any()).collect()
93
+ actual.length should be > 0
94
+
95
+ actual.foreach(item => item.getString(" order_type" ) should endWith(" 222" ))
96
+
97
+ // Second insert with the same IDs produces an exception
98
+ var thrownException : Throwable = the[SparkException ] thrownBy {
99
+ df.write
100
+ .format(" org.apache.spark.sql.tarantool" )
101
+ .mode(SaveMode .Append )
102
+ .option(" tarantool.space" , SPACE_NAME )
103
+ .save()
104
+ }
105
+ thrownException.getMessage should include(" Duplicate key exists" )
106
+
107
+ // ErrorIfExists mode checks that the partition is empty and throws an exception if it is not
108
+ thrownException = the[IllegalStateException ] thrownBy {
109
+ df.write
110
+ .format(" org.apache.spark.sql.tarantool" )
111
+ .mode(SaveMode .ErrorIfExists )
112
+ .option(" tarantool.space" , SPACE_NAME )
113
+ .save()
114
+ }
115
+ thrownException.getMessage should include(" already exists in Tarantool" )
116
+
117
+ // Clear the data and check that they are written in ErrorIfExists mode
118
+ container.executeScript(" test_teardown.lua" ).get()
119
+
120
+ df = spark.createDataFrame(
121
+ spark.sparkContext.parallelize(
122
+ orders
123
+ .map(order => order.changeOrderType(order.orderType + " 333" ))
124
+ .map(order => order.asRow())
125
+ ),
126
+ orderSchema
127
+ )
128
+
129
+ df.write
130
+ .format(" org.apache.spark.sql.tarantool" )
131
+ .mode(SaveMode .ErrorIfExists )
132
+ .option(" tarantool.space" , SPACE_NAME )
133
+ .save()
134
+
135
+ actual = spark.sparkContext.tarantoolSpace(SPACE_NAME , Conditions .any()).collect()
136
+ actual.length should be > 0
137
+
138
+ actual.foreach(item => item.getString(" order_type" ) should endWith(" 333" ))
139
+
140
+ // Check that new data are not written in Ignore mode if the partition is not empty
141
+ df = spark.createDataFrame(
142
+ spark.sparkContext.parallelize(
143
+ orders
144
+ .map(order => order.changeOrderType(order.orderType + " 444" ))
145
+ .map(order => order.asRow())
146
+ ),
147
+ orderSchema
148
+ )
149
+
150
+ df.write
151
+ .format(" org.apache.spark.sql.tarantool" )
152
+ .mode(SaveMode .Ignore )
153
+ .option(" tarantool.space" , SPACE_NAME )
154
+ .save()
155
+
156
+ actual = spark.sparkContext.tarantoolSpace(SPACE_NAME , Conditions .any()).collect()
157
+ actual.length should be > 0
158
+
159
+ actual.foreach(item => item.getString(" order_type" ) should endWith(" 333" ))
160
+
161
+ // Clear the data and check if they are written in Ignore mode
162
+ container.executeScript(" test_teardown.lua" ).get()
163
+
164
+ df.write
165
+ .format(" org.apache.spark.sql.tarantool" )
166
+ .mode(SaveMode .Ignore )
167
+ .option(" tarantool.space" , SPACE_NAME )
168
+ .save()
169
+
170
+ actual = spark.sparkContext.tarantoolSpace(SPACE_NAME , Conditions .any()).collect()
171
+ actual.length should be > 0
172
+
173
+ actual.foreach(item => item.getString(" order_type" ) should endWith(" 444" ))
73
174
}
74
175
176
// Verifies that a write which does not set the space name option fails with
// an IllegalArgumentException.
test(" should throw an exception if the space name is not specified") {
  // The fixture is built outside of the asserted block so that only the write
  // itself can satisfy the expected exception; a failure while preparing the
  // data must fail the test instead of making it pass.
  val orders = Range(1, 10).map(i => Order(i))
  val df = spark.createDataFrame(
    spark.sparkContext.parallelize(orders.map(order => order.asRow())),
    orderSchema
  )

  // The space name option is intentionally omitted from this write.
  assertThrows[IllegalArgumentException] {
    df.write
      .format(" org.apache.spark.sql.tarantool")
      .mode(SaveMode.Overwrite)
      .save()
  }
}
75
191
}
76
192
77
193
case class Order (
78
194
id : Int ,
79
195
bucketId : Int ,
80
- orderType : String ,
196
+ var orderType : String ,
81
197
orderValue : BigDecimal ,
82
198
orderItems : List [Int ],
83
199
options : Map [String , String ],
84
200
cleared : Boolean
85
201
) {
86
202
203
/**
 * Returns an order whose `orderType` is set to the given value.
 *
 * The result is a new instance created with `copy`; this order is left
 * unchanged, so a collection of orders that is reused between test steps
 * is not modified as a side effect of calling this method.
 *
 * @param newOrderType value of `orderType` for the returned order
 * @return a copy of this order with `orderType` replaced by `newOrderType`
 */
def changeOrderType(newOrderType: String): Order =
  copy(orderType = newOrderType)
207
+
87
208
/**
 * Converts this order into a Spark `Row`.
 *
 * @return a row holding, in this sequence: id, bucketId, orderType,
 *         orderValue, orderItems, options, cleared
 */
def asRow(): Row = {
  val fields = Seq[Any](id, bucketId, orderType, orderValue, orderItems, options, cleared)
  Row.fromSeq(fields)
}
89
210
0 commit comments