17
17
// Omkar M Parkhi, Andrea Vedaldi, Andrew Zisserman and C. V. Jawahar
18
18
// https://www.robots.ox.ac.uk/~vgg/data/pets/
19
19
20
- import Batcher
21
20
import Foundation
22
21
import ModelSupport
23
22
import TensorFlow
24
23
25
- public typealias LazyDataSet = LazyMapSequence < [ URL ] , TensorPair < Float , Int32 > >
26
-
27
/// The Oxford-IIIT Pet segmentation dataset, served as lazily-built batches.
///
/// Training data is exposed as a sequence of epochs, each of which is a collection of batches;
/// validation data is a single collection of batches. Images are only read and converted to
/// tensors when a batch is actually accessed.
public struct OxfordIIITPets<Entropy: RandomNumberGenerator> {
  /// Type of the collection of non-collated batches.
  public typealias Batches = Slices<Sampling<[(file: URL, annotation: URL)], ArraySlice<Int>>>
  /// The type of the training data, represented as a sequence of epochs, which
  /// are collections of batches.
  public typealias Training = LazyMapSequence<
    TrainingEpochs<[(file: URL, annotation: URL)], Entropy>,
    LazyMapSequence<Batches, SegmentedImage>
  >
  /// The type of the validation data, represented as a collection of batches.
  public typealias Validation = LazyMapSequence<
    Slices<[(file: URL, annotation: URL)]>, SegmentedImage
  >
  /// The training epochs.
  public let training: Training
  /// The validation batches.
  public let validation: Validation

  /// Creates an instance with `batchSize`, using 224x224 images.
  ///
  /// - Parameters:
  ///   - batchSize: Number of images provided per batch.
  ///   - entropy: A source of randomness used to shuffle sample
  ///     ordering.  It will be stored in `self`, so if it is only pseudorandom
  ///     and has value semantics, the sequence of epochs is deterministic and not
  ///     dependent on other operations.
  ///   - device: The Device on which resulting Tensors from this dataset will be placed, as well
  ///     as where the latter stages of any conversion calculations will be performed.
  public init(batchSize: Int, entropy: Entropy, device: Device) {
    self.init(
      batchSize: batchSize, entropy: entropy, device: device, imageSize: 224)
  }

  /// Creates an instance with `batchSize` and `imageSize` on `device`, keeping the dataset files
  /// under `localStorageDirectory`.
  ///
  /// Terminates the process with `fatalError` if either sample list cannot be loaded.
  ///
  /// - Parameters:
  ///   - batchSize: Number of images provided per batch.
  ///   - entropy: A source of randomness used to shuffle sample ordering.  It
  ///     will be stored in `self`, so if it is only pseudorandom and has value
  ///     semantics, the sequence of epochs is deterministic and not dependent
  ///     on other operations.
  ///   - device: The Device on which resulting Tensors from this dataset will be placed, as well
  ///     as where the latter stages of any conversion calculations will be performed.
  ///   - imageSize: The square width and height of the images returned from this dataset.
  ///   - localStorageDirectory: Where to place the downloaded and unarchived dataset.
  public init(
    batchSize: Int, entropy: Entropy, device: Device, imageSize: Int,
    localStorageDirectory: URL = DatasetUtilities.defaultDirectory
      .appendingPathComponent("OxfordIIITPets", isDirectory: true)
  ) {
    do {
      let trainingSamples = try loadOxfordIITPetsTraining(
        localStorageDirectory: localStorageDirectory)

      training = TrainingEpochs(samples: trainingSamples, batchSize: batchSize, entropy: entropy)
        .lazy.map { (batches: Batches) -> LazyMapSequence<Batches, SegmentedImage> in
          return batches.lazy.map {
            makeBatch(samples: $0, imageSize: imageSize, device: device)
          }
        }

      // Validation batches must be drawn from the held-out list, not from the training list;
      // otherwise evaluation runs on the very samples the model was trained on.
      let validationSamples = try loadOxfordIIITPetsValidation(
        localStorageDirectory: localStorageDirectory)

      validation = validationSamples.inBatches(of: batchSize).lazy.map {
        makeBatch(samples: $0, imageSize: imageSize, device: device)
      }
    } catch {
      fatalError("Could not load the Oxford IIIT Pets dataset: \(error)")
    }
  }
}
61
96
97
extension OxfordIIITPets: ImageSegmentationData where Entropy == SystemRandomNumberGenerator {
  /// Creates an instance that provides `batchSize` images per batch on `device`, shuffling with
  /// the SystemRandomNumberGenerator.
  ///
  /// - Parameters:
  ///   - batchSize: Number of images provided per batch.
  ///   - device: The Device on which resulting Tensors from this dataset will be placed.
  public init(batchSize: Int, on device: Device = Device.default) {
    let systemEntropy = SystemRandomNumberGenerator()
    self.init(batchSize: batchSize, entropy: systemEntropy, device: device)
  }

  /// Creates an instance that provides `batchSize` images of `imageSize` x `imageSize` per batch
  /// on `device`, shuffling with the SystemRandomNumberGenerator.
  ///
  /// - Parameters:
  ///   - batchSize: Number of images provided per batch.
  ///   - imageSize: The square width and height of the images returned from this dataset.
  ///   - device: The Device on which resulting Tensors from this dataset will be placed.
  public init(batchSize: Int, imageSize: Int, on device: Device = Device.default) {
    let systemEntropy = SystemRandomNumberGenerator()
    self.init(
      batchSize: batchSize,
      entropy: systemEntropy,
      device: device,
      imageSize: imageSize)
  }
}
111
+
62
112
func downloadOxfordIIITPetsIfNotPresent( to directory: URL ) {
63
113
let downloadPath = directory. appendingPathComponent ( " images " , isDirectory: true ) . path
64
114
let directoryExists = FileManager . default. fileExists ( atPath: downloadPath)
@@ -80,21 +130,13 @@ func downloadOxfordIIITPetsIfNotPresent(to directory: URL) {
80
130
)
81
131
}
82
132
83
/// Builds the list of (image file, annotation file) URL pairs named by `filename`.
///
/// `downloadOxfordIIITPetsIfNotPresent(to:)` is called on `directory` before the listing is
/// read. No image data is decoded here; only URLs are produced.
///
/// - Parameters:
///   - filename: Name of the listing file passed on to `getImageURLs(filename:directory:)`.
///   - directory: Local directory that holds (or will hold) the dataset.
/// - Returns: One `(file:, annotation:)` pair per image URL, in listing order.
func loadOxfordIIITPets(filename: String, in directory: URL) throws -> [(
  file: URL, annotation: URL
)] {
  downloadOxfordIIITPetsIfNotPresent(to: directory)
  let listedImages = getImageURLs(filename: filename, directory: directory)
  // The result is an array, so the pairs are built eagerly.
  return listedImages.map { url -> (file: URL, annotation: URL) in
    let annotationURL = makeAnnotationURL(imageURL: url, directory: directory)
    return (file: url, annotation: annotationURL)
  }
}
100
142
@@ -114,20 +156,36 @@ func getImageURLs(filename: String, directory: URL) -> [URL] {
114
156
}
115
157
}
116
158
117
/// Returns the (image file, annotation file) URL pairs listed in `trainval.txt`.
///
/// - Parameter localStorageDirectory: Directory handed to `loadOxfordIIITPets(filename:in:)` as
///   the dataset location.
func loadOxfordIITPetsTraining(
  localStorageDirectory: URL
) throws -> [(file: URL, annotation: URL)] {
  let trainingListing = "trainval.txt"
  return try loadOxfordIIITPets(filename: trainingListing, in: localStorageDirectory)
}
125
164
126
/// Returns the (image file, annotation file) URL pairs listed in `test.txt`.
///
/// - Parameter localStorageDirectory: Directory handed to `loadOxfordIIITPets(filename:in:)` as
///   the dataset location.
func loadOxfordIIITPetsValidation(
  localStorageDirectory: URL
) throws -> [(file: URL, annotation: URL)] {
  let validationListing = "test.txt"
  return try loadOxfordIIITPets(filename: validationListing, in: localStorageDirectory)
}
171
+
172
/// Collates `samples` into a single `SegmentedImage` batch placed on `device`.
///
/// Every image and annotation file is loaded, resized to `imageSize` x `imageSize`, stacked into
/// one tensor, and copied to `device`.
///
/// - Parameters:
///   - samples: The (image file, annotation file) URL pairs that make up the batch.
///   - imageSize: The square width and height each image and annotation is resized to.
///   - device: The Device the resulting tensors are copied to.
/// - Returns: The stacked images as `data` and the stacked annotations as `label`.
fileprivate func makeBatch<BatchSamples: Collection>(
  samples: BatchSamples, imageSize: Int, device: Device
) -> SegmentedImage where BatchSamples.Element == (file: URL, annotation: URL) {
  let targetSize = (imageSize, imageSize)

  // Keep only the first three channels of every image.
  let pixels: [Tensor<Float>] = samples.map { sample in
    Image(jpeg: sample.file).resized(to: targetSize).tensor[0..., 0..., 0..<3]
  }
  let stackedPixels = Tensor(stacking: pixels)
  // Scale by 1/255 after the copy, so the division is performed on the device-resident tensor.
  let imageTensor = Tensor(copying: stackedPixels, to: device) / 255.0

  // Keep only the first channel of every annotation and subtract 1 from each value
  // (presumably to make the class labels zero-based -- confirm against the dataset format).
  let masks: [Tensor<Int32>] = samples.map { sample in
    Tensor<Int32>(
      Image(jpeg: sample.annotation).resized(to: targetSize).tensor[0..., 0..., 0...0] - 1)
  }
  let stackedMasks = Tensor(stacking: masks)
  let annotationTensor = Tensor(copying: stackedMasks, to: device)

  return SegmentedImage(data: imageTensor, label: annotationTensor)
}
0 commit comments