Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 6a687b4

Browse files
authored
Changing default download location for BostonHousing and CIFAR-10 datasets (#430)
* Replace the download links for the BostonHousing and CIFAR-10 datasets, along with the RoBERTa checkpoints. * Format BostonHousing.swift.
1 parent 41f2cf8 commit 6a687b4

File tree

3 files changed

+25
-17
lines changed

3 files changed

+25
-17
lines changed

Datasets/BostonHousing/BostonHousing.swift

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import ModelSupport
2222
import TensorFlow
2323

2424
public struct BostonHousing {
25-
public let trainPercentage:Float = 0.8
25+
public let trainPercentage: Float = 0.8
2626
public let numRecords: Int
2727
public let numColumns: Int
2828
public let numTrainRecords: Int
@@ -33,35 +33,41 @@ public struct BostonHousing {
3333
public let yTest: Tensor<Float>
3434

3535
static func downloadBostonHousingIfNotPresent() -> String {
36-
let remoteURL = URL(string: "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/")!
37-
let localURL = DatasetUtilities.defaultDirectory.appendingPathComponent("BostonHousing", isDirectory: true)
36+
let remoteURL = URL(
37+
string: "https://storage.googleapis.com/s4tf-hosted-binaries/datasets/BostonHousing/")!
38+
let localURL = DatasetUtilities.defaultDirectory.appendingPathComponent(
39+
"BostonHousing", isDirectory: true)
3840

3941
let downloadPath = localURL.path
4042
let directoryExists = FileManager.default.fileExists(atPath: downloadPath)
4143
let contentsOfDir = try? FileManager.default.contentsOfDirectory(atPath: downloadPath)
4244
let directoryEmpty = (contentsOfDir == nil) || (contentsOfDir!.isEmpty)
43-
45+
4446
if !directoryExists || directoryEmpty {
4547
let _ = DatasetUtilities.downloadResource(
4648
filename: "housing", fileExtension: "data",
4749
remoteRoot: remoteURL, localStorageDirectory: localURL,
4850
extract: false)
4951
}
5052

51-
return try! String(contentsOf: localURL.appendingPathComponent("housing.data"), encoding: String.Encoding.utf8)
53+
return try! String(
54+
contentsOf: localURL.appendingPathComponent("housing.data"),
55+
encoding: String.Encoding.utf8)
5256
}
53-
57+
5458
public init() {
5559
let data = BostonHousing.downloadBostonHousingIfNotPresent()
5660

5761
// Convert Space Separated CSV with no Header
58-
let dataRecords: [[Float]] = data.split(separator: "\n").map{ String($0).split(separator: " ").compactMap{ Float(String($0)) } }
62+
let dataRecords: [[Float]] = data.split(separator: "\n").map {
63+
String($0).split(separator: " ").compactMap { Float(String($0)) }
64+
}
5965

6066
let numRecords = dataRecords.count
6167
let numColumns = dataRecords[0].count
6268

63-
let dataFeatures = dataRecords.map{ Array($0[0..<numColumns-1]) }
64-
let dataLabels = dataRecords.map{ Array($0[(numColumns-1)...]) }
69+
let dataFeatures = dataRecords.map { Array($0[0..<numColumns - 1]) }
70+
let dataLabels = dataRecords.map { Array($0[(numColumns - 1)...]) }
6571

6672
self.numRecords = numRecords
6773
self.numColumns = numColumns
@@ -73,15 +79,17 @@ public struct BostonHousing {
7379
let yTrain = Array(Array(dataLabels[0..<numTrainRecords]).joined())
7480
let yTest = Array(Array(dataLabels[numTrainRecords...]).joined())
7581

76-
let xTrainDeNorm = Tensor<Float>(xTrain).reshaped(to: TensorShape([numTrainRecords, numColumns-1]))
77-
let xTestDeNorm = Tensor<Float>(xTest).reshaped(to: TensorShape([numTestRecords, numColumns-1]))
82+
let xTrainDeNorm = Tensor<Float>(xTrain).reshaped(
83+
to: TensorShape([numTrainRecords, numColumns - 1]))
84+
let xTestDeNorm = Tensor<Float>(xTest).reshaped(
85+
to: TensorShape([numTestRecords, numColumns - 1]))
7886

7987
// Normalize
8088
let mean = xTrainDeNorm.mean(alongAxes: 0)
8189
let std = xTrainDeNorm.standardDeviation(alongAxes: 0)
8290

83-
self.xTrain = (xTrainDeNorm - mean)/std
84-
self.xTest = (xTestDeNorm - mean)/std
91+
self.xTrain = (xTrainDeNorm - mean) / std
92+
self.xTest = (xTestDeNorm - mean) / std
8593
self.yTrain = Tensor<Float>(yTrain).reshaped(to: TensorShape([numTrainRecords, 1]))
8694
self.yTest = Tensor<Float>(yTest).reshaped(to: TensorShape([numTestRecords, 1]))
8795
}

Datasets/CIFAR10/CIFAR10.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ public struct CIFAR10: ImageClassificationDataset {
3131
self.init(
3232
batchSize: batchSize,
3333
remoteBinaryArchiveLocation: URL(
34-
string: "https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz")!,
34+
string: "https://storage.googleapis.com/s4tf-hosted-binaries/datasets/CIFAR10/cifar-10-binary.tar.gz")!,
3535
normalizing: true)
3636
}
3737

Models/Text/BERT.swift

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,7 @@ extension BERT {
510510
/// The URL where this pre-trained model can be downloaded from.
511511
public var url: URL {
512512
let bertPrefix = "https://storage.googleapis.com/bert_models/2018_"
513-
let robertaPrefix = "https://www.dropbox.com/s"
513+
let robertaPrefix = "https://storage.googleapis.com/s4tf-hosted-binaries/checkpoints/Text/RoBERTa"
514514
let albertPrefix = "https://storage.googleapis.com/tfhub-modules/google/albert"
515515
switch self {
516516
case .bertBase(false, false):
@@ -530,9 +530,9 @@ extension BERT {
530530
case .bertLarge(true, true):
531531
return URL(string: "\(bertPrefix)05_30/\(subDirectory).zip")!
532532
case .robertaBase:
533-
return URL(string: "\(robertaPrefix)/12ymhgwbfxm2ozf/base.zip?dl=1")!
533+
return URL(string: "\(robertaPrefix)/base.zip")!
534534
case .robertaLarge:
535-
return URL(string: "\(robertaPrefix)/jf6kxmdvxyfl4wz/large.zip?dl=1")!
535+
return URL(string: "\(robertaPrefix)/large.zip")!
536536
case .albertBase, .albertLarge, .albertXLarge, .albertXXLarge:
537537
return URL(string: "\(albertPrefix)_\(subDirectory)/1.tar.gz")!
538538
}

0 commit comments

Comments
 (0)