Skip to content

Commit bcb9a4a

Browse files
authored
Adds example exercising tfjs-data's CSV api (#210)
New example shows how to connect to remote CSVs using tfjs-data. Illustrates * counting the records in a remote CSV * determining the column names within a remote CSV * extracting and rendering a selected record from a remote CSV
1 parent 4c02b54 commit bcb9a4a

File tree

6 files changed

+5933
-0
lines changed

6 files changed

+5933
-0
lines changed

data-csv/.babelrc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"presets": [
3+
[
4+
"env",
5+
{
6+
"esmodules": false,
7+
"targets": {
8+
"browsers": [
9+
"> 3%"
10+
]
11+
}
12+
}
13+
]
14+
],
15+
"plugins": [
16+
"transform-runtime"
17+
]
18+
}

data-csv/index.html

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
<!--
2+
@license
3+
Copyright 2019 Google LLC. All Rights Reserved.
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
=============================================================================
16+
-->
17+
18+
<!doctype html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>TensorFlow.js: Working with CSV files in tfjs-data</title>
  <link rel="stylesheet" href="../shared/tfjs-examples.css">
  <style>
    .status {
      border: none;
      font-size: 20px;
      text-align: center;
      resize: none;
    }

    .query {
      font-size: 20px;
      text-align: left;
      resize: none;
    }

    .textarea {
      font-size: 20px;
      text-align: center;
      resize: none;
    }

    .container {
      width: 80%;
      margin-left: auto;
      margin-right: auto;
      text-align: center;
    }

    /* The results area is a CSS table built from divs. */
    .divTable {
      display: table;
      width: 100%;
    }

    .divTableRow {
      display: table-row;
    }

    .divTableCell,
    .divTableHead {
      border: 1px solid #999999;
      display: table-cell;
      padding: 3px 10px;
    }

    .divTableCellKey {
      border: 1px solid #999999;
      display: table-cell;
      padding: 3px 10px;
      font-weight: bold;
    }

    .divTableHeading {
      background-color: #EEE;
      display: table-header-group;
      font-weight: bold;
    }

    .divTableFoot {
      background-color: #EEE;
      display: table-footer-group;
      font-weight: bold;
    }

    .divTableBody {
      display: table-row-group;
    }
  </style>
</head>

<body>
  <div class="container">
    <h3>TensorFlow.js: Working with CSV files in tfjs-data</h3>
    <!-- NOTE(review): this id contains spaces, which HTML does not allow in
         an id value. It is kept unchanged in case a script or stylesheet
         outside this file looks it up; rename once that is confirmed. -->
    <div id="Preset URL buttons">
      <button id="boston-button" type="button">Boston Housing CSV</button>
      <button id="dresses-button" type="button">Dresses Sales OpenML CSV</button>
      <button id="jena-climate-button" type="button">Jena Climate CSV</button>
      <button id="suny-button" type="button">State University Of New York Campuses CSV</button>
    </div>
    <p></p>
    <label for="query-url">Enter the URL of your csv file below</label>, or
    click one of the above buttons to explore some CSVs we've hosted. Note
    that you may need to update your web host to allow CORS requests in order
    to access your data this way.<p></p>
    <textarea class="query" id="query-url" rows="3" cols="64">http://path/to/your.csv</textarea>
    <p></p>
    <div id="button-div">
    </div>
    <p></p>
    <!-- Read-only status area; index.js writes progress and error text here. -->
    <textarea class="status" id="status" rows="6" cols="60" readonly aria-label="Status">Select a CSV URL and begin by counting the number of rows in the CSV.</textarea>
    <div class="divTable">
      <div class="divTableBody">
        <!-- Count rows-->
        <div class="divTableRow">
          <div class="divTableCell">
            <button id="count-rows" type="button">Count rows</button>
          </div>
          <div class="divTableCell" id="row-count-output">
          </div>
        </div>
        <!-- Get column names-->
        <div class="divTableRow">
          <div class="divTableCell">
            <button id="get-column-names" type="button">Get column names</button>
          </div>
          <div class="divTableCell" id="column-names-output">
            <div id="column-names-message">
            </div>
            <div id="column-names-output-container">
            </div>
          </div>
        </div>
        <!-- Get sample row-->
        <div class="divTableRow">
          <div class="divTableCell">
            <button id="get-sample-row" type="button">Get a sample row</button>
            <p></p>
            <label for="which-sample-input">sample index (zero-index):</label>
            <input id="which-sample-input" type="number" value="2">
          </div>
          <div class="divTableCell">
            <div id="sample-row-message">
            </div>
            <div id="sample-row-output-container">
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
  <script src="./index.js"></script>
</body>

</html>

data-csv/index.js

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/**
2+
* @license
3+
* Copyright 2019 Google LLC. All Rights Reserved.
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
* =============================================================================
16+
*/
17+
18+
import * as tf from '@tensorflow/tfjs';
19+
20+
import * as ui from './ui';
21+
22+
23+
// Boston Housing CSV
24+
const BOSTON_HOUSING_CSV_URL =
25+
'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/train-data.csv';
26+
// Jena Climate CSV
27+
const JENA_CLIMATE_CSV_URL =
28+
'https://storage.googleapis.com/learnjs-data/jena_climate/jena_climate_2009_2016.csv';
29+
// Dresses Sales data
30+
// Originally from https://www.openml.org/d/23381
31+
const DRESSES_SALES_CSV_URL =
32+
'https://storage.googleapis.com/learnjs-data/csv-datasets/dresses-sales-openml.csv';
33+
// State University of New York Campus Data from NYS.gov
34+
// Originally from
35+
// https://data.ny.gov/Education/State-University-of-New-York-SUNY-Campus-Locations/3cij-nwhw
36+
const SUNY_CSV_URL =
37+
'https://storage.googleapis.com/learnjs-data/csv-datasets/State_University_of_New_York__SUNY__Campus_Locations_with_Websites__Enrollment_and_Select_Program_Offerings.csv';
38+
39+
40+
/**
 * Builds a CSV Dataset object using the URL specified in the UI. Then iterates
 * over all elements in that dataset to count them. Updates the UI accordingly.
 *
 * While counting, the status is refreshed every 1000 rows. If iteration
 * throws, the error is shown in both the row-count output and the status area
 * and no count is reported.
 */
async function countRowsHandler() {
  const url = ui.getQueryElement().value;
  ui.updateStatus(`Building data object to connect to ${url}`);
  const myData = tf.data.csv(url);
  let i = 0;
  ui.updateRowCountOutput(`counting...`);
  // Invoked once per row; the row's contents are not needed for counting.
  const updateFn = () => {
    i += 1;
    if (i % 1000 === 0) {
      ui.updateStatus(`Counting ... ${i} rows of data in the CSV so far...`);
    }
  };
  try {
    ui.updateStatus('Attempting to count records in CSV.');
    // Note that `tf.data.Dataset.forEach()` is an async function. Without the
    // `await` here, there is no control over when the updateFn calls execute,
    // thus, they will likely execute *after* we update the status with the
    // final count, resulting in a display of "Counted 0 rows.".
    await myData.forEach(() => updateFn());
  } catch (e) {
    // Keep a space before the error text so it does not run into the hint.
    const errorMsg = `Caught an error iterating over ${url}. ` +
        `This URL might not be valid or might not support CORS requests.` +
        ` Check the developer console for CORS errors. ${e}`;
    // Replace the stale "Attempting..." / "Counting ..." status as well.
    ui.updateStatus(errorMsg);
    ui.updateRowCountOutput(errorMsg);
    return;
  }
  ui.updateStatus(`Done counting rows.`);
  ui.updateRowCountOutput(`Counted ${i} rows of data in the CSV.`);
}
73+
74+
/**
 * Builds a CSV Dataset object using the URL specified in the UI. Then connects
 * with the dataset object to retrieve the column names. Updates the UI
 * accordingly.
 *
 * The column-name output is reset to an empty list before the request starts.
 * If retrieving the names throws, the error is shown in both the column-names
 * message and the status area.
 */
async function getColumnNamesHandler() {
  ui.updateColumnNamesOutput([]);
  const url = ui.getQueryElement().value;
  ui.updateStatus(`Attempting to connect to CSV resource at ${url}`);
  const myData = tf.data.csv(url);
  ui.updateStatus('Got the data connection ... determining the column names');
  ui.updateColumnNamesMessage('Determining column names.');
  try {
    const columnNames = await myData.columnNames();
    ui.updateStatus('Done getting column names.');
    ui.updateColumnNamesMessage('');
    ui.updateColumnNamesOutput(columnNames);
  } catch (e) {
    // Keep a space before the error text so it does not run into the hint.
    const errorMsg = `Caught an error retrieving column names from ${url}. ` +
        `This URL might not be valid or might not support CORS requests.` +
        ` Check the developer console for CORS errors. ${e}`;
    // Replace the stale "determining the column names" status as well.
    ui.updateStatus(errorMsg);
    ui.updateColumnNamesMessage(errorMsg);
  }
}
99+
100+
/**
 * Accesses the CSV to collect a single specified row. The row index
 * is specified by the UI element managed in the ui library.
 *
 * Every failure path (a negative or NaN index, an error while reading the
 * CSV, or an empty result) reports the problem in both the status area and
 * the sample-row message, and resets the sample-row output to an empty list.
 */
async function getSampleRowHandler() {
  // Shared failure reporting so all error paths leave the UI in the same state.
  const reportFailure = (msg) => {
    ui.updateStatus(msg);
    ui.updateSampleRowMessage(msg);
    ui.updateSampleRowOutput([]);
  };

  const url = ui.getQueryElement().value;
  ui.updateStatus(`Attempting to connect to CSV resource at ${url}`);
  const myData = tf.data.csv(url);
  ui.updateStatus('Got the data connection ... getting requested sample');
  const sampleIndex = ui.getSampleIndex();
  if (sampleIndex < 0 || isNaN(sampleIndex)) {
    reportFailure(
        `Can not get samples with negative or NaN index. (Requested ${
            sampleIndex}).`);
    return;
  }
  let sample;
  try {
    // Skip the rows before the requested one, then read exactly one row.
    sample = await myData.skip(sampleIndex).take(1).toArray();
  } catch (e) {
    // Keep a space before the error text so it does not run into the hint.
    reportFailure(
        `Caught an error retrieving sample from ${url}. ` +
        'This URL might not be valid or might not support CORS requests.' +
        ` Check the developer console for CORS errors. ${e}`);
    return;
  }
  if (sample.length === 0) {
    // When samples are requested beyond the end of the CSV, the data will
    // return empty.
    reportFailure(`Can not get sample index ${
        sampleIndex}. This may be beyond the end of the dataset.`);
    return;
  }
  ui.updateStatus(`Done getting sample ${sampleIndex}.`);
  ui.updateSampleRowMessage(`Done getting sample ${sampleIndex}.`);
  ui.updateSampleRowOutput(sample[0]);
}
144+
145+
/** Puts every output message back to its initial hint and empties the tables. */
const resetOutputMessages = () => {
  // Each entry pairs a ui updater with the value it should show; entries are
  // applied from top to bottom.
  const resetSteps = [
    [ui.updateRowCountOutput, 'click "Count rows"'],
    [ui.updateColumnNamesMessage, 'click "Get column names"'],
    [ui.updateColumnNamesOutput, []],
    [ui.updateSampleRowMessage, 'select an index and click "Get a sample row"'],
    [ui.updateSampleRowOutput, []],
  ];
  for (const [applyUpdate, value] of resetSteps) {
    applyUpdate(value);
  }
};
153+
154+
/** Sets up handlers for the user affordances, including all buttons. */
document.addEventListener('DOMContentLoaded', () => {
  resetOutputMessages();

  // Helper to connect preset URL buttons: clicking one fills in the query URL,
  // resets the output areas and shows a description of the dataset.
  const connectURLButton = (buttonId, url, statusMessage) => {
    document.getElementById(buttonId).addEventListener('click', () => {
      ui.getQueryElement().value = url;
      resetOutputMessages();
      ui.updateStatus(statusMessage);
    }, false);
  };

  connectURLButton(
      'jena-climate-button', JENA_CLIMATE_CSV_URL,
      `Jena climate data is a record of atmospheric conditions taken over a ` +
          `period of time. In this dataset, 14 different quantities (such ` +
          `as air temperature, atmospheric pressure, humidity, wind ` +
          `direction, and so on) were recorded every 10 minutes, over ` +
          // Trailing space keeps "dataset" and "might" from running together.
          `several years. Note that counting all the rows of this dataset ` +
          `might take a while`);
  connectURLButton(
      'boston-button', BOSTON_HOUSING_CSV_URL,
      `"Boston Housing" is a commonly used dataset in introductory ML problems.`);
  connectURLButton(
      'dresses-button', DRESSES_SALES_CSV_URL,
      `This dataset contains attributes of dresses and their recommendations ` +
          `according to their sales. Provided courtesy of OpenML. Find more ` +
          `curated ML datasets at https://www.openml.org/d/23381`);
  connectURLButton(
      'suny-button', SUNY_CSV_URL,
      `Campuses which comprise the State University of New York (SUNY) System. ` +
          `Highlights information on Undergraduate and Graduate enrollment ` +
          `as well as some program area offerings. Find more datasets at ` +
          `https://data.ny.gov/`);

  // Connect action buttons.
  document.getElementById('count-rows')
      .addEventListener('click', countRowsHandler, false);
  document.getElementById('get-column-names')
      .addEventListener('click', getColumnNamesHandler, false);
  document.getElementById('get-sample-row')
      .addEventListener('click', getSampleRowHandler, false);

  // Connect sample index to fetch on change.
  document.getElementById('which-sample-input')
      .addEventListener('change', getSampleRowHandler, false);
}, false);

0 commit comments

Comments
 (0)