Skip to content

Commit 945d788

Browse files
committed
Allow exporting .parquet files without geometry
1 parent 454dd6e commit 945d788

2 files changed

Lines changed: 107 additions & 21 deletions

File tree

src/geoparquet/mapshaper-geoparquet-export.mjs

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { exportLayerAsGeoJSON } from '../geojson/geojson-export';
22
import { parseCrsString, parsePrj } from '../crs/mapshaper-projections';
33
import { runningInBrowser } from '../mapshaper-env';
44
import { getFileExtension } from '../utils/mapshaper-filename-utils';
5-
import { stop } from '../utils/mapshaper-logging';
5+
import { stop, warn } from '../utils/mapshaper-logging';
66
import utils from '../utils/mapshaper-utils';
77
import require from '../mapshaper-require';
88

@@ -19,22 +19,26 @@ export async function exportGeoParquet(dataset, opts, filenameOverride) {
1919
extension = getFileExtension(opts.file) || extension;
2020
}
2121
dataset.layers.forEach(function(lyr) {
22-
if (!lyr.geometry_type) {
23-
stop('GeoParquet export requires a geometry layer');
24-
}
2522
var features = exportLayerAsGeoJSON(lyr, dataset, opts, true, null);
26-
var output = buildGeoParquetColumns(features, writer);
27-
var geoMetadata = buildGeoMetadata(features, dataset);
28-
var content = writer.parquetWriteBuffer({
23+
var hasGeometry = features.some(function(feat) {
24+
return !!feat.geometry;
25+
});
26+
var output = buildGeoParquetColumns(features, hasGeometry);
27+
var writeOptions = {
2928
columnData: output.columnData,
3029
codec: compression.codec,
3130
compressors: compression.compressors,
32-
pageSize: compression.pageSize,
33-
kvMetadata: [{
31+
pageSize: compression.pageSize
32+
};
33+
if (hasGeometry) {
34+
writeOptions.kvMetadata = [{
3435
key: 'geo',
35-
value: JSON.stringify(geoMetadata)
36-
}]
37-
});
36+
value: JSON.stringify(buildGeoMetadata(features, dataset))
37+
}];
38+
} else {
39+
warn('GeoParquet export: layer has no geometry; writing attribute data only.');
40+
}
41+
var content = writer.parquetWriteBuffer(writeOptions);
3842
files.push({
3943
filename: filenameOverride || (lyr.name + '.' + extension),
4044
content: content
@@ -43,23 +47,31 @@ export async function exportGeoParquet(dataset, opts, filenameOverride) {
4347
return files;
4448
}
4549

46-
function buildGeoParquetColumns(features, writer) {
50+
function buildGeoParquetColumns(features, includeGeometry) {
4751
var geometryName = 'geometry';
4852
var names = getPropertyNames(features);
4953
var columnData = [];
50-
columnData.push({
51-
name: geometryName,
52-
data: features.map(function(feat) {
53-
return feat.geometry || null;
54-
}),
55-
type: 'GEOMETRY'
56-
});
54+
if (features.length === 0) {
55+
stop('GeoParquet export requires at least one record');
56+
}
57+
if (includeGeometry) {
58+
columnData.push({
59+
name: geometryName,
60+
data: features.map(function(feat) {
61+
return feat.geometry || null;
62+
}),
63+
type: 'GEOMETRY'
64+
});
65+
}
5766
names.forEach(function(name) {
5867
var values = features.map(function(feat) {
5968
return feat.properties ? feat.properties[name] : null;
6069
});
6170
columnData.push(buildAttributeColumn(name, values));
6271
});
72+
if (columnData.length === 0) {
73+
stop('GeoParquet export requires geometry or attribute data');
74+
}
6375
return {columnData: columnData, geometryColumn: geometryName};
6476
}
6577

test/geoparquet-export-test.mjs

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import api from '../mapshaper.js';
22
import assert from 'assert';
33
import { parquetMetadataAsync } from 'hyparquet';
4-
import { fixPath } from './helpers';
4+
import { fixPath, captureLogCallsAsync } from './helpers';
55

66
describe('geoparquet export', function() {
77
it('exports GeoParquet and round-trips via async import', async function() {
@@ -52,6 +52,74 @@ describe('geoparquet export', function() {
5252
assert(/\.parquet$/i.test(names[0]));
5353
});
5454

55+
it('exports tabular layers as Parquet without a geometry column', async function() {
56+
var input = [{
57+
name: 'alpha',
58+
value: 3
59+
}, {
60+
name: 'beta',
61+
value: 7
62+
}];
63+
var out = await captureLogCallsAsync(function() {
64+
return api.applyCommands('-i in.json -o format=geoparquet', {'in.json': input});
65+
});
66+
var output = out.result;
67+
var fileName = Object.keys(output)[0];
68+
var metadata = await parquetMetadataAsync(toArrayBuffer(output[fileName]));
69+
var fields = getParquetFieldNames(metadata);
70+
assert(!fields.includes('geometry'));
71+
assert(/writing attribute data only/.test(out.log.join('\n')));
72+
73+
var dataset = await api.internal.importContentAsync({
74+
parquet: {
75+
filename: fileName,
76+
content: output[fileName]
77+
}
78+
}, {});
79+
assert.equal(dataset.layers[0].geometry_type, null);
80+
assert.deepEqual(dataset.layers[0].data.getRecords(), input);
81+
});
82+
83+
it('exports null-geometry features with attributes as Parquet tables', async function() {
84+
var input = {
85+
type: 'FeatureCollection',
86+
features: [{
87+
type: 'Feature',
88+
properties: {name: 'alpha'},
89+
geometry: null
90+
}]
91+
};
92+
var out = await captureLogCallsAsync(function() {
93+
return api.applyCommands('-i in.json -o format=geoparquet', {'in.json': input});
94+
});
95+
var output = out.result;
96+
var fileName = Object.keys(output)[0];
97+
var metadata = await parquetMetadataAsync(toArrayBuffer(output[fileName]));
98+
assert(!getParquetFieldNames(metadata).includes('geometry'));
99+
assert(/writing attribute data only/.test(out.log.join('\n')));
100+
});
101+
102+
it('rejects empty GeoParquet output layers', async function() {
103+
var emptyInput = {
104+
type: 'FeatureCollection',
105+
features: []
106+
};
107+
var nullOnlyInput = {
108+
type: 'FeatureCollection',
109+
features: [{
110+
type: 'Feature',
111+
properties: null,
112+
geometry: null
113+
}]
114+
};
115+
await assert.rejects(function() {
116+
return api.applyCommands('-i in.json -o format=geoparquet', {'in.json': emptyInput});
117+
}, /requires at least one record/);
118+
await assert.rejects(function() {
119+
return api.applyCommands('-i in.json -o format=geoparquet', {'in.json': nullOnlyInput});
120+
}, /requires at least one record|requires geometry or attribute data/);
121+
});
122+
55123
it('exports ZSTD-compressed GeoParquet when requested', async function() {
56124
var text = 'abcdefghijklmnopqrstuvwxyz'.repeat(40);
57125
var input = {
@@ -137,6 +205,12 @@ function getParquetCodecs(metadata) {
137205
return Object.keys(index).sort();
138206
}
139207

208+
function getParquetFieldNames(metadata) {
209+
return metadata.schema.map(function(field) {
210+
return field.name;
211+
});
212+
}
213+
140214
function toArrayBuffer(content) {
141215
if (content instanceof ArrayBuffer) return content;
142216
if (content instanceof Uint8Array) {

0 commit comments

Comments
 (0)