Skip to content

Commit e8c317d

Browse files
authored
Extend CSV Formatter functionality to CovJSON (#2323)
* Port #1926 with updates for CovJSON Formatting
* Add CovJSON CSV Formatter
* Update EDR Content Type
* Add original tests back
* Do not throw error on additional keys
* Change x,y columns order to be at the start
* Fix CI
* Respond to PR feedback
1 parent ea5c2f0 commit e8c317d

3 files changed

Lines changed: 301 additions & 18 deletions

File tree

pygeoapi/api/environmental_data_retrieval.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -494,8 +494,14 @@ def get_collection_edr_query(api: API, request: APIRequest,
494494
HTTPStatus.INTERNAL_SERVER_ERROR, headers, request.format,
495495
'NoApplicableCode', msg)
496496

497+
headers['Content-Type'] = formatter.mimetype
498+
497499
if formatter.attachment:
498-
filename = f'{dataset}.{formatter.extension}'
500+
if p.filename is None:
501+
filename = f'{dataset}.{formatter.extension}'
502+
else:
503+
filename = f'{p.filename}'
504+
499505
cd = f'attachment; filename="{filename}"'
500506
headers['Content-Disposition'] = cd
501507

pygeoapi/formatter/csv_.py

Lines changed: 124 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
import io
3232
import logging
3333

34+
from shapely.geometry import shape as geojson_to_geom
35+
3436
from pygeoapi.formatter.base import BaseFormatter, FormatterSerializationError
3537

3638
LOGGER = logging.getLogger(__name__)
@@ -60,12 +62,30 @@ def write(self, options: dict = {}, data: dict = None) -> str:
6062
Generate data in CSV format
6163
6264
:param options: CSV formatting options
63-
:param data: dict of GeoJSON data
65+
:param data: dict of data
6466
6567
:returns: string representation of format
6668
"""
69+
type = data.get('type', '')
70+
LOGGER.debug(f'Formatting CSV from data type: {type}')
71+
72+
if 'Feature' in type or 'features' in data:
73+
return self._write_from_geojson(options, data)
74+
elif 'Coverage' in type or 'coverages' in data:
75+
return self._write_from_covjson(options, data)
76+
77+
def _write_from_geojson(
78+
self, options: dict = {}, data: dict = None, is_point=False
79+
) -> str:
80+
"""
81+
Generate GeoJSON data in CSV format
6782
68-
is_point = False
83+
:param options: CSV formatting options
84+
:param data: dict of GeoJSON data
85+
:param is_point: whether the features are point geometries
86+
87+
:returns: string representation of format
88+
"""
6989
try:
7090
fields = list(data['features'][0]['properties'].keys())
7191
except IndexError:
@@ -75,32 +95,123 @@ def write(self, options: dict = {}, data: dict = None) -> str:
7595
if self.geom:
7696
LOGGER.debug('Including point geometry')
7797
if data['features'][0]['geometry']['type'] == 'Point':
98+
LOGGER.debug('point geometry detected, adding x,y columns')
7899
fields.insert(0, 'x')
79100
fields.insert(1, 'y')
80101
is_point = True
81102
else:
82-
# TODO: implement wkt geometry serialization
83-
LOGGER.debug('not a point geometry, skipping')
103+
LOGGER.debug('not a point geometry, adding wkt column')
104+
fields.append('wkt')
84105

85106
LOGGER.debug(f'CSV fields: {fields}')
107+
output = io.StringIO()
108+
writer = csv.DictWriter(output, fields, extrasaction='ignore')
109+
writer.writeheader()
86110

87-
try:
88-
output = io.StringIO()
89-
writer = csv.DictWriter(output, fields)
90-
writer.writeheader()
111+
for feature in data['features']:
112+
self._add_feature(writer, feature, is_point)
113+
114+
return output.getvalue().encode('utf-8')
115+
116+
def _add_feature(
    self, writer: csv.DictWriter, feature: dict, is_point: bool
) -> None:
    """
    Add feature data to CSV writer

    Writes one row made of the feature's properties and, when geometry
    output is enabled (``self.geom``), either ``x``/``y`` values (point
    features) or a ``wkt`` value (any other geometry).  Features with
    a null geometry get empty geometry cells.

    :param writer: CSV DictWriter
    :param feature: dict of GeoJSON feature
    :param is_point: whether the feature is a point geometry

    :raises FormatterSerializationError: if the feature cannot be
                                         turned into a CSV row
    """

    try:
        # Shallow copy so the geometry columns added below do not leak
        # back into the caller's GeoJSON properties; null properties
        # are treated as an empty mapping
        row = dict(feature['properties'] or {})

        geometry = feature.get('geometry') if self.geom else None
        # A null geometry marks an unlocated feature: leave the
        # geometry cells for the writer to fill with its rest value
        if geometry is not None:
            if is_point:
                row['x'] = geometry['coordinates'][0]
                row['y'] = geometry['coordinates'][1]
            else:
                row['wkt'] = geojson_to_geom(geometry).wkt

        LOGGER.debug(f'Writing feature to row: {row}')
        writer.writerow(row)
    except (ValueError, IndexError, KeyError, TypeError) as err:
        LOGGER.error(err)
        raise FormatterSerializationError(
            'Error writing CSV output') from err
102141

142+
def _write_from_covjson(
    self, options: dict = {}, data: dict = None
) -> str:
    """
    Generate CovJSON data in CSV format

    Emits one row per parameter and time step with the columns
    ``parameter, datetime, value, unit, x, y``.  Only point-like
    coverages (domain type containing ``point``) are written; the
    row writing itself is delegated to ``_add_coverage``.

    :param options: CSV formatting options (not used here)
    :param data: dict of CovJSON data, either a single ``Coverage``
                 or a collection holding a ``coverages`` list

    :returns: CSV document, UTF-8 encoded
    """

    LOGGER.debug('Processing CovJSON data for CSV output')

    units = {}
    for name, parameter in (data.get('parameters') or {}).items():
        # `unit` is optional and may carry a label without a symbol
        symbol = (parameter.get('unit') or {}).get('symbol')
        if isinstance(symbol, dict):
            # Symbol objects keep the printable text under `value`
            symbol = symbol.get('value')

        units[name] = symbol

    fields = ['parameter', 'datetime', 'value', 'unit', 'x', 'y']
    LOGGER.debug(f'CSV fields: {fields}')
    output = io.StringIO()
    writer = csv.DictWriter(output, fields)
    writer.writeheader()

    # `type` may be absent when the caller dispatched on `coverages`
    if data.get('type') == 'Coverage':
        coverages = [data]
        default_domain_type = ''
    else:
        coverages = data['coverages']
        # A collection may declare the domain type once for all members
        default_domain_type = data.get('domainType') or ''

    for coverage in coverages:
        domain_type = (
            coverage['domain'].get('domainType') or default_domain_type
        )
        # Non-point members are passed through too, so the helper
        # decides what to do with them instead of a silent drop here
        self._add_coverage(
            writer, units, coverage, 'point' in domain_type.lower()
        )

    return output.getvalue().encode('utf-8')
104178

179+
@staticmethod
def _add_coverage(
    writer: csv.DictWriter, units: dict, data: dict, is_point: bool = False
) -> None:
    """
    Add coverage data to CSV writer

    Writes one row per parameter and time step.  When the domain has
    no ``t`` axis a single row with an empty ``datetime`` is written
    for each parameter.  Parameters missing from ``units`` get an
    empty ``unit`` cell.

    :param writer: CSV DictWriter
    :param units: dict of parameter units
    :param data: dict of CovJSON coverage data
    :param is_point: whether the coverage is a point coverage

    :raises FormatterSerializationError: if the coverage structure
                                         cannot be written as rows
    """

    if not is_point:
        LOGGER.warning('Non-point coverages not supported for CSV output')
        return

    try:
        axes = data['domain']['axes']
        # The position is the same for every row: read it once
        x = axes['x']['values'][-1]
        y = axes['y']['values'][-1]
        # Without a time axis, fall back to one undated step
        timestamps = axes['t']['values'] if 't' in axes else [None]

        for parameter, range_ in data['ranges'].items():
            values = range_['values']
            unit = units.get(parameter)
            for index, timestamp in enumerate(timestamps):
                writer.writerow({
                    'parameter': parameter,
                    'datetime': timestamp,
                    # Indexing (not zip) so a range shorter than the
                    # time axis is reported instead of truncated
                    'value': values[index],
                    'unit': unit,
                    'x': x,
                    'y': y
                })
    except (ValueError, KeyError, IndexError, TypeError) as err:
        LOGGER.error(err)
        raise FormatterSerializationError(
            'Error writing CSV output') from err
215+
105216
def __repr__(self):
106217
return f'<CSVFormatter> {self.name}'

tests/formatter/test_csv__formatter.py

Lines changed: 170 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,17 @@
2727
#
2828
# =================================================================
2929

30-
import csv
31-
import io
30+
from csv import DictReader
31+
from io import StringIO
32+
import json
33+
3234
import pytest
3335

36+
from pygeoapi.formatter.base import FormatterSerializationError
3437
from pygeoapi.formatter.csv_ import CSVFormatter
3538

39+
from ..util import get_test_file_path
40+
3641

3742
@pytest.fixture()
3843
def fixture():
@@ -58,12 +63,87 @@ def fixture():
5863
return data
5964

6065

66+
@pytest.fixture
def point_coverage_data():
    """Provide a CovJSON PointSeries coverage with a single PSAL range"""
    timestamps = [
        '2013-01-01', '2013-01-02', '2013-01-03',
        '2013-01-04', '2013-01-05', '2013-01-06'
    ]
    salinity = [43.9599, 43.9599, 43.9640, 43.9640, 43.9679, 43.987]

    domain = {
        'type': 'Domain',
        'domainType': 'PointSeries',
        'axes': {
            'x': {'values': [-10.1]},
            'y': {'values': [-40.2]},
            't': {'values': timestamps}
        }
    }

    psal_parameter = {
        'type': 'Parameter',
        'description': {'en': 'The measured salinity'},
        'unit': {'symbol': 'psu'},
        'observedProperty': {
            'id': 'http://vocab.nerc.ac.uk/standard_name/sea_water_salinity/',  # noqa
            'label': {'en': 'Sea Water Salinity'}
        }
    }

    psal_range = {
        'axisNames': ['t'],
        'shape': [6],
        'values': salinity
    }

    return {
        'type': 'Coverage',
        'domain': domain,
        'parameters': {'PSAL': psal_parameter},
        'ranges': {'PSAL': psal_range}
    }
104+
105+
106+
@pytest.fixture
def data():
    """Load the parsed contents of the ``data/items.geojson`` test file"""
    geojson_path = get_test_file_path('data/items.geojson')
    with open(geojson_path, 'r', encoding='utf-8') as geojson_file:
        contents = json.load(geojson_file)

    return contents
111+
112+
113+
@pytest.fixture(scope='function')
def csv_reader_geom_enabled(data):
    """DictReader over the CSV written for ``data`` with geometry enabled"""
    csv_bytes = CSVFormatter({'geom': True}).write(data=data)
    buffer = StringIO(csv_bytes.decode('utf-8'))
    return DictReader(buffer)
119+
120+
121+
@pytest.fixture
def invalid_geometry_data():
    """Provide a feature list whose only Point has a single coordinate"""
    truncated_point = {
        'type': 'Point',
        'coordinates': [-130.44472222222223]
    }
    feature = {
        'id': 1,
        'type': 'Feature',
        'properties': {
            'id': 1,
            'title': 'Invalid Point Feature'
        },
        'geometry': truncated_point
    }

    return {'features': [feature]}
139+
140+
61141
def test_csv__formatter(fixture):
62142
f = CSVFormatter({'geom': True})
63143
f_csv = f.write(data=fixture)
64144

65-
buffer = io.StringIO(f_csv.decode('utf-8'))
66-
reader = csv.DictReader(buffer)
145+
buffer = StringIO(f_csv.decode('utf-8'))
146+
reader = DictReader(buffer)
67147

68148
header = list(reader.fieldnames)
69149

@@ -80,3 +160,89 @@ def test_csv__formatter(fixture):
80160
assert data['id'] == '1972'
81161
assert data['foo'] == 'bar'
82162
assert data['title'] == ''
163+
164+
165+
def test_write_with_geometry_enabled(csv_reader_geom_enabled):
    """Check column and row counts of the geometry-enabled CSV output"""
    reader = csv_reader_geom_enabled
    records = list(reader)

    # Header width
    column_names = list(reader.fieldnames)
    assert len(column_names) == 4

    # Row count
    assert len(records) == 9
175+
176+
177+
def test_write_without_geometry(data):
    """Test CSV output with geometry disabled"""
    formatter = CSVFormatter({'geom': False})
    output = formatter.write(data=data)
    csv_reader = DictReader(StringIO(output.decode('utf-8')))

    rows = list(csv_reader)

    # Verify headers don't include geometry: the formatter names its
    # non-point geometry column `wkt`, so check that one as well
    headers = csv_reader.fieldnames
    assert 'geometry' not in headers
    assert 'wkt' not in headers

    # Verify data
    first_row = rows[0]
    assert first_row['uri'] == \
        'http://localhost:5000/collections/objects/items/1'
    assert first_row['name'] == 'LineString'
194+
195+
196+
def test_write_empty_features():
    """An empty feature list must yield an empty string"""
    empty_collection = {'features': []}
    result = CSVFormatter({'geom': True}).write(data=empty_collection)
    assert result == ''
204+
205+
206+
@pytest.mark.parametrize(
    'row_index,expected_wkt',
    [
        (2, 'POINT (-85 33)'),
        (3, 'MULTILINESTRING ((10 10, 20 20, 10 40), (40 40, 30 30, 40 20, 30 10))'),  # noqa
        (4, 'POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))'),
        (5, 'POLYGON ((35 10, 45 45, 15 40, 10 20, 35 10), (20 30, 35 35, 30 20, 20 30))'),  # noqa
        (6, 'MULTIPOLYGON (((30 20, 45 40, 10 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))')  # noqa
    ]
)
def test_wkt(csv_reader_geom_enabled, row_index, expected_wkt):
    """Check the ``wkt`` column of selected rows against expected WKT"""
    records = list(csv_reader_geom_enabled)

    # Compare the chosen row's geometry text
    assert records[row_index]['wkt'] == expected_wkt
223+
224+
225+
def test_invalid_geometry_data(invalid_geometry_data):
    """A Point with too few coordinates must raise a serialization error"""
    csv_formatter = CSVFormatter({'geom': True})
    with pytest.raises(FormatterSerializationError):
        csv_formatter.write(data=invalid_geometry_data)
229+
230+
231+
def test_point_coverage_csv(point_coverage_data):
    """Check row count and first-row values of point coverage CSV output"""
    csv_bytes = CSVFormatter({'geom': True}).write(data=point_coverage_data)
    records = list(DictReader(StringIO(csv_bytes.decode('utf-8'))))

    # One row per time step of the single parameter
    assert len(records) == 6

    # First row, column by column
    expected_first = {
        'parameter': 'PSAL',
        'datetime': '2013-01-01',
        'value': '43.9599',
        'unit': 'psu',
        'x': '-10.1',
        'y': '-40.2'
    }
    first_record = records[0]
    for column, expected_value in expected_first.items():
        assert first_record[column] == expected_value

0 commit comments

Comments
 (0)