# Source code for pygeoapi.provider.geojson

# =================================================================
#
# Authors: Matthew Perry <perrygeo@gmail.com>
#
# Copyright (c) 2018 Matthew Perry
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================

import json
import logging
import os
import uuid

from pygeoapi.provider.base import (BaseProvider, ProviderItemNotFoundError,
                                    ProviderSchemaError,
                                    ProviderItemAlreadyExistsError)

LOGGER = logging.getLogger(__name__)


class GeoJSONProvider(BaseProvider):
    """
    Provider class backed by local GeoJSON files

    This is meant to be simple
    (no external services, no dependencies, no schema)

    at the expense of performance
    (no indexing, full serialization roundtrip on each request)

    Not thread safe, a single server process is assumed

    This implementation uses the feature 'id' heavily: when a feature has
    no top-level 'id', one is copied from ``properties[self.id_field]``
    while the file is loaded. An 'id' that is already present is kept.
    Loading does not alter the feature 'properties'.

    ``self.data`` (path of the GeoJSON file) and ``self.id_field`` are read
    throughout; they are presumably set by ``BaseProvider.__init__`` from
    the provider definition (not visible in this file).

    TODO:
    * query method should take bbox
    * instead of methods returning FeatureCollections,
      we should be yielding Features and aggregating in the view
    * there are strict id semantics; all features in the input GeoJSON file
      must be present and be unique strings. Otherwise it will break.
    * How to raise errors in the provider implementation such that
      appropriate HTTP responses will be raised
    """

    def __init__(self, provider_def):
        """
        Initialize the provider and cache its field listing

        :param provider_def: provider definition, handed unchanged to
                             BaseProvider.__init__
        """

        super().__init__(provider_def)
        self.fields = self.get_fields()

    # -- internal helpers -------------------------------------------------

    def _read_source(self):
        """
        Parse the JSON document stored at self.data

        :returns: decoded JSON value, or None when the file does not exist
        """

        try:
            # GeoJSON is UTF-8; do not depend on the platform default
            with open(self.data, encoding='utf-8') as src:
                return json.load(src)
        except FileNotFoundError:
            return None

    def _save(self, all_data):
        """
        Serialize all_data to self.data, replacing the file content

        :param all_data: FeatureCollection dict
        """

        with open(self.data, 'w', encoding='utf-8') as dst:
            dst.write(json.dumps(all_data, indent=2, sort_keys=True))

    @staticmethod
    def _properties(feature):
        """
        Return the mutable 'properties' dict of a feature

        A missing or null 'properties' member is replaced by an empty dict
        so that callers can always assign into the result.

        :param feature: GeoJSON feature dict
        :returns: the feature's properties dict
        """

        props = feature.get('properties')
        if props is None:
            props = feature['properties'] = {}
        return props

    def _find_index(self, features, identifier):
        """
        Locate a feature by id

        The identifier is converted to the type of the first feature's id
        before comparing, so that e.g. '3' matches the integer id 3.

        :param features: list of GeoJSON feature dicts
        :param identifier: feature id as supplied by the caller
        :returns: position of the matching feature in features
        :raises ProviderItemNotFoundError: no feature matches, the
                collection is empty, or the identifier cannot be converted
        """

        if features:
            wanted = identifier
            convertible = True
            sample_id = features[0].get('id')
            if sample_id is not None:
                try:
                    wanted = type(sample_id)(identifier)
                except (TypeError, ValueError):
                    # e.g. 'abc' against integer ids: it cannot match
                    convertible = False
            if convertible:
                for index, feature in enumerate(features):
                    if 'id' in feature and feature['id'] == wanted:
                        return index

        err = 'item {} not found'.format(identifier)
        LOGGER.error(err)
        raise ProviderItemNotFoundError(err)

    @staticmethod
    def _prune_empty_properties(features, candidates):
        """
        Drop every candidate property that is None (or absent) everywhere

        :param features: list of GeoJSON feature dicts, modified in place
        :param candidates: iterable of property names to examine
        """

        for name in candidates:
            unused = all((f.get('properties') or {}).get(name) is None
                         for f in features)
            if unused:
                for f in features:
                    (f.get('properties') or {}).pop(name, None)

    # -- public API -------------------------------------------------------

    def get_fields(self):
        """
        Get provider field information (names, types)

        Only the properties of the first feature are inspected and every
        one of them is reported as type 'string'.

        :returns: dict of fields; empty when the file is missing or holds
                  no features
        """

        LOGGER.debug('Treating all columns as string types')
        fields = {}
        data = self._read_source()
        if data is not None:
            features = data.get('features') or []
            if features:
                for name in features[0].get('properties') or {}:
                    fields[name] = 'string'
        return fields

    def get_all_fields(self, dict):
        """
        Collect the property names used by any of the given features

        :param dict: iterable of GeoJSON feature dicts (the name shadows
                     the builtin; it is kept so keyword callers still work)
        :returns: set of property names
        """

        fields = set()
        for feature in dict:
            fields.update(feature.get('properties') or {})
        return fields

    def _load(self):
        """
        Load and validate the source GeoJSON file at self.data

        Yes loading from disk, deserializing and validation
        happens on every request. This is not efficient.

        :returns: FeatureCollection dict; an empty collection when the
                  file does not exist
        :raises AssertionError: the document is not a FeatureCollection
        """

        data = self._read_source()
        if data is None:
            data = {
                'type': 'FeatureCollection',
                'features': []}

        # Must be a FeatureCollection. Raised explicitly (not via a bare
        # assert) so the check survives `python -O`; the exception type is
        # the one callers have always seen.
        if data['type'] != 'FeatureCollection':
            raise AssertionError('GeoJSON document must be a '
                                 'FeatureCollection')

        # All features must have ids, TODO must be unique strings
        for feature in data['features']:
            if 'id' not in feature:
                props = feature.get('properties') or {}
                if self.id_field in props:
                    feature['id'] = props[self.id_field]
        return data

    def _load_without_null(self):
        """
        Load the source GeoJSON file with None-valued properties removed

        Yes loading from disk, deserializing and validation
        happens on every request. This is not efficient.

        :returns: FeatureCollection dict
        """

        data = self._load()
        for feature in data['features']:
            props = feature.get('properties')
            if props:
                # build a new dict: deleting keys from a dict while
                # iterating over it raises RuntimeError
                feature['properties'] = {
                    key: value for key, value in props.items()
                    if value is not None}
        return data

    def query(self, startindex=0, limit=10, resulttype='results',
              bbox=[], datetime=None, properties=[], sortby=[]):
        """
        query the provider

        bbox, datetime, properties and sortby are accepted but are not
        applied by this implementation.

        :param startindex: starting record to return (default 0)
        :param limit: number of records to return (default 10)
        :param resulttype: return results or hit limit (default results)
        :param bbox: bounding box [minx,miny,maxx,maxy]
        :param datetime: temporal (datestamp or extent)
        :param properties: list of tuples (name, value)
        :param sortby: list of dicts (property, order)

        :returns: FeatureCollection dict of 0..n GeoJSON features
        """

        # TODO filter by bbox without resorting to third-party libs
        data = self._load()

        data['numberMatched'] = len(data['features'])

        if resulttype == 'hits':
            data['features'] = []
        else:
            data['features'] = data['features'][startindex:startindex+limit]
            data['numberReturned'] = len(data['features'])

        return data

    def generate_unique_id(self):
        """
        Produce an id that no stored feature uses yet

        When the first feature has an integer id the smallest unused
        non-negative integer is returned; in every other case (string
        ids, empty collection, features without ids) a random UUID string.

        :returns: int or str feature id
        """

        feats = self._load()['features']
        sample_id = feats[0].get('id') if feats else None

        # bool is a subclass of int but is not a numeric id
        if isinstance(sample_id, int) and not isinstance(sample_id, bool):
            used = set()
            for feat in feats:
                fid = feat.get('id')
                if fid is None:
                    fid = (feat.get('properties') or {}).get(self.id_field)
                used.add(fid)
            candidate = 0
            while candidate in used:
                candidate += 1
            return candidate

        return str(uuid.uuid4())

    def get(self, identifier):
        """
        query the provider by id

        :param identifier: feature id
        :returns: dict of single GeoJSON feature
        :raises ProviderItemNotFoundError: no feature has that id
        """

        features = self._load()['features']
        return features[self._find_index(features, identifier)]

    def create(self, new_feature):
        """
        create a new feature item

        The id is taken from new_feature['id'], else from
        new_feature['properties'][id_field]; None and '' count as missing
        (0 is a valid id) and lead to a generated id. new_feature is
        modified in place (id set, missing properties filled with None).

        :param new_feature: new GeoJSON feature dictionary
        :returns: feature id
        :raises ProviderItemAlreadyExistsError: the id is already in use
        :raises ProviderSchemaError: new_feature has a property that no
                stored feature has (only checked for non-empty collections)
        """

        all_data = self._load()
        features = all_data['features']
        id_field = self.id_field
        new_props = self._properties(new_feature)

        nfid = new_feature.get('id')
        if nfid is None or nfid == '':
            nfid = new_props.get(id_field)

        if nfid is None or nfid == '':
            nfid = self.generate_unique_id()
        elif any(f.get('id') == nfid for f in features):
            err = 'provider item {} already exists'.format(nfid)
            LOGGER.error(err)
            raise ProviderItemAlreadyExistsError(err)

        curr_cols = self.get_all_fields(features) - {id_field}
        new_cols = set(new_props) - {id_field}

        # if given data has extra properties not in schema; an empty
        # collection has no schema yet, so the first feature defines it
        extra_cols = new_cols - curr_cols
        if features and extra_cols:
            err = 'properties {} not prescent in provider schema'\
                .format(extra_cols)
            LOGGER.error(err)
            raise ProviderSchemaError(err)

        # set id field as per schema in file; the top-level id is always
        # written because _load gives precedence to it
        if features and id_field in (features[0].get('properties') or {}):
            new_props[id_field] = nfid
        new_feature['id'] = nfid

        # set missing properties to empty
        for prop in curr_cols - new_cols:
            new_props[prop] = None

        features.append(new_feature)
        self._save(all_data)
        return nfid

    def replace(self, identifier, new_feature):
        """
        replace an existing feature item with new_feature item

        new_feature is modified in place: it receives the id of the
        feature it replaces and None for every schema property it omits.

        :param identifier: feature id
        :param new_feature: new GeoJSON feature dictionary
        :raises ProviderItemNotFoundError: no feature has that id
        :raises ProviderSchemaError: new_feature has a property that no
                stored feature has
        """

        all_data = self._load()
        features = all_data['features']
        id_field = self.id_field

        index = self._find_index(features, identifier)
        stored_id = features[index]['id']
        new_props = self._properties(new_feature)

        # if given data has extra properties not in schema
        curr_cols = self.get_all_fields(features) - {id_field}
        new_cols = set(new_props) - {id_field}
        extra_cols = new_cols - curr_cols
        if extra_cols:
            err = 'properties {} not prescent in provider schema'\
                .format(extra_cols)
            LOGGER.error(err)
            raise ProviderSchemaError(err)

        # set id field; the top-level id is always forced to the stored
        # one so that a client-supplied 'id' cannot change the identity
        if id_field in (features[0].get('properties') or {}):
            new_props[id_field] = stored_id
        new_feature['id'] = stored_id

        # set missing properties to empty
        dropped_cols = curr_cols - new_cols
        for prop in dropped_cols:
            new_props[prop] = None

        features[index] = new_feature

        # clean up empty attributes
        self._prune_empty_properties(features, dropped_cols)
        self._save(all_data)

    def update(self, identifier, updates):
        """
        update an existing feature item

        updates may hold any of the keys 'add' and 'modify' (lists of
        {'name': ..., 'value': ...} dicts) and 'remove' (list of property
        names); they are applied in that order. Nothing is written when
        an error is raised.

        :param identifier: feature id
        :param updates: updates dictionary
        :returns: feature item
        :raises ProviderItemNotFoundError: no feature has that id
        :raises ProviderSchemaError: adding a property that already
                exists, or modifying/removing one that does not
        """

        id_field = self.id_field
        all_data = self._load()
        features = all_data['features']

        index = self._find_index(features, identifier)
        feature = features[index]
        props = self._properties(feature)

        # every property name in use, kept current while updates apply;
        # it contains id_field when ids are stored in the properties
        known = self.get_all_fields(features)

        # add an attribute if its not already prescent in the feature
        for name_val_pair in updates.get('add') or []:
            name = name_val_pair['name']
            value = name_val_pair['value']
            if name in known:
                err = 'property {} exists in given provider item'\
                    .format(name)
                LOGGER.error(err)
                raise ProviderSchemaError(err)
            for other in features:
                self._properties(other)[name] = None
            props[name] = value
            known.add(name)

        # modify an attribute if its already prescent in the feature
        for name_val_pair in updates.get('modify') or []:
            name = name_val_pair['name']
            value = name_val_pair['value']
            if name not in known:
                err = 'property {} dont exist in given provider item'\
                    .format(name)
                LOGGER.error(err)
                raise ProviderSchemaError(err)
            props[name] = value

        # delete an attribute if its prescent in the feature
        for name in updates.get('remove') or []:
            removable = (name != id_field and name in known
                         and props.get(name) is not None)
            if not removable:
                err = 'property {} doesnt exists for given provider item'\
                    .format(name)
                LOGGER.error(err)
                raise ProviderSchemaError(err)
            props[name] = None

        # clean up empty attributes
        self._prune_empty_properties(
            features, self.get_all_fields(features) - {id_field})
        self._save(all_data)

        return features[index]

    def delete(self, identifier):
        """
        deletes an existing feature item

        :param identifier: feature id
        :raises ProviderItemNotFoundError: no feature has that id
        """

        id_field = self.id_field
        all_data = self._load()
        features = all_data['features']

        features.pop(self._find_index(features, identifier))

        # clean up empty attributes
        self._prune_empty_properties(
            features, self.get_all_fields(features) - {id_field})
        self._save(all_data)

    def __repr__(self):
        return '<GeoJSONProvider> {}'.format(self.data)