Skip to content

Commit a1221f8

Browse files
authored
Add option to return non-hydrated items (#101)
* add option to return non-hydrated items * add migration * add docs and test * fix version number in changelog
1 parent 6a29c8f commit a1221f8

File tree

12 files changed

+2978
-7
lines changed

12 files changed

+2978
-7
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,14 @@
11
# Changelog
2+
## [v0.5.1]
3+
4+
### Fixed
5+
6+
### Changed
7+
8+
### Added
9+
10+
- Add conf item to search to allow returning results without hydrating. This allows an application using pgstac to shift the CPU load of rehydrating items from the database onto the application server.
11+
212
## [v0.5.0]
313
Version 0.5.0 is a major refactor of how data is stored. It is recommended to start a new database from scratch and to move data over rather than to use the inbuilt migration which will be very slow for larger amounts of data.
414

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,17 @@ ALTER ROLE <username> SET pgstac.context_estimated_cost TO '<estimated query cos
8989
ALTER ROLE <username> SET pgstac.context_stats_ttl TO '<an interval string ie "1 day" after which pgstac search will force recalculation of it's estimates>>';
9090
```
9191
92+
#### Runtime Configurations
93+
94+
Runtime configuration of variables can be made with search by passing in configuration in the search json "conf" item.
95+
96+
Runtime configuration is available for context, context_estimated_count, context_estimated_cost, context_stats_ttl, and nohydrate.
97+
98+
The nohydrate conf item returns an unhydrated item bypassing the CPU intensive step of rehydrating data with data from the collection metadata. When using the nohydrate conf, the only fields that are respected in the fields extension are geometry and bbox.
99+
```sql
100+
SELECT search('{"conf":{"nohydrate"=true}}');
101+
```
102+
92103
#### PGStac Partitioning
93104
By default PGStac partitions data by collection (note: this is a change starting with version 0.5.0). Each collection can further be partitioned by either year or month. **Partitioning must be set up prior to loading any data!** Partitioning can be configured by setting the partition_trunc flag on a collection in the database.
94105
```sql

pypgstac/pypgstac/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"""PyPGStac Version."""
2-
__version__ = "0.5.0"
2+
__version__ = "0.5.1"
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
SET SEARCH_PATH to pgstac, public;
2+
set check_function_bodies = off;
3+
4+
CREATE OR REPLACE FUNCTION pgstac.content_nonhydrated(_item pgstac.items, fields jsonb DEFAULT '{}'::jsonb)
5+
RETURNS jsonb
6+
LANGUAGE plpgsql
7+
STABLE PARALLEL SAFE
8+
AS $function$
9+
DECLARE
10+
geom jsonb;
11+
bbox jsonb;
12+
output jsonb;
13+
BEGIN
14+
IF include_field('geometry', fields) THEN
15+
geom := ST_ASGeoJson(_item.geometry)::jsonb;
16+
END IF;
17+
IF include_field('bbox', fields) THEN
18+
bbox := geom_bbox(_item.geometry)::jsonb;
19+
END IF;
20+
output := jsonb_build_object(
21+
'id', _item.id,
22+
'geometry', geom,
23+
'bbox',bbox,
24+
'collection', _item.collection
25+
) || _item.content;
26+
RETURN output;
27+
END;
28+
$function$
29+
;
30+
31+
CREATE OR REPLACE FUNCTION pgstac.search(_search jsonb DEFAULT '{}'::jsonb)
32+
RETURNS jsonb
33+
LANGUAGE plpgsql
34+
SECURITY DEFINER
35+
SET search_path TO 'pgstac', 'public'
36+
AS $function$
37+
DECLARE
38+
searches searches%ROWTYPE;
39+
_where text;
40+
token_where text;
41+
full_where text;
42+
orderby text;
43+
query text;
44+
token_type text := substr(_search->>'token',1,4);
45+
_limit int := coalesce((_search->>'limit')::int, 10);
46+
curs refcursor;
47+
cntr int := 0;
48+
iter_record items%ROWTYPE;
49+
first_record jsonb;
50+
last_record jsonb;
51+
out_records jsonb := '[]'::jsonb;
52+
prev_query text;
53+
next text;
54+
prev_id text;
55+
has_next boolean := false;
56+
has_prev boolean := false;
57+
prev text;
58+
total_count bigint;
59+
context jsonb;
60+
collection jsonb;
61+
includes text[];
62+
excludes text[];
63+
exit_flag boolean := FALSE;
64+
batches int := 0;
65+
timer timestamptz := clock_timestamp();
66+
pstart timestamptz;
67+
pend timestamptz;
68+
pcurs refcursor;
69+
search_where search_wheres%ROWTYPE;
70+
id text;
71+
BEGIN
72+
CREATE TEMP TABLE results (content jsonb) ON COMMIT DROP;
73+
-- if ids is set, short circuit and just use direct ids query for each id
74+
-- skip any paging or caching
75+
-- hard codes ordering in the same order as the array of ids
76+
IF _search ? 'ids' THEN
77+
INSERT INTO results
78+
SELECT
79+
CASE WHEN _search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true THEN
80+
content_nonhydrated(items, _search->'fields')
81+
ELSE
82+
content_hydrate(items, _search->'fields')
83+
END
84+
FROM items WHERE
85+
items.id = ANY(to_text_array(_search->'ids'))
86+
AND
87+
CASE WHEN _search ? 'collections' THEN
88+
items.collection = ANY(to_text_array(_search->'collections'))
89+
ELSE TRUE
90+
END
91+
ORDER BY items.datetime desc, items.id desc
92+
;
93+
SELECT INTO total_count count(*) FROM results;
94+
ELSE
95+
searches := search_query(_search);
96+
_where := searches._where;
97+
orderby := searches.orderby;
98+
search_where := where_stats(_where);
99+
total_count := coalesce(search_where.total_count, search_where.estimated_count);
100+
101+
IF token_type='prev' THEN
102+
token_where := get_token_filter(_search, null::jsonb);
103+
orderby := sort_sqlorderby(_search, TRUE);
104+
END IF;
105+
IF token_type='next' THEN
106+
token_where := get_token_filter(_search, null::jsonb);
107+
END IF;
108+
109+
full_where := concat_ws(' AND ', _where, token_where);
110+
RAISE NOTICE 'FULL QUERY % %', full_where, clock_timestamp()-timer;
111+
timer := clock_timestamp();
112+
113+
FOR query IN SELECT partition_queries(full_where, orderby, search_where.partitions) LOOP
114+
timer := clock_timestamp();
115+
query := format('%s LIMIT %s', query, _limit + 1);
116+
RAISE NOTICE 'Partition Query: %', query;
117+
batches := batches + 1;
118+
-- curs = create_cursor(query);
119+
OPEN curs FOR EXECUTE query;
120+
LOOP
121+
FETCH curs into iter_record;
122+
EXIT WHEN NOT FOUND;
123+
cntr := cntr + 1;
124+
125+
IF _search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true THEN
126+
last_record := content_nonhydrated(iter_record, _search->'fields');
127+
ELSE
128+
last_record := content_hydrate(iter_record, _search->'fields');
129+
END IF;
130+
IF cntr = 1 THEN
131+
first_record := last_record;
132+
END IF;
133+
IF cntr <= _limit THEN
134+
INSERT INTO results (content) VALUES (last_record);
135+
ELSIF cntr > _limit THEN
136+
has_next := true;
137+
exit_flag := true;
138+
EXIT;
139+
END IF;
140+
END LOOP;
141+
CLOSE curs;
142+
RAISE NOTICE 'Query took %.', clock_timestamp()-timer;
143+
timer := clock_timestamp();
144+
EXIT WHEN exit_flag;
145+
END LOOP;
146+
RAISE NOTICE 'Scanned through % partitions.', batches;
147+
END IF;
148+
149+
SELECT jsonb_agg(content) INTO out_records FROM results WHERE content is not NULL;
150+
151+
DROP TABLE results;
152+
153+
154+
-- Flip things around if this was the result of a prev token query
155+
IF token_type='prev' THEN
156+
out_records := flip_jsonb_array(out_records);
157+
first_record := last_record;
158+
END IF;
159+
160+
-- If this query has a token, see if there is data before the first record
161+
IF _search ? 'token' THEN
162+
prev_query := format(
163+
'SELECT 1 FROM items WHERE %s LIMIT 1',
164+
concat_ws(
165+
' AND ',
166+
_where,
167+
trim(get_token_filter(_search, to_jsonb(content_dehydrate(first_record))))
168+
)
169+
);
170+
RAISE NOTICE 'Query to get previous record: % --- %', prev_query, first_record;
171+
EXECUTE prev_query INTO has_prev;
172+
IF FOUND and has_prev IS NOT NULL THEN
173+
RAISE NOTICE 'Query results from prev query: %', has_prev;
174+
has_prev := TRUE;
175+
END IF;
176+
END IF;
177+
has_prev := COALESCE(has_prev, FALSE);
178+
179+
IF has_prev THEN
180+
prev := out_records->0->>'id';
181+
END IF;
182+
IF has_next OR token_type='prev' THEN
183+
next := out_records->-1->>'id';
184+
END IF;
185+
186+
IF context(_search->'conf') != 'off' THEN
187+
context := jsonb_strip_nulls(jsonb_build_object(
188+
'limit', _limit,
189+
'matched', total_count,
190+
'returned', coalesce(jsonb_array_length(out_records), 0)
191+
));
192+
ELSE
193+
context := jsonb_strip_nulls(jsonb_build_object(
194+
'limit', _limit,
195+
'returned', coalesce(jsonb_array_length(out_records), 0)
196+
));
197+
END IF;
198+
199+
collection := jsonb_build_object(
200+
'type', 'FeatureCollection',
201+
'features', coalesce(out_records, '[]'::jsonb),
202+
'next', next,
203+
'prev', prev,
204+
'context', context
205+
);
206+
207+
RETURN collection;
208+
END;
209+
$function$
210+
;
211+
212+
213+
214+
SELECT set_version('0.5.1');

0 commit comments

Comments
 (0)