Data generation
Generate 500 points, roughly on circle of radius 1. Jitter x and y by +/-0.25:
SELECT
ST_MakePoint(
Cos(2 * Pi() * d / 500.0) -- x
* (1 + Random() * 0.5 - 0.25), -- jitter-x
Sin(2 * Pi() * d / 500.0) -- y
* (1 + Random() * 0.5 - 0.25) -- jitter-y
) as wkb_geometry
FROM
generate_series(0, 500) d;
Create a synthetic star ratings table:
CREATE TABLE ratings AS
SELECT round(greatest(least(x,5), 0)*100)/100.0 AS stars FROM normal_rand(1000000, 3, 1) x
Create a set of 1.000.000 random bounding boxes in the world (epsg:3857) with normally distributed sizes:
CREATE TABLE placenames_synthetic AS
select
row_number() OVER () as id,
ST_SetSRID(
ST_Intersection(
ST_Expand(
ST_MakePoint(
(random()*40075016.68 - 20037508.34) * 0.99999,
(random()*40075016.68 - 20037508.34) * 0.99999
),
greatest(100, abs(size))
),
ST_Expand(ST_MakePoint(0,0), 20037508.34)
),
3857
) as geometry,
random() as importance
FROM
normal_rand(1000000, 100, 1000) size;
Special joins
Merge two tables side-by-side:
SELECT * FROM
(SELECT *, row_number() OVER() AS rn FROM table1) t1
INNER JOIN
(SELECT *, row_number() OVER() AS rn FROM table1) t2
ON t1.rn = t2.rn
Distinct selects
Select the biggest airport for each country in the world (Openflights Airports dataset):
select distinct on (country) * from openflights_airports order by country asc, num_routes desc;
Special selects
Computing a join between a custom list of tuples (named “foo” below) and an existing table (named “bar” below):
with foo (x, y) as (
values (1.1, 'a'), (2.2, 'b'), (3.3, 'c')
)
select * from foo, bar
Window functions
Rank countries of the world by area per continent:
SELECT name, rank() OVER
(
PARTITION BY continent
ORDER BY area DESC
)
FROM countries;
Recursive
Random subsets query
WITH RECURSIVE t(i, n) AS (
SELECT 1, * FROM generate_series(1,10)
UNION ALL
SELECT i+1, n FROM t WHERE random() < .5
)
SELECT * FROM t;
Example 1 (from PostgreSQL documentation):
WITH RECURSIVE t(n) AS (
SELECT 1 -- Initial Subquery
UNION ALL
SELECT n+1
FROM t -- Recursive Subquery
WHERE n < 100
)
SELECT sum(n) FROM t;
Example 2 (from PostgreSQL documentation):
WITH RECURSIVE temp (n, fact) AS (
SELECT 0, 1 -- Initial Subquery
UNION ALL
SELECT n+1, (n+1)*fact
FROM temp -- Recursive Subquery
WHERE n < 9)
SELECT * FROM temp;
Writing a loop in SQL ("Marcos" loop):
CREATE TEMP VIEW vw AS
WITH t (zoom, iter) AS (
SELECT zoom, row_number() OVER () AS iter FROM generate_series(18, 0, -1) zoom
)
SELECT * FROM t;
WITH RECURSIVE t AS (
SELECT * FROM vw WHERE iter = 1
UNION ALL
SELECT orig.* FROM t, vw orig WHERE orig.iter = t.iter + 1
)
SELECT * FROM t;
Table creation
Create random table with two columns, id and a random number:
create table foo as select generate_series(1,100) as id, random() as num;
Make a copy of a table:
select * into bar from foo
Make a copy of a table, but don't copy rows:
select * into bar from foo where 1=10
Create random point cloud in polygon
Read this one in the WKT Raster Tutorial.
CREATE OR REPLACE FUNCTION ST_RandomPoints(geom geometry, nb int)
RETURNS SETOF geometry AS
$$
DECLARE
pt geometry;
xmin float8;
xmax float8;
ymin float8;
ymax float8;
xrange float8;
yrange float8;
srid int;
count integer := 0;
bcontains boolean := FALSE;
gtype text;
BEGIN
SELECT ST_GeometryType(geom)
INTO gtype;
IF ( gtype != 'ST_Polygon' ) AND ( gtype != 'ST_MultiPolygon' ) THEN
RAISE EXCEPTION 'Attempting to get random point in a non polygon geometry';
END IF;
SELECT ST_XMin(geom), ST_XMax(geom), ST_YMin(geom), ST_YMax(geom), ST_SRID(geom)
INTO xmin, xmax, ymin, ymax, srid;
SELECT xmax - xmin, ymax - ymin
INTO xrange, yrange;
WHILE count < nb LOOP
SELECT ST_SetSRID(ST_MakePoint(xmin + xrange * random(), ymin + yrange * random()), srid)
INTO pt;
SELECT ST_Contains(geom, pt)
INTO bcontains;
IF bcontains THEN
count := count + 1;
RETURN NEXT pt;
END IF;
END LOOP;
RETURN;
END;
$$
LANGUAGE 'plpgsql' IMMUTABLE STRICT;
K-way cross joins, limiting output
Select rows for monotonically increasing ID's.
select r.id,s.id,t.id from foo r, foo s, foo t where r.id < s.id and s.id < t.id
-- For 100 input rows: 161,700 rows VS 1,000,000 rows
Get names and column types for table
For PostgreSQL it is:
SELECT column_name, data_type FROM information_schema.columns
WHERE table_name = 'table_name';
For PostGIS a geometry column will have data_type = 'USER-DEFINED' (not unique for geometry columns).
For SQLite3 it is:
SELECT sql FROM sqlite_master WHERE tbl_name = 'table_name' AND type = 'table';
Modify spatial tables
Change projection of data:
ALTER TABLE mytable
ALTER COLUMN geom
TYPE Geometry(Point, 32644)
USING ST_Transform(geom, 32644);
-- changing Point, geom and projection to whatever is appropriate
Small spatial queries
Bounding box for an entire table in PostGIS:
SELECT ST_Extent(the_geom) FROM table;
Bounding box query:
SELECT * FROM table
WHERE the_geom && ST_SetSRID(
'BOX(903377.0 7283641.0,1691764.0 7914275.0)'::box2d,
3857
)
OSM Streets near the place I live:
SELECT *
FROM denmark_highway
WHERE wkb_geometry && (
SELECT st_envelope(st_buffer(wkb_geometry,256))
FROM denmark_highway
WHERE name='Anders Henriksens Gade'
)
Polygon from box:
SELECT ST_AsText(ST_SetSRID('BOX(572126 5671279,1258837 6154972)'::box2d, 3857))
Insert a point feature into table w. srid 3857:
INSERT INTO table_1(the_geom) VALUES (ST_GeomFromText('POINT(127 42)', 3857))
Creating a multiscale database from input tables:
select r.*, generalize(r.the_geom, s.scale) as the_geom_generalized
from (
(select *, the_geom from table_1 where name<>'' limit 3)
union
(select *, the_geom from table_2 where name<>'' limit 3)
) r
cross join scales s;
Spatial join
Select records from R that intersect union of S:
with S as (
SELECT st_union(the_geom) as the_geom FROM table_2
)
SELECT R.* FROM table_1 R,S where st_intersects(R.the_geom,S.the_geom)
Intersects join:
SELECT R.*, S.*
FROM table1 AS R
JOIN table2 AS S
ON ST_Intersects(r.the_geom, S.the_geom)
Within distance join:
SELECT R.*, S.*
FROM table1 AS R
JOIN table2 AS S
ON ST_DWithin(R.the_geom, S.the_geom, 1000) -- distance 1000
Indexing
Spatial index:
CREATE INDEX [indexname] ON [tablename] USING GIST ( [geometrycolumn] );
B-Tree index on a column:
CREATE INDEX [indexname] ON [tablename] ([columnname]);
Primary key:
ALTER TABLE rappers_id ADD PRIMARY KEY (id);
Using window functions to retro-fit an id on a table:
CREATE TABLE table_with_id AS
select row_number() OVER (ORDER BY 1) as id, * FROM table_without_id;
ALTER TABLE table_with_id ADD PRIMARY KEY (id);