SQL Snippets

Part_III_Analysis_on_maps

Data generation

Generate 500 points, roughly on circle of radius 1. Jitter x and y by +/-0.25:

SELECT
  ST_MakePoint(
    Cos(2 * Pi() * d / 500.0)      -- x
    * (1 + Random() * 0.5 - 0.25), -- jitter-x
    Sin(2 * Pi() * d / 500.0)      -- y
    * (1 + Random() * 0.5 - 0.25)  -- jitter-y
  ) AS wkb_geometry
FROM
  generate_series(0, 500) d;

Create a synthetic star ratings table:

CREATE TABLE ratings AS
SELECT round(greatest(least(x,5), 0)*100)/100.0 AS stars FROM normal_rand(1000000, 3, 1) x

Create a set of 1.000.000 random bounding boxes in the world (epsg:3857) with normally distributed sizes:

CREATE TABLE placenames_synthetic AS
SELECT 
  ROW_NUMBER() OVER () AS id, 
    ST_SetSRID(
      ST_Intersection(
        ST_Expand(
          ST_MakePoint(
            (random()*40075016.68 - 20037508.34) * 0.99999, 
            (random()*40075016.68 - 20037508.34) * 0.99999
          ), 
          greatest(100, abs(SIZE))
        ),
        ST_Expand(ST_MakePoint(0,0), 20037508.34)
      ),
      3857
    ) AS geometry, 
  random() AS importance
FROM
  normal_rand(1000000, 100, 1000) SIZE;

Special joins

Merge two tables side-by-side:

SELECT * FROM
(SELECT *, ROW_NUMBER() OVER() AS rn FROM table1) t1
INNER JOIN
(SELECT *, ROW_NUMBER() OVER() AS rn FROM table1) t2
ON t1.rn = t2.rn

Distinct selects

Select the biggest airport for each country in the world (Openflights Airports dataset):

SELECT DISTINCT ON (country) * FROM openflights_airports ORDER BY country ASC, num_routes DESC;

Special selects

Computing a join between a custom list of tuples (named "foo" below) and an existing table (named "bar" below):

WITH foo (x, y) AS (
  VALUES (1.1, 'a'), (2.2, 'b'), (3.3, 'c')
)
SELECT * FROM foo, bar

Window functions

Rank countries of the world by area per continent:

SELECT name, rank() OVER
  (
    PARTITION BY continent 
    ORDER BY area DESC
  ) 
FROM countries;

Recursive

Random subsets query

WITH RECURSIVE t(i, n) AS (
	SELECT 1, * FROM generate_series(1,10)
UNION ALL
	SELECT i+1, n FROM t WHERE random() < .5
)
SELECT * FROM t;

Example 1 (from PostgreSQL documentation):

WITH RECURSIVE t(n) AS (
    SELECT 1 -- Initial Subquery
  UNION ALL
    SELECT n+1
    FROM t -- Recursive Subquery
    WHERE n < 100
)
SELECT SUM(n) FROM t;

Example 2 (from PostgreSQL documentation):

WITH RECURSIVE temp (n, fact) AS (
    SELECT 0, 1 -- Initial Subquery
  UNION ALL 
    SELECT n+1, (n+1)*fact 
    FROM temp -- Recursive Subquery 
    WHERE n < 9)
SELECT * FROM temp;

Writing a loop in SQL ("Marcos" loop):

CREATE TEMP VIEW vw AS
WITH t (zoom, iter) AS (
SELECT zoom, ROW_NUMBER() OVER () AS iter FROM generate_series(18, 0, -1) zoom
)
SELECT * FROM t;
 
WITH RECURSIVE t AS (
	SELECT * FROM vw WHERE iter = 1
UNION ALL
	SELECT orig.* FROM t, vw orig WHERE orig.iter = t.iter + 1
)
SELECT * FROM t;

Table creation

Create random table with two columns, id and a random number:

CREATE TABLE foo AS SELECT generate_series(1,100) AS id, random() AS num;

Make a copy of a table:

SELECT * INTO bar FROM foo

Make a copy of a table, but don't copy rows:

SELECT * INTO bar FROM foo WHERE 1=10

Create random point cloud in polygon

Read this one in the WKT Raster Tutorial.

CREATE OR REPLACE FUNCTION ST_RandomPoints(geom geometry, nb INT) 
 RETURNS SETOF geometry AS
 $$
 DECLARE
     pt geometry;
     xmin float8;
     xmax float8;
     ymin float8;
     ymax float8;
     xrange float8;
     yrange float8;
     srid INT;
     COUNT INTEGER := 0;
     bcontains BOOLEAN := FALSE;
     gtype text;
 BEGIN
     SELECT ST_GeometryType(geom)
     INTO gtype;
     IF ( gtype != 'ST_Polygon' ) AND ( gtype != 'ST_MultiPolygon' ) THEN
         RAISE EXCEPTION 'Attempting to get random point in a non polygon geometry';
     END IF;
     SELECT ST_XMin(geom), ST_XMax(geom), ST_YMin(geom), ST_YMax(geom), ST_SRID(geom)
     INTO xmin, xmax, ymin, ymax, srid;
     SELECT xmax - xmin, ymax - ymin
     INTO xrange, yrange;
     WHILE COUNT < nb LOOP
         SELECT ST_SetSRID(ST_MakePoint(xmin + xrange * random(), ymin + yrange * random()), srid)
         INTO pt;
         SELECT ST_Contains(geom, pt)
         INTO bcontains;
         IF bcontains THEN
             COUNT := COUNT + 1;
             RETURN NEXT pt;
         END IF;
     END LOOP;
     RETURN;
 END;
 $$
 LANGUAGE 'plpgsql' IMMUTABLE STRICT;

K-way cross joins, limiting output

Select rows for monotonically increasing ID's.

SELECT r.id,s.id,t.id FROM foo r, foo s, foo t WHERE r.id < s.id AND s.id < t.id
-- For 100 input rows: 161,700 rows VS 1,000,000 rows

Get names and column types for table

For PostgreSQL it is:

SELECT column_name, data_type FROM information_schema.columns
WHERE TABLE_NAME = 'table_name';

For PostGIS a geometry column will have data_type = 'USER-DEFINED' (not unique for geometry columns).

For SQLite3 it is:

SELECT SQL FROM sqlite_master WHERE tbl_name = 'table_name' AND TYPE = 'table';

Modify spatial tables

Change projection of data:

ALTER TABLE mytable 
   ALTER COLUMN geom 
   TYPE Geometry(Point, 32644) 
   USING ST_Transform(geom, 32644);
   -- changing Point, geom and projection to whatever is appropriate

Small spatial queries

Bounding box for an entire table in PostGIS:

SELECT ST_Extent(the_geom) FROM TABLE;

Bounding box query:

SELECT * FROM TABLE 
WHERE the_geom && ST_SetSRID(
    'BOX(903377.0 7283641.0,1691764.0 7914275.0)'::box2d, 
    3857
)

OSM Streets near the place I live:

SELECT * 
FROM denmark_highway 
WHERE wkb_geometry && (
    SELECT st_envelope(st_buffer(wkb_geometry,256)) 
    FROM denmark_highway 
    WHERE name='Anders Henriksens Gade'
)

Polygon from box:

SELECT ST_AsText(ST_SetSRID('BOX(572126 5671279,1258837 6154972)'::box2d, 3857))

Insert a point feature into table w. srid 3857:

INSERT INTO table_1(the_geom) VALUES (ST_GeomFromText('POINT(127 42)', 3857))

Creating a multiscale database from input tables:

SELECT r.*, generalize(r.the_geom, s.scale) AS the_geom_generalized
FROM (
    (SELECT *, the_geom FROM table_1 WHERE name<>'' LIMIT 3)
    UNION
    (SELECT *, the_geom FROM table_2 WHERE name<>'' LIMIT 3)
) r
CROSS JOIN scales s;

Spatial join

Select records from R that intersect union of S:

WITH S AS (
SELECT st_union(the_geom) AS the_geom FROM table_2
)
 
SELECT R.* FROM table_1 R,S WHERE st_intersects(R.the_geom,S.the_geom)

Intersects join:

SELECT R.*, S.* 
FROM table1 AS R 
JOIN table2 AS S 
ON ST_Intersects(r.the_geom, S.the_geom)

Within distance join:

SELECT R.*, S.* 
FROM table1 AS R 
JOIN table2 AS S 
ON ST_DWithin(R.the_geom, S.the_geom, 1000) -- distance 1000

Indexing

Spatial index:

CREATE INDEX [indexname] ON [tablename] USING GIST ( [geometrycolumn] );

B-Tree index on a column:

CREATE INDEX [indexname] ON [tablename] ([columnname]);

Primary key:

ALTER TABLE rappers_id ADD PRIMARY KEY (id);

Using window functions to retro-fit an id on a table:

CREATE TABLE table_with_id AS
SELECT ROW_NUMBER() OVER (ORDER BY 1) AS id, *  FROM table_without_id;
ALTER TABLE table_with_id ADD PRIMARY KEY (id);

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.