1 CTE

explain analyze

with
tt_aoi as (
    select st_buffer(st_collect (
        'SRID=4326;
        POLYGON ((-67.06819 44.99416, -67.1857 44.94707, 
                  -67.21651 44.88058, -67.15834 44.78871, 
                  -67.04385 44.81789, -66.91015 44.86279, 
                  -67.06819 44.99416))'::geometry), 
        5) as aoi),
                  
tt_seldata as (
    SELECT 
      sitename_l, lifestage, geometry 
    FROM 
      shp_nationwide_efh as ds
)

SELECT 
    sitename_l AS Species, string_agg(lifestage, ', ') AS Lifestage 
FROM 
    tt_seldata as ds
    inner join tt_aoi on st_intersects(ds.geometry, tt_aoi.aoi)
GROUP BY 
     sitename_l;
Displaying records 1 - 10
QUERY PLAN
GroupAggregate (cost=173.78..173.80 rows=1 width=64) (actual time=910.999..911.059 rows=62 loops=1)
Group Key: ds.sitename_l
CTE tt_aoi
-> Aggregate (cost=0.02..0.03 rows=1 width=32) (actual time=0.466..0.467 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.001 rows=1 loops=1)
CTE tt_seldata
-> Seq Scan on shp_nationwide_efh ds_1 (cost=0.00..42.64 rows=464 width=540808) (actual time=0.022..1.186 rows=464 loops=1)
-> Sort (cost=131.11..131.12 rows=1 width=64) (actual time=910.984..910.997 rows=195 loops=1)
Sort Key: ds.sitename_l
Sort Method: quicksort Memory: 39kB

2 Temporary Tables

2.1 Option 1

2.1.1 Drop Temp Tables

drop table if exists tt_aoi cascade;
drop table if exists tt_seldata cascade;

2.1.2 Select AOI

--drop table if exists tt_aoi cascade;
explain analyze
--drop table if exists tt_aoi cascade;
create temporary table tt_aoi as
select st_buffer(st_collect (
    'SRID=4326;
    POLYGON ((-67.06819 44.99416, -67.1857 44.94707, 
              -67.21651 44.88058, -67.15834 44.78871, 
              -67.04385 44.81789, -66.91015 44.86279, 
              -67.06819 44.99416))'::geometry), 5) as aoi;
4 records
QUERY PLAN
Aggregate (cost=0.02..0.03 rows=1 width=32) (actual time=0.272..0.272 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.001 rows=1 loops=1)
Planning Time: 0.064 ms
Execution Time: 2.239 ms

2.1.3 Select Data

explain analyze
create temporary table tt_seldata as
SELECT 
  sitename_l, lifestage, geometry 
FROM 
  shp_nationwide_efh as ds
3 records
QUERY PLAN
Seq Scan on shp_nationwide_efh ds (cost=0.00..42.64 rows=464 width=540808) (actual time=0.008..2.348 rows=464 loops=1)
Planning Time: 0.045 ms
Execution Time: 7377.148 ms

2.1.4 Summarize

explain analyze
select 
  ds.sitename_l AS Species, string_agg(ds.lifestage, ', ') AS Lifestage 
from
  tt_seldata as ds
  inner join tt_aoi on st_intersects(ds.geometry, tt_aoi.aoi)
group by
  sitename_l
Displaying records 1 - 10
QUERY PLAN
GroupAggregate (cost=913763.75..913774.87 rows=200 width=96) (actual time=917.781..917.840 rows=62 loops=1)
Group Key: ds.sitename_l
-> Sort (cost=913763.75..913766.63 rows=1149 width=64) (actual time=917.769..917.781 rows=195 loops=1)
Sort Key: ds.sitename_l
Sort Method: quicksort Memory: 39kB
-> Nested Loop (cost=0.00..913705.35 rows=1149 width=64) (actual time=3.506..917.020 rows=195 loops=1)
Join Filter: ((ds.geometry && tt_aoi.aoi) AND _st_intersects(ds.geometry, tt_aoi.aoi))
Rows Removed by Join Filter: 269
-> Seq Scan on tt_seldata ds (cost=0.00..64.35 rows=2535 width=96) (actual time=0.062..1.258 rows=464 loops=1)
-> Materialize (cost=0.00..30.40 rows=1360 width=32) (actual time=0.000..0.001 rows=1 loops=464)

2.2 Option 2 (indexing)

Standard database indexes create a hierarchical tree based on the values of the column being indexed. Spatial indexes are a little different – they are unable to index the geometric features themselves and instead index the bounding boxes of the features.

Reference

2.2.1 Drop Temp Tables

drop table if exists tt_aoi2 cascade;
drop table if exists tt_seldata2 cascade;

2.2.2 Select AOI

--drop table if exists tt_aoi cascade;
explain analyze
--drop table if exists tt_aoi cascade;
create temporary table tt_aoi2 as
select st_buffer(st_collect (
    'SRID=4326;
    POLYGON ((-67.06819 44.99416, -67.1857 44.94707, 
              -67.21651 44.88058, -67.15834 44.78871, 
              -67.04385 44.81789, -66.91015 44.86279, 
              -67.06819 44.99416))'::geometry), 5) as aoi;
4 records
QUERY PLAN
Aggregate (cost=0.02..0.03 rows=1 width=32) (actual time=0.264..0.264 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.001 rows=1 loops=1)
Planning Time: 0.063 ms
Execution Time: 2.150 ms

2.2.3 Select Data

explain analyze
create temporary table tt_seldata2 as
SELECT 
  sitename_l, lifestage, geometry 
FROM 
  shp_nationwide_efh as ds;
3 records
QUERY PLAN
Seq Scan on shp_nationwide_efh ds (cost=0.00..42.64 rows=464 width=540808) (actual time=0.009..2.319 rows=464 loops=1)
Planning Time: 0.048 ms
Execution Time: 7751.798 ms

2.2.4 Create index

create index tt_seldata_geom_idx
  on tt_seldata2
  using gist (geometry)

2.2.5 Summarize

explain analyze
select 
  ds.sitename_l AS Species, string_agg(ds.lifestage, ', ') AS Lifestage 
from
  tt_seldata2 as ds
  inner join tt_aoi2 on st_intersects(ds.geometry, tt_aoi2.aoi)
group by
  sitename_l
Displaying records 1 - 10
QUERY PLAN
HashAggregate (cost=685.85..688.35 rows=200 width=96) (actual time=754.159..754.204 rows=62 loops=1)
Group Key: ds.sitename_l
-> Nested Loop (cost=0.14..684.80 rows=210 width=64) (actual time=319.166..753.417 rows=195 loops=1)
-> Seq Scan on tt_aoi2 (cost=0.00..23.60 rows=1360 width=32) (actual time=0.032..0.034 rows=1 loops=1)
-> Index Scan using tt_seldata_geom_idx on tt_seldata2 ds (cost=0.14..0.48 rows=1 width=96) (actual time=319.127..753.181 rows=195 loops=1)
Index Cond: (geometry && tt_aoi2.aoi)
Filter: _st_intersects(geometry, tt_aoi2.aoi)
Rows Removed by Filter: 31
Planning Time: 0.471 ms
Execution Time: 754.567 ms

3 Subquery

explain analyze
SELECT 
        sitename_l AS Species, string_agg(lifestage, ', ') AS Lifestage 
FROM 
    (SELECT 
            sitename_l, lifestage, geometry 
    FROM 
            shp_nationwide_efh as ds
    where
        st_dwithin(ds.geometry,
        (select st_collect (
        'SRID=4326;
        POLYGON ((-67.06819 44.99416, -67.1857 44.94707, 
                  -67.21651 44.88058, -67.15834 44.78871, 
                  -67.04385 44.81789, -66.91015 44.86279, 
                  -67.06819 44.99416))'::geometry)),
        5)) as tmp_aoi
GROUP BY 
        sitename_l
Displaying records 1 - 10
QUERY PLAN
HashAggregate (cost=159.44..160.79 rows=108 width=46) (actual time=916.607..916.646 rows=62 loops=1)
Group Key: ds.sitename_l
InitPlan 1 (returns $0)
-> Aggregate (cost=0.02..0.03 rows=1 width=32) (actual time=0.023..0.023 rows=1 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.000..0.001 rows=1 loops=1)
-> Seq Scan on shp_nationwide_efh ds (cost=0.00..158.64 rows=155 width=21) (actual time=3.490..915.324 rows=195 loops=1)
Filter: st_dwithin(geometry, $0, ‘5’::double precision)
Rows Removed by Filter: 269
Planning Time: 0.431 ms
Execution Time: 916.713 ms

4 CURRENT tabulate_dataset_shp_within_aoi

tabulate_dataset_shp_within_aoi <- function(dataset_code, aoi_wkt, output = "kable"){
  # summarize shapefile dataset from area of interest
  
  # TODO: pull from db: https://docs.google.com/spreadsheets/d/1MMVqPr39R5gAyZdY2iJIkkIdYqgEBJYQeGqDk1z-RKQ/edit#gid=936111013
  # datasets_gsheet2db()
  
  # dataset_code = "cetacean-bia"; aoi_wkt = params$aoi_wkt
  # dataset_code = "efh"; aoi_wkt = "POLYGON ((-67.06819 44.99416, -67.1857 44.94707, -67.21651 44.88058, -67.15834 44.78871, -67.04385 44.81789, -66.91015 44.86279, -67.06819 44.99416))"
  message(glue("tab..._shp_within_aoi(dataset_code='{dataset_code}', aoi_wkt='{paste(aoi_wkt, collapse=';')}')"))

  # dataset_code = "cetacean-bia";
  # params <- yaml::yaml.load("
  # title: Testing
  # aoi_wkt:
  # - POLYGON ((-115.9245 32.26236, -115.9245 32.26565, -115.9206 32.26565, -115.9206
  #               32.26236, -115.9245 32.26236))
  # - POLYGON ((-121.9503 33.01519, -121.9503 35.51658, -115.8711 35.51658, -115.8711
  #             33.01519, -121.9503 33.01519))")
  # aoi_wkt <- params$aoi_wkt
  # dataset_code = "oil-gas-wells"
  # aoi_wkt      = "POLYGON ((-157.4273 55.22198, -157.4273 61.76097, -143.1428 61.76097, -143.1428 55.22198, -157.4273 55.22198))"
  # dataset_code='fed-sand-gravel-lease'
  # aoi_wkt='POLYGON ((-175.4932 15.34568, -175.4932 27.93566, -151.813 27.93566, -151.813 15.34568, -175.4932 15.34568))'
  
  # dataset_code='monuments'
  # aoi_wkt='POLYGON ((-180.0668 16.98081, -180.0668 29.87807, -153.4797 29.87807, -153.4797 16.98081, -180.0668 16.98081))'
  
  if (is.null(aoi_wkt))
    return("Please draw a Location to get a summary of the intersecting features for this dataset.")

  # if (length(aoi_wkt) > 1)
  #   return("Please draw only ONE polygon to get a summary of the intersecting features for this dataset.")
    
  ds <- tbl(con, "datasets") %>% 
    filter(code == !!dataset_code) %>% 
    replace_na(list(buffer_nm = 0)) %>% 
    collect()
  
  if (length(aoi_wkt) > 1){
    aoi_wkts <- glue("'SRID=4326;{aoi_wkt}'::geometry")
    #aoi_sql  <- glue("ST_COLLECT(\n{paste(aoi_wkts, collapse=',\n')})")
    aoi_sql  <- glue("ST_COLLECT(\n{paste(aoi_wkts, collapse=',\n')})")
    # cat(aoi_sql)
  } else {
    aoi_sql <- glue("'SRID=4326;{aoi_wkt}'")
    # aoi_sql <- glue("'SRID=4326;{aoi_wkt[2]}'")
  }

  
  dbSendQuery(con, glue("DROP TABLE IF EXISTS tmp_aoi CASCADE;"))
  if (!is.na(ds$summarize_sql)){
    sql_intersection <- glue("
      {ds$select_sql} AS ds
      WHERE ST_DWithin(Geography(ds.geometry), {aoi_sql}, {ds$buffer_nm} * 1852);")
    dbExecute(con, glue("CREATE TEMPORARY TABLE tmp_aoi AS {sql_intersection};"))
    # sql_summarize <- "SELECT sitename_l AS Species, string_agg(lifestage, ', ') AS Lifestage FROM tmp_aoi GROUP BY sitename_l"
    # sql_summarize <- "SELECT * FROM tmp_aoi"
    #cat(ds$summarize_sql)
    x_df <- dbGetQuery(con, ds$summarize_sql)
  } else {
    x_sql <- glue("
      {ds$select_sql} AS ds
      WHERE ST_DWithin(Geography(ds.geometry), {aoi_sql}, {ds$buffer_nm} * 1852);")
    x_sf <- st_read(con, query = x_sql)
    x_df <- st_drop_geometry(x_sf)
    
    if (!is.na(ds$summarize_r))
      eval(parse(text=ds$summarize_r))
  }
  
  if (output == "tibble"){
    return(x_df)
  }
  
  x_spatial <- ifelse(
    ds$buffer_nm == 0,
    glue("\n\nSpatial: within site", .trim = F),
    glue("\n\nSpatial: within {ds$buffer_nm} nautical miles of site", .trim = F))

  if (knitr::is_html_output()){
    x_caption <- HTML(markdownToHTML(
      text = glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}"),
      fragment.only = T))
    
    tbl <- x_df %>% 
      kbl(caption = x_caption) %>%
      kable_styling(
        # full_width = F, position = "left", # position = "float_right"
        bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    
  } else {
    x_caption <- glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}")
    
    tbl <- x_df %>% 
      kable(caption = x_caption, format = "pipe")
  }
  
  tbl
}

5 UPDATED tabulate_dataset_shp_within_aoi

tabulate_dataset_shp_within_aoi <- function(dataset_code, aoi_wkt, output = "kable"){
  # summarize shapefile dataset from area of interest

  message(glue("tab..._shp_within_aoi(dataset_code='{dataset_code}', aoi_wkt='{paste(aoi_wkt, collapse=';')}')"))

  if (is.null(aoi_wkt))
    return("Please draw a Location to get a summary of the intersecting features for this dataset.")

  ds <- tbl(con, "datasets") %>% 
    filter(code == !!dataset_code) %>% 
    replace_na(list(buffer_nm = 0)) %>% 
    collect()
  
  if (length(aoi_wkt) > 1){
    aoi_wkts <- glue("'SRID=4326;{aoi_wkt}'::geometry")
    aoi_sql  <- glue("ST_COLLECT(\n{paste(aoi_wkts, collapse=',\n')})") # Is this recreating the ST_COLLECT statement
                                                                        # for every item in <aoi_wkt> array?
  } else {
    # aoi_sql <- glue("'SRID=4326;{aoi_wkt}'")
    aoi_sql <- glue("'SRID=4326;{aoi_wkt}'::geometry")
  }

  # Use CTE instead of temporary tables
  # TODO
  #    Add conditional to check if ds$summarize_r
  #    Drop geometry column in x_df?
  if (!is.na(ds$sumarize_sql)){
    x_df <- dbGetQuery(con,
              glue("with 
                    tmp_selarea as (
                      select ST_BUFFER({aoi_sql}, {ds$buffer_nm}) as geom
                    )
                    {ds$select_sql} as ds
                    inner join tmp_selarea on ST_INTERSECTS(ds.geometry, tmp_selarea.geom)
                    "))
  } else {
    x_df <- dbGetQuery(con,
              glue("with 
                    tmp_selarea as (
                      select ST_BUFFER({aoi_sql}, {ds$buffer_nm}) as geom
                    ),
                    tmp_aoi as (
                      {ds$select_sql} as ds
                      inner join tmp_selarea on ST_INTERSECTS(ds.geometry, tmp_selarea.geom)
                    )
                   {ds$summarize_sql}
                   "))
  }
  if (output == "tibble"){
    return(x_df)
  }

  x_spatial <- ifelse(
    ds$buffer_nm == 0,
    glue("\n\nSpatial: within site", .trim = F),
    glue("\n\nSpatial: within {ds$buffer_nm} nautical miles of site", .trim = F))

  if (knitr::is_html_output()){
    x_caption <- HTML(markdownToHTML(
      text = glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}"),
      fragment.only = T))
    
    tbl <- x_df %>% 
      kbl(caption = x_caption) %>%
      kable_styling(
        # full_width = F, position = "left", # position = "float_right"
        bootstrap_options = c("striped", "hover", "condensed", "responsive"))
    
  } else {
    x_caption <- glue("Source: [{ds$src_name}]({ds$src_url}){x_spatial}")
    
    tbl <- x_df %>% 
      kable(caption = x_caption, format = "pipe")
  }
  
  tbl
}