Management Measures

library(rvest)
library(readr)
library(dplyr)
library(DT)
library(here)

url <- "https://tethys.pnnl.gov/management-measures"
measures_csv <- here("data/management-measures.csv")

d <- read_html(url) %>% 
  html_table() %>% 
  .[[1]]

d %>% 
  filter(Technology == "Tidal") %>% 
  datatable()
table(d$Technology)
## 
## Tidal  Wave 
##   380   362
table(d$Stressor)
## 
##                   Airborne noise              Barrier to movement 
##                                8                               12 
##     Changes in sediment dynamics            Changes in water flow 
##                               58                                4 
##                   Collision risk                    Contamination 
##                               18                              168 
##                     Displacement       Dissipation of wave energy 
##                                6                                2 
##         EMF and thermal pressure                     Entanglement 
##                               56                               60 
##                       Entrapment                 Habitat creation 
##                                4                               16 
##                         Lighting                  Loss of habitat 
##                                6                               32 
##           Loss of seabed habitat Marine Non-Native Species (MNNS) 
##                                4                              214 
##                 Underwater noise                 Vessel collision 
##                               16                                6 
##               Vessel disturbance 
##                               52
table(d$Receptor)
## 
##        Benthic          Birds           Fish        Habitat Marine Mammals 
##            124            136            158             82            150 
##       Reptiles 
##             92
table(d$`Specific Receptor`)
## 
##                    All receptors                    Basking shark 
##                              452                                2 
##        Basking shark; Large fish Basking shark; Whale shark; Fish 
##                               18                                2 
##                          Benthic            Benthic invertebrates 
##                                2                               46 
##                  Benthic species                            Birds 
##                                4                                2 
##                   Birds on water                        Cetaceans 
##                               18                               24 
##                    Demersal fish                     Diving birds 
##                               28                               16 
##                    Elasmobranchs                             Fish 
##                                4                               16 
##      Fish, Benthic invertebrates               Intertidal ecology 
##                                6                                4 
##                       Large fish                   Marine Mammals 
##                                8                               38 
##                   Migratory fish    Migratory fish; Elasmobranchs 
##                                8                                2 
##                   Roosting birds                      Sea turtles 
##                                2                               12 
##                         Seabirds                            Seals 
##                               14                                6 
##                 Seals; Cetaceans              Shore-nesting birds 
##                                2                                6
write_csv(d, measures_csv)
datatable(d)

Simple one page result

url <- "https://tethys.pnnl.gov/knowledge-base-marine-energy?f[0]=receptor:496&f[1]=technology:428"

d <- read_html(url) %>% 
  html_table() %>% 
  .[[1]]

datatable(d)

get tags

library(rvest)
library(here)
library(DT)
library(zeallot)
library(stringr)
library(dplyr)
library(tibble)
library(purrr)

# helper functions ----
get_facet_items <- function(facet_name){
  # facet_name = "technology"
  item_nodes <- facet_nodes[[which(facet_names == facet_name)]] %>% 
    html_nodes(".facet-item")
  
  map_df(item_nodes, get_facet_item)
  
}

get_facet_item <- function(item_node){
  # item_node <- item_nodes[[1]]
  
  # keys <- c("category" = 1, "id" = 2)
  # stopifnot(key %in% names(keys))
  
  label <- item_node  %>%
    html_node("a span.facet-item__value") %>% 
    html_text()

  keys <- item_node  %>%
    html_node("a") %>%
    html_attr("data-drupal-facet-item-id") %>% 
    str_split("-") %>% 
    .[[1]]
  
  tibble(facet = keys[1], item_id = keys[2], item_label = label)
}

# variables ----
url      <- "https://tethys.pnnl.gov/knowledge-base-marine-energy"
tags_csv <- here("data/tags.csv")

# scrape html ----
html <- read_html(url)

facet_nodes <- html_nodes(html, ".js-facets-checkbox-links")
facet_names <- html_attr(facet_nodes, "data-drupal-facet-alias")

# explore
#html_structure(facet_nodes[[1]])
#html_text(facet_nodes[[1]]) %>% cat()
#as_list(facet_nodes[[1]])

d <- tibble(facet = facet_names) %>% 
  group_by(facet) %>%
  do(get_facet_items(.$facet)) %>% 
  ungroup()

write_csv(d, tags_csv)

datatable(d)

table of receptors x stressors

# receptor <- 
#   filter(d, facet == "receptor") %>% pull(item_label)
# stressor <- 
#   filter(d, facet == "stressor") %>% pull(item_label)
# 
# s_r_m <- matrix(
#   NA, 
#   nrow = length(receptor), ncol = length(stressor),
#   dimnames = list(receptor, stressor)) %>% 
#   as_tibble(rownames = "receptor")
# 
# datatable(s_r_m)

library(readr)
library(here)
library(tidyr)
library(glue)
library(htmltools)

s_r_redo <- T
s_r_csv <- here("data/tethys_stressor_receptor.csv")

receptor <- 
  filter(d, facet == "receptor") %>% pull(item_label) %>% unique() %>% sort()
stressor <- 
  filter(d, facet == "stressor") %>% pull(item_label)

tethys_pfx <- "https://tethys.pnnl.gov/knowledge-base-marine-energy"


get_num_refs <- function(url){
  # url = "https://tethys.pnnl.gov/knowledge-base-marine-energy?f[0]=receptor:280&f[1]=stressor:355"
  
  #if (url == "https://tethys.pnnl.gov/knowledge-base-marine-energy?f[0]=receptor:284&f[1]=stressor:531") browser()
  message(glue("url: {url}"))
  
  tbls <- read_html(url) %>% 
    html_table() 
  
  if (length(tbls) == 0){
    num_refs <- 0 %>% as.integer()
  } else{
    num_refs <- nrow(tbls[[1]])
  }

  num_refs
}

  
if (!file.exists(s_r_csv) | s_r_redo){
  
  s_r <- expand_grid(receptor, stressor) %>% 
    left_join(
      d %>% 
        filter(facet == "receptor") %>% 
        select(receptor = item_label, receptor_id = item_id),
      by = "receptor") %>% 
    left_join(
      d %>% 
        filter(facet == "stressor") %>% 
        select(stressor = item_label, stressor_id = item_id),
      by = "stressor") %>% 
    mutate(
      url      = glue("{tethys_pfx}?f[0]=receptor:{receptor_id}&f[1]=stressor:{stressor_id}"),
      num_refs = map_int(url, get_num_refs),
      link     = glue(
        "<a href='{url}'>{receptor} x {stressor} ({num_refs})</a>"))
  
  write_csv(s_r, s_r_csv)
}
s_r <- read_csv(s_r_csv)

s_r %>% 
  mutate(
    link     = 
      if_else(
        num_refs > 0,
        glue("<a href='{url}'>{if_else(num_refs == 50, '50+', as.character(num_refs))}</a>"),
        "0")) %>% 
  select(stressor, receptor, link) %>% 
  pivot_wider(names_from = "stressor", values_from = link) %>% 
  datatable(escape = F)
## multi-page result

url <- "https://tethys.pnnl.gov/knowledge-base-marine-energy?f[0]=technology:428"


# TODO: 
read_html(url) %>% 
  html_nodes(":contains(pager)")

get_tethys_refs <- function(url){
  read_html(url) %>% 
    html_table() %>% 
    .[[1]]
}

# https://tethys.pnnl.gov/knowledge-base-marine-energy?f[0]=technology:428&search=&page=3

base_url <- "https://tethys.pnnl.gov/knowledge-base-marine-energy"
tags <- c("receptor:496", "technology:428")


while (!last_page){
  tbl_pg <- get_tethys_refs()
  tbl <- bind_rows(tbl, tbl_pg)
  ck_last_page()
}


datatable(d)