
#install.packages('janeaustenr', repos = "http://cran.us.r-project.org")
library(janeaustenr)
library(magrittr)
library(dplyr)
library(tidytext)

# n-grams are discussed in chapter 5
# Section label; its value is displayed by top-level auto-printing.
paste0("Bigrams:")
# Tokenize the text of the novels into overlapping two-word sequences
# (bigrams), one bigram per row.
# unnest_tokens() can yield an NA bigram for lines that are too short to hold
# two words (e.g. blank lines). Those rows are dropped here so that NA does
# not show up as a "bigram" in later counts or as NA words after splitting.
austen_bigrams <- austen_books() %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram))
austen_bigrams

# Section label, displayed through top-level auto-printing.
paste0("Count bigrams:")
# Tally each distinct bigram, listing the most frequent ones first.
count(austen_bigrams, bigram, sort = TRUE)

library(tidyr)

# Break each bigram at the space so that both words get their own column.
bigrams_separated <- separate(
  austen_bigrams,
  bigram,
  into = c("word1", "word2"),
  sep = " "
)

# Keep only the bigrams in which neither word appears in the stop-word list.
bigrams_filtered <- filter(
  bigrams_separated,
  !word1 %in% stop_words$word,
  !word2 %in% stop_words$word
)

# Section label, displayed through top-level auto-printing.
paste0("Filtered by street:")
# Among the stop-word-free bigrams, keep those whose second word is "street"
# and count the preceding word per book, most frequent first.
count(
  filter(bigrams_filtered, word2 == "street"),
  book, word1,
  sort = TRUE
)

