Pontifications
library(tidyverse)
library(stringr)
library(tokenizers)
library(tidytext)
library(textclean)
library(hunspell)
#' Stem a single token with hunspell.
#'
#' @param token A length-one character vector holding one word.
#' @return The first stem hunspell reports for `token`, or `token` itself
#'   when hunspell reports no stem.
my_hunspell_stem <- function(token) {
  stems <- hunspell_stem(token)[[1]]
  if (length(stems) == 0) token else stems[1]
}

#' Stem every token of a character vector with hunspell.
#'
#' Applies the same rule as `my_hunspell_stem()` to each element, but calls
#' `hunspell_stem()` once for the whole vector instead of once per token.
#'
#' @param token A character vector of words.
#' @return An unnamed character vector the same length as `token`; always a
#'   character vector, including for zero-length input.
vec_hunspell_stem <- function(token) {
  # A Vectorize()/mapply() wrapper returns list() for empty input, which would
  # become a list column downstream; return a typed empty vector instead.
  if (length(token) == 0) {
    return(character(0))
  }
  stem_lists <- hunspell_stem(token)
  has_stem <- lengths(stem_lists) > 0
  # Tokens without any stem are kept unchanged.
  stemmed <- token
  stemmed[has_stem] <- vapply(
    stem_lists[has_stem],
    function(stems) stems[1],
    character(1),
    USE.NAMES = FALSE
  )
  stemmed
}
# start of function
#' Count stemmed, non-stop-word tokens in a questions CSV.
#'
#' Reads the CSV at `csv_url`, merges its `title` and `content` columns into a
#' single `text` column, passes that text through `replace_html()`, splits it
#' into one word per row, stems each word with `vec_hunspell_stem()`, removes
#' rows whose word appears in the tidytext stop-word list, and tallies the
#' remaining words.
#'
#' @param csv_url Location of the CSV to read (passed to `read_csv()`); the
#'   file must provide `title` and `content` columns.
#' @return A tibble with columns `word` and `n`, sorted by descending `n`.
tidy_count <- function(csv_url) {
  questions <- read_csv(csv_url)

  stemmed_words <- questions %>%
    unite(text, title, content, sep = " ") %>%
    mutate(text = replace_html(text)) %>%
    unnest_tokens(word, text) %>%
    mutate(word = vec_hunspell_stem(word))

  # Use the packaged dataset directly: data(stop_words) loads into the global
  # environment by default, a hidden side effect of every call.
  # The explicit `by` pins the join to `word` and avoids the join message.
  stemmed_words %>%
    anti_join(tidytext::stop_words, by = "word") %>%
    count(word, sort = TRUE)
}
# end of function
# Firefox 62: count words, keep those occurring more than 100 times, and drop
# any word containing one of the excluded substrings (product names, the
# digits 1-9, and a few uninformative terms).
# The pattern now includes "6", matching the pattern used for Firefox 60; it
# was missing here, so the versions were filtered inconsistently.
ff62_count <-
  tidy_count(
    "https://raw.githubusercontent.com/rtanglao/rt-kitsune-api/master/created21october2018-ff62-5-25september-2018-questions-id-content-created-product-tags-topic-firefoxversion.csv"
  ) %>%
  filter(
    n > 100,
    !str_detect(
      word,
      "firefox|1|2|3|4|5|6|mozilla|7|8|9|ff|https|browser|computer"
    )
  ) %>%
  mutate(
    word = reorder(word, n),
    # Tag the rows so the versions can be told apart once combined.
    firefoxversion = 62
  )
# start of Firefox 60
# Firefox 60: count words, keep those occurring more than 100 times, and drop
# any word containing one of the excluded substrings (product names, the
# digits 1-9, and a few uninformative terms).
ff60_count <-
  tidy_count(
    "https://raw.githubusercontent.com/rtanglao/rt-kitsune-api/master/created21october2018-ff60-9-29may2018-questions-id-content-created-product-tags-topic-firefoxversion.csv"
  ) %>%
  filter(
    n > 100,
    !str_detect(
      word,
      "firefox|1|2|3|4|5|6|mozilla|7|8|9|ff|https|browser|computer"
    )
  ) %>%
  mutate(
    word = reorder(word, n),
    # Tag the rows so the versions can be told apart once combined.
    firefoxversion = 60
  )
# end of Firefox 60
# start of Firefox 61
# Firefox 61: count words, keep those occurring more than 100 times, and drop
# any word containing one of the excluded substrings (product names, the
# digits 1-9, and a few uninformative terms).
# The pattern now includes "6", matching the pattern used for Firefox 60; it
# was missing here, so the versions were filtered inconsistently.
ff61_count <-
  tidy_count(
    "https://raw.githubusercontent.com/rtanglao/rt-kitsune-api/master/created21october2018-ff61-26june-16july-2018-questions-id-content-created-product-tags-topic-firefoxversion.csv"
  ) %>%
  filter(
    n > 100,
    !str_detect(
      word,
      "firefox|1|2|3|4|5|6|mozilla|7|8|9|ff|https|browser|computer"
    )
  ) %>%
  mutate(
    word = reorder(word, n),
    # Tag the rows so the versions can be told apart once combined.
    firefoxversion = 61
  )
# end of Firefox 61
# Stack the three per-version word counts into one table and re-rank the
# words by n across the combined data.
ff60_61_62 <-
  bind_rows(ff60_count, ff61_count, ff62_count) %>%
  mutate(word = reorder(word, n))

# Horizontal bar chart of word counts, one panel per Firefox version.
ggplot(data = ff60_61_62, mapping = aes(x = word, y = n)) +
  geom_col() +
  labs(x = NULL) +
  coord_flip() +
  facet_wrap(~ firefoxversion)
Output
Leave a comment on GitHub