-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path03_word_cloud_and_frequency.R
More file actions
85 lines (64 loc) · 2 KB
/
03_word_cloud_and_frequency.R
File metadata and controls
85 lines (64 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
library(tidyverse)
library(tidytext)
library(RmecabKo)
library(stringr)
library(ggplot2)
# Load the chart lyrics; text columns stay character vectors, not factors.
songListLyrics <- read.csv(
  "melon_ranking_lyrics_1964-2016.csv",
  stringsAsFactors = FALSE
)

# Split the `lyric` column into one row per token, stored in `word`.
# The result overwrites songListLyrics, so every later step works on tokens.
# NOTE(review): token_words is not defined in this script; presumably it is
# the tokenizer exported by RmecabKo -- confirm.
songListLyrics <- unnest_tokens(
  songListLyrics, word, lyric,
  token = token_words
)

# Print the 20 most frequent words after removing both stop-word tables.
# NOTE(review): stopwords_ko is not defined in this script; it must come from
# a loaded package or the workspace -- confirm before running.
songListLyrics %>%
  anti_join(stopwords_ko) %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE) %>%
  head(n = 20)
# Word cloud ----
# Draws a cloud of up to 100 words, sized by how often each token occurs
# once both stop-word tables have been removed.
library(wordcloud)
with(
  count(
    anti_join(anti_join(songListLyrics, stopwords_ko), stop_words),
    word
  ),
  wordcloud(
    words = word,
    freq = n,
    max.words = 100,
    # Font family is forwarded to the text drawing; presumably chosen so
    # Korean glyphs render (macOS font) -- confirm on other platforms.
    family = "Apple SD Gothic Neo"
  )
)
# Bar chart of the 20 most frequent words (stop words removed).
# Bar height is the raw count `n`; the x axis uses ggplot2's default ordering
# of `word`, not the count order produced by count(sort = TRUE).
songListLyrics %>%
  anti_join(stopwords_ko) %>%
  anti_join(stop_words) %>%
  count(word, sort = TRUE) %>%
  head(n = 20) %>%
  ggplot(mapping = aes(x = word, y = n)) +
  geom_col() +
  theme(
    # Same font family as the word cloud for the word labels on the x axis.
    axis.text.x = element_text(family = "Apple SD Gothic Neo")
  )
# Words in Decade ----
# Drop incomplete rows and label each row with the decade of its `year`
# (e.g. 1987 -> 1980).
songListLyrics <- songListLyrics %>%
  na.omit() %>%
  mutate(decade = floor(year / 10) * 10)

# One row per (song, word). Earlier in this script songListLyrics is already
# replaced by its tokenized form, and unnest_tokens() drops the input column
# by default, so `lyric` no longer exists at this point; tokenizing it again
# would fail. Reuse the existing tokens, and only tokenize when the raw
# `lyric` column is still present.
lyricsWords <- if ("lyric" %in% names(songListLyrics)) {
  unnest_tokens(songListLyrics, word, lyric, token = token_words)
} else {
  songListLyrics
}

# Overall word frequencies (printed for inspection).
lyricsWords %>%
  count(word, sort = TRUE)

# Word counts per decade; this table also feeds the decade correlations
# computed further down the script.
wordsByDecade <- lyricsWords %>%
  count(decade, word, sort = TRUE) %>%
  ungroup()
wordsByDecade

# tf-idf with each decade treated as one document, highest scores first.
tfIdf <- wordsByDecade %>%
  bind_tf_idf(word, decade, n) %>%
  arrange(desc(tf_idf))
tfIdf

# Per-decade panels showing the 12 highest tf-idf words (ties included).
tfIdf %>%
  group_by(decade) %>%
  top_n(12, tf_idf) %>%
  ungroup() %>%
  mutate(word = reorder(word, tf_idf)) %>%
  ggplot(aes(word, tf_idf, fill = decade)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ decade, scales = "free") +
  ylab("tf-idf") +
  coord_flip() +
  # After coord_flip() the words sit on the y axis, so the font goes there.
  theme(axis.text.y = element_text(family = "Apple SD Gothic Neo"))
# Decade-to-decade correlation ----
# Correlates decades with each other using the per-word counts `n` in
# wordsByDecade as features; pairs are returned sorted by correlation.
library(widyr)
decadeCors <- pairwise_cor(wordsByDecade, decade, word, n, sort = TRUE)
decadeCors
# Network plot of the decade correlations ----
# Each row of decadeCors becomes an edge between two decades; edge opacity
# and width both encode the `correlation` column.
library(ggraph)
library(igraph)
ggraph(graph_from_data_frame(decadeCors), layout = "fr") +
  geom_edge_link(aes(alpha = correlation, width = correlation)) +
  geom_node_point(color = "lightblue", size = 6) +
  # repel = TRUE keeps the node labels from overlapping.
  geom_node_text(aes(label = name), repel = TRUE) +
  theme_void()