-
-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathSentimentAnalysis
98 lines (81 loc) · 2.1 KB
/
SentimentAnalysis
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Read file
apple <- read.csv(file.choose(), header = T)
str(apple)
# Build corpus
library(tm)
corpus <- iconv(apple$text, to = "utf-8-mac")
corpus <- Corpus(VectorSource(corpus))
inspect(corpus[1:5])
# Clean text
corpus <- tm_map(corpus, tolower)
inspect(corpus[1:5])
corpus <- tm_map(corpus, removePunctuation)
inspect(corpus[1:5])
corpus <- tm_map(corpus, removeNumbers)
inspect(corpus[1:5])
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
inspect(cleanset[1:5])
removeURL <- function(x) gsub('http[[:alnum:]]*', '', x)
cleanset <- tm_map(cleanset, content_transformer(removeURL))
inspect(cleanset[1:5])
cleanset <- tm_map(cleanset, removeWords, c('aapl', 'apple'))
cleanset <- tm_map(cleanset, gsub,
pattern = 'stocks',
replacement = 'stock')
cleanset <- tm_map(cleanset, stripWhitespace)
inspect(cleanset[1:5])
# Term document matrix
tdm <- TermDocumentMatrix(cleanset)
tdm
tdm <- as.matrix(tdm)
tdm[1:10, 1:20]
# Bar plot
w <- rowSums(tdm)
w <- subset(w, w>=25)
barplot(w,
las = 2,
col = rainbow(50))
# Word cloud
library(wordcloud)
w <- sort(rowSums(tdm), decreasing = TRUE)
set.seed(222)
wordcloud(words = names(w),
freq = w,
max.words = 150,
random.order = F,
min.freq = 5,
colors = brewer.pal(8, 'Dark2'),
scale = c(5, 0.3),
rot.per = 0.7)
library(wordcloud2)
w <- data.frame(names(w), w)
colnames(w) <- c('word', 'freq')
wordcloud2(w,
size = 0.7,
shape = 'triangle',
rotateRatio = 0.5,
minSize = 1)
letterCloud(w,
word = "apple",
size=1)
# Sentiment analysis
library(syuzhet)
library(lubridate)
library(ggplot2)
library(scales)
library(reshape2)
library(dplyr)
# Read file
apple <- read.csv(file.choose(), header = T)
tweets <- iconv(apple$text, to = 'utf-8-mac')
# Obtain sentiment scores
s <- get_nrc_sentiment(tweets)
head(s)
tweets[4]
get_nrc_sentiment('delay')
# Bar plot
barplot(colSums(s),
las = 2,
col = rainbow(10),
ylab = 'Count',
main = 'Sentiment Scores for Apple Tweets')