Skip to contents

Load the twitterAnalysis package

library(twitterAnalysis)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> Loading required package: stopwords
#> Loading required package: tm
#> Loading required package: NLP
#> 
#> Attaching package: 'tm'
#> The following object is masked from 'package:stopwords':
#> 
#>     stopwords
#> Loading required package: wordcloud
#> Loading required package: RColorBrewer
#> Loading required package: twitteR
#> 
#> Attaching package: 'twitteR'
#> The following objects are masked from 'package:dplyr':
#> 
#>     id, location
#> Loading required package: syuzhet

Obtain Twitter API credentials

From https://developer.twitter.com/ obtain API access credentials.

consumer_key <- 'xxx'
consumer_secret <- 'xxx'
access_token <- 'xxx'
token_secret <- 'xxx'

Quick use example

The functions of the package are all meant to be used in tandem with each other.

Step 1: Setup Twitter API credentials

api_user <- user_info(consumer_key, consumer_secret, access_token, token_secret)

Step 2: get the 100 most recent tweets of a user (for example, @elonmusk)

elon_df <- load_twitter_by_user('elonmusk', 100, api_user)
#> [1] "Using direct authentication"
head(elon_df)
#>   screenName                  id             created favoriteCount retweetCount
#> 1   elonmusk 1621886041245237253 2023-02-04 14:59:00          2242           84
#> 2   elonmusk 1621881994375299073 2023-02-04 14:42:55          3146          142
#> 3   elonmusk 1621881617978429440 2023-02-04 14:41:25          1444           87
#> 4   elonmusk 1621793704175362050 2023-02-04 08:52:05         13767          426
#> 5   elonmusk 1621719831249567744 2023-02-04 03:58:32          6621          301
#> 6   elonmusk 1621719712429129728 2023-02-04 03:58:04         23223          653
#>                                    text
#> 1                           @cb_doge 🤣
#> 2                    @d4t4wr4ngl3r 🤣🤣
#> 3 @BillyM2k @cb_doge 🤣 it’s a fair cop
#> 4                      @TheJackForge 🤣
#> 5                               @tobi !
#> 6                        @TrungTPhan 🤣

Step 3: Perform pre-processing

elon_clean_df <- generalPreprocessing(elon_df)
head(elon_clean_df)
#>   screenName                  id             created favoriteCount retweetCount
#> 1   elonmusk 1621886041245237253 2023-02-04 14:59:00          2242           84
#> 2   elonmusk 1621881994375299073 2023-02-04 14:42:55          3146          142
#> 3   elonmusk 1621881617978429440 2023-02-04 14:41:25          1444           87
#> 4   elonmusk 1621793704175362050 2023-02-04 08:52:05         13767          426
#> 5   elonmusk 1621719831249567744 2023-02-04 03:58:32          6621          301
#> 6   elonmusk 1621719712429129728 2023-02-04 03:58:04         23223          653
#>                                    text           text_clean
#> 1                           @cb_doge 🤣                   🤣
#> 2                    @d4t4wr4ngl3r 🤣🤣                 🤣🤣
#> 3 @BillyM2k @cb_doge 🤣 it’s a fair cop   🤣 it’s a fair cop
#> 4                      @TheJackForge 🤣                   🤣
#> 5                               @tobi !                    !
#> 6                        @TrungTPhan 🤣                   🤣

Step 4: Perform Sentiment Analysis

elon_sentiment_df <- sentiment_labeler(elon_clean_df, 'text')
#> Warning: `spread_()` was deprecated in tidyr 1.2.0.
#>  Please use `spread()` instead.
#>  The deprecated feature was likely used in the syuzhet package.
#>   Please report the issue to the authors.
head(elon_sentiment_df)
#>   screenName                  id             created favoriteCount retweetCount
#> 1   elonmusk 1621886041245237253 2023-02-04 14:59:00          2242           84
#> 2   elonmusk 1621881994375299073 2023-02-04 14:42:55          3146          142
#> 3   elonmusk 1621881617978429440 2023-02-04 14:41:25          1444           87
#> 4   elonmusk 1621793704175362050 2023-02-04 08:52:05         13767          426
#> 5   elonmusk 1621719831249567744 2023-02-04 03:58:32          6621          301
#> 6   elonmusk 1621719712429129728 2023-02-04 03:58:04         23223          653
#>                                    text           text_clean sentiment
#> 1                           @cb_doge 🤣                   🤣   neutral
#> 2                    @d4t4wr4ngl3r 🤣🤣                 🤣🤣   neutral
#> 3 @BillyM2k @cb_doge 🤣 it’s a fair cop   🤣 it’s a fair cop  positive
#> 4                      @TheJackForge 🤣                   🤣   neutral
#> 5                               @tobi !                    !   neutral
#> 6                        @TrungTPhan 🤣                   🤣   neutral

Step 5: Generate a WordCloud

elon_plt_clean_df <- clean_tweets(elon_sentiment_df)
elon_count_df <- count_words(elon_plt_clean_df)
plt <- create_wordcloud(elon_count_df)

Optional: We can also determine the proportion of neutral, positive, and negative tweets with count_tweets().

count_tweets(elon_sentiment_df)
#> $negative
#> [1] 0.1041667
#> 
#> $neutral
#> [1] 0.6458333
#> 
#> $positive
#> [1] 0.25