# textprepr-vignette.Rmd
library(textprepr)
# Four sample tweets used as the input for every example below.
tweets <- c(
  "Make America Great Again! @DonaldTrump #America",
  "It's rocket-science tier investment~~ #LoveElonMusk",
  "America America America always GREAT",
  "make america great again! #AMERICA"
)
# Default call: as the printed result shows, every punctuation character is
# removed, including "@", "#", apostrophes, hyphens and "~". Letter case and
# the words themselves are left untouched.
remove_punct(tweets)
#> [1] "Make America Great Again DonaldTrump America"
#> [2] "Its rocketscience tier investment LoveElonMusk"
#> [3] "America America America always GREAT"
#> [4] "make america great again AMERICA"
# `skip` lists the punctuation characters to keep. Here "@", "#" and "'"
# survive, while "!", "-" and "~" are still removed (see the output below).
remove_punct(tweets, skip = "@#'")
#> [1] "Make America Great Again @DonaldTrump #America"
#> [2] "It's rocketscience tier investment #LoveElonMusk"
#> [3] "America America America always GREAT"
#> [4] "make america great again #AMERICA"
# Extract 4-grams. Each result is a run of four consecutive space-separated
# tokens with punctuation and case kept as-is. The windows cross tweet
# boundaries (e.g. element 4 ends with the first word of the second tweet),
# so the tweets are apparently treated as one continuous token stream.
extract_ngram(tweets, n = 4)
#> [1] "Make America Great Again!"
#> [2] "America Great Again! @DonaldTrump"
#> [3] "Great Again! @DonaldTrump #America"
#> [4] "Again! @DonaldTrump #America It's"
#> [5] "@DonaldTrump #America It's rocket-science"
#> [6] "#America It's rocket-science tier"
#> [7] "It's rocket-science tier investment~~"
#> [8] "rocket-science tier investment~~ #LoveElonMusk"
#> [9] "tier investment~~ #LoveElonMusk America"
#> [10] "investment~~ #LoveElonMusk America America"
#> [11] "#LoveElonMusk America America America"
#> [12] "America America America always"
#> [13] "America America always GREAT"
#> [14] "America always GREAT make"
#> [15] "always GREAT make america"
#> [16] "GREAT make america great"
#> [17] "make america great again!"
#> [18] "america great again! #AMERICA"
# Collect the hashtags found across all tweets. The leading "#" is dropped
# and case is preserved, so "America" and "AMERICA" are reported separately.
extract_hashtags(tweets)
#> [1] "America" "LoveElonMusk" "AMERICA"
# Call generate_cloud() once for each `type` value demonstrated here.
# Every call rebinds `tweet_count`, so only the "stopwords" result is still
# available once the three lines have run.
# NOTE(review): the return value and any plotting side effect of
# generate_cloud() are not visible in this file; confirm against the
# package documentation.
tweet_count <- generate_cloud(tweets, type = "words")
tweet_count <- generate_cloud(tweets, type = "hashtag")
tweet_count <- generate_cloud(tweets, type = "stopwords")