Introduction to clevercloudr

Overview

The goal of clevercloudr is to serve as a solution for creating meaningful word clouds. This library helps data scientists and data analysts prepare text data easily by providing functions to clean raw text, perform stemming, and customize stopwords.

Load the library

library(clevercloudr)

Data

The first function, CleverClean(), takes in a list of strings as an input.

text <- list("grounds!!!", "feet6", "running123", "feeding", "feed", "feed$", "grounding", "feet", "happiness")
text
#> [[1]]
#> [1] "grounds!!!"
#> 
#> [[2]]
#> [1] "feet6"
#> 
#> [[3]]
#> [1] "running123"
#> 
#> [[4]]
#> [1] "feeding"
#> 
#> [[5]]
#> [1] "feed"
#> 
#> [[6]]
#> [1] "feed$"
#> 
#> [[7]]
#> [1] "grounding"
#> 
#> [[8]]
#> [1] "feet"
#> 
#> [[9]]
#> [1] "happiness"

Clean text data with `CleverClean()`

CleverClean() takes in a list of strings as an input. It removes digits and punctuation from the strings and returns a character vector.

clean_text <- CleverClean(text)
clean_text
#> [1] "grounds"   "feet"      "running"   "feeding"   "feed"      "feed"     
#> [7] "grounding" "feet"      "happiness"

Perform stemming with `CleverStemmer()`

CleverStemmer() takes in a character vector or a string as an input. It performs stemming on each element of the character vector or each word in the string.

stem_text <- CleverStemmer(clean_text)
stem_text
#> [1] "ground" "feet"   "run"    "feed"   "feed"   "feed"   "ground" "feet"  
#> [9] "happi"

Add customized English stopwords with `CleverStopwords()`

CleverStopwords() takes a list of strings as an input. It adds each string in the input list to a list of the most common English stopwords and returns the combined list.

new_words <- list("happi")
new_stopwords <- CleverStopwords(new_words)
new_stopwords
#> [[1]]
#> [1] "a"
#> 
#> [[2]]
#> [1] "about"
#> 
#> [[3]]
#> [1] "above"
#> 
#> [[4]]
#> [1] "after"
#> 
#> [[5]]
#> [1] "again"
#> 
#> [[6]]
#> [1] "against"
#> 
#> [[7]]
#> [1] "all"
#> 
#> [[8]]
#> [1] "am"
#> 
#> [[9]]
#> [1] "an"
#> 
#> [[10]]
#> [1] "and"
#> 
#> [[11]]
#> [1] "any"
#> 
#> [[12]]
#> [1] "are"
#> 
#> [[13]]
#> [1] "aren't"
#> 
#> [[14]]
#> [1] "as"
#> 
#> [[15]]
#> [1] "at"
#> 
#> [[16]]
#> [1] "be"
#> 
#> [[17]]
#> [1] "because"
#> 
#> [[18]]
#> [1] "been"
#> 
#> [[19]]
#> [1] "before"
#> 
#> [[20]]
#> [1] "being"
#> 
#> [[21]]
#> [1] "below"
#> 
#> [[22]]
#> [1] "between"
#> 
#> [[23]]
#> [1] "both"
#> 
#> [[24]]
#> [1] "but"
#> 
#> [[25]]
#> [1] "by"
#> 
#> [[26]]
#> [1] "can't"
#> 
#> [[27]]
#> [1] "cannot"
#> 
#> [[28]]
#> [1] "could"
#> 
#> [[29]]
#> [1] "couldn't"
#> 
#> [[30]]
#> [1] "did"
#> 
#> [[31]]
#> [1] "didn't"
#> 
#> [[32]]
#> [1] "do"
#> 
#> [[33]]
#> [1] "does"
#> 
#> [[34]]
#> [1] "doesn't"
#> 
#> [[35]]
#> [1] "doing"
#> 
#> [[36]]
#> [1] "don't"
#> 
#> [[37]]
#> [1] "down"
#> 
#> [[38]]
#> [1] "during"
#> 
#> [[39]]
#> [1] "each"
#> 
#> [[40]]
#> [1] "few"
#> 
#> [[41]]
#> [1] "for"
#> 
#> [[42]]
#> [1] "from"
#> 
#> [[43]]
#> [1] "further"
#> 
#> [[44]]
#> [1] "had"
#> 
#> [[45]]
#> [1] "hadn't"
#> 
#> [[46]]
#> [1] "happi"
#> 
#> [[47]]
#> [1] "has"
#> 
#> [[48]]
#> [1] "hasn't"
#> 
#> [[49]]
#> [1] "have"
#> 
#> [[50]]
#> [1] "haven't"
#> 
#> [[51]]
#> [1] "having"
#> 
#> [[52]]
#> [1] "he"
#> 
#> [[53]]
#> [1] "he'd"
#> 
#> [[54]]
#> [1] "he'll"
#> 
#> [[55]]
#> [1] "he's"
#> 
#> [[56]]
#> [1] "her"
#> 
#> [[57]]
#> [1] "here"
#> 
#> [[58]]
#> [1] "here's"
#> 
#> [[59]]
#> [1] "hers"
#> 
#> [[60]]
#> [1] "herself"
#> 
#> [[61]]
#> [1] "him"
#> 
#> [[62]]
#> [1] "himself"
#> 
#> [[63]]
#> [1] "his"
#> 
#> [[64]]
#> [1] "how"
#> 
#> [[65]]
#> [1] "how's"
#> 
#> [[66]]
#> [1] "i"
#> 
#> [[67]]
#> [1] "i'd"
#> 
#> [[68]]
#> [1] "i'll"
#> 
#> [[69]]
#> [1] "i'm"
#> 
#> [[70]]
#> [1] "i've"
#> 
#> [[71]]
#> [1] "if"
#> 
#> [[72]]
#> [1] "in"
#> 
#> [[73]]
#> [1] "into"
#> 
#> [[74]]
#> [1] "is"
#> 
#> [[75]]
#> [1] "isn't"
#> 
#> [[76]]
#> [1] "it"
#> 
#> [[77]]
#> [1] "it's"
#> 
#> [[78]]
#> [1] "its"
#> 
#> [[79]]
#> [1] "itself"
#> 
#> [[80]]
#> [1] "let's"
#> 
#> [[81]]
#> [1] "me"
#> 
#> [[82]]
#> [1] "more"
#> 
#> [[83]]
#> [1] "most"
#> 
#> [[84]]
#> [1] "mustn't"
#> 
#> [[85]]
#> [1] "my"
#> 
#> [[86]]
#> [1] "myself"
#> 
#> [[87]]
#> [1] "no"
#> 
#> [[88]]
#> [1] "nor"
#> 
#> [[89]]
#> [1] "not"
#> 
#> [[90]]
#> [1] "of"
#> 
#> [[91]]
#> [1] "off"
#> 
#> [[92]]
#> [1] "on"
#> 
#> [[93]]
#> [1] "once"
#> 
#> [[94]]
#> [1] "only"
#> 
#> [[95]]
#> [1] "or"
#> 
#> [[96]]
#> [1] "other"
#> 
#> [[97]]
#> [1] "ought"
#> 
#> [[98]]
#> [1] "our"
#> 
#> [[99]]
#> [1] "ours"
#> 
#> [[100]]
#> [1] "ourselves"
#> 
#> [[101]]
#> [1] "out"
#> 
#> [[102]]
#> [1] "over"
#> 
#> [[103]]
#> [1] "own"
#> 
#> [[104]]
#> [1] "same"
#> 
#> [[105]]
#> [1] "shan't"
#> 
#> [[106]]
#> [1] "she"
#> 
#> [[107]]
#> [1] "she'd"
#> 
#> [[108]]
#> [1] "she'll"
#> 
#> [[109]]
#> [1] "she's"
#> 
#> [[110]]
#> [1] "should"
#> 
#> [[111]]
#> [1] "shouldn't"
#> 
#> [[112]]
#> [1] "so"
#> 
#> [[113]]
#> [1] "some"
#> 
#> [[114]]
#> [1] "such"
#> 
#> [[115]]
#> [1] "than"
#> 
#> [[116]]
#> [1] "that"
#> 
#> [[117]]
#> [1] "that's"
#> 
#> [[118]]
#> [1] "the"
#> 
#> [[119]]
#> [1] "their"
#> 
#> [[120]]
#> [1] "theirs"
#> 
#> [[121]]
#> [1] "them"
#> 
#> [[122]]
#> [1] "themselves"
#> 
#> [[123]]
#> [1] "then"
#> 
#> [[124]]
#> [1] "there"
#> 
#> [[125]]
#> [1] "there's"
#> 
#> [[126]]
#> [1] "these"
#> 
#> [[127]]
#> [1] "they"
#> 
#> [[128]]
#> [1] "they'd"
#> 
#> [[129]]
#> [1] "they'll"
#> 
#> [[130]]
#> [1] "they're"
#> 
#> [[131]]
#> [1] "they've"
#> 
#> [[132]]
#> [1] "this"
#> 
#> [[133]]
#> [1] "those"
#> 
#> [[134]]
#> [1] "through"
#> 
#> [[135]]
#> [1] "to"
#> 
#> [[136]]
#> [1] "too"
#> 
#> [[137]]
#> [1] "under"
#> 
#> [[138]]
#> [1] "until"
#> 
#> [[139]]
#> [1] "up"
#> 
#> [[140]]
#> [1] "very"
#> 
#> [[141]]
#> [1] "was"
#> 
#> [[142]]
#> [1] "wasn't"
#> 
#> [[143]]
#> [1] "we"
#> 
#> [[144]]
#> [1] "we'd"
#> 
#> [[145]]
#> [1] "we'll"
#> 
#> [[146]]
#> [1] "we're"
#> 
#> [[147]]
#> [1] "we've"
#> 
#> [[148]]
#> [1] "were"
#> 
#> [[149]]
#> [1] "weren't"
#> 
#> [[150]]
#> [1] "what"
#> 
#> [[151]]
#> [1] "what's"
#> 
#> [[152]]
#> [1] "when"
#> 
#> [[153]]
#> [1] "when's"
#> 
#> [[154]]
#> [1] "where"
#> 
#> [[155]]
#> [1] "where's"
#> 
#> [[156]]
#> [1] "which"
#> 
#> [[157]]
#> [1] "while"
#> 
#> [[158]]
#> [1] "who"
#> 
#> [[159]]
#> [1] "who's"
#> 
#> [[160]]
#> [1] "whom"
#> 
#> [[161]]
#> [1] "why"
#> 
#> [[162]]
#> [1] "why's"
#> 
#> [[163]]
#> [1] "will"
#> 
#> [[164]]
#> [1] "with"
#> 
#> [[165]]
#> [1] "won't"
#> 
#> [[166]]
#> [1] "would"
#> 
#> [[167]]
#> [1] "wouldn't"
#> 
#> [[168]]
#> [1] "you"
#> 
#> [[169]]
#> [1] "you'd"
#> 
#> [[170]]
#> [1] "you'll"
#> 
#> [[171]]
#> [1] "you're"
#> 
#> [[172]]
#> [1] "you've"
#> 
#> [[173]]
#> [1] "your"
#> 
#> [[174]]
#> [1] "yours"
#> 
#> [[175]]
#> [1] "yourself"
#> 
#> [[176]]
#> [1] "yourselves"

Generate the word cloud with preprocessed text and customized stopwords using `CleverWordCloud()`

CleverWordCloud() takes in two arguments: a character vector of words and a list of stopwords. It generates a PNG object and an HTML file in the current directory where the function is called. The resulting word cloud is meaningful and reflects accurate word frequencies, without the influence of different tenses and/or various forms of the same word.

CleverWordCloud(stem_text, new_stopwords)

Overview

Load the library

Data

Clean text data with CleverClean()

Perform stemming with CleverStemmer()

Add customized English stopwords with CleverStopwords()

Generate the word cloud with preprocessed text and customized stopwords using CleverWordCloud()

Clean text data with `CleverClean()`

Perform stemming with `CleverStemmer()`

Add customized English stopwords with `CleverStopwords()`

Generate the word cloud with preprocessed text and customized stopwords using `CleverWordCloud()`