R/rtweetclean.R
clean_df.Rd
Returns a new dataframe containing additional columns that were not in the original. Generatable columns include: text_only, which strips emojis, hashtags, and hyperlinks from the text column; word_count, which counts the number of words contained in the text_only column; emojis, which contains the emojis extracted from the text column; proportion_of_avg_retweets, the ratio of the number of retweets a tweet received to the account average; and proportion_of_avg_favorites, the ratio of the number of favorites a tweet received to the account average.
clean_df( raw_tweets_df, text_only = TRUE, word_count = TRUE, emojis = TRUE, proportion_of_avg_retweets = TRUE, proportion_of_avg_favorites = TRUE )
Argument | Type |
---|---|
raw_tweets_df | dataframe |
text_only | bool |
word_count | bool |
emojis | bool |
proportion_of_avg_retweets | bool |
proportion_of_avg_favorites | bool |
dataframe
text <- c("example tweet text 1 @user2 @user", "#example #tweet 2 ", "example tweet 3 https://t.co/G4ziCaPond", "example tweet 4") retweet_count <- c(43, 12, 24, 29) favorite_count <- c(85, 41, 65, 54) raw_df <- data.frame(text, retweet_count, favorite_count) clean_df(raw_df)#> text retweet_count favorite_count #> 1 example tweet text 1 @user2 @user 43 85 #> 2 #example #tweet 2 12 41 #> 3 example tweet 3 https://t.co/G4ziCaPond 24 65 #> 4 example tweet 4 29 54 #> text_only word_count emojis prptn_rts_vs_avg #> 1 example tweet text 1 4 1.5925926 #> 2 2 1 0.4444444 #> 3 example tweet 3 3 0.8888889 #> 4 example tweet 4 3 1.0740741 #> prptn_favorites_vs_avg #> 1 1.3877551 #> 2 0.6693878 #> 3 1.0612245 #> 4 0.8816327clean_df(raw_df, emojis = FALSE)#> text retweet_count favorite_count #> 1 example tweet text 1 @user2 @user 43 85 #> 2 #example #tweet 2 12 41 #> 3 example tweet 3 https://t.co/G4ziCaPond 24 65 #> 4 example tweet 4 29 54 #> text_only word_count prptn_rts_vs_avg prptn_favorites_vs_avg #> 1 example tweet text 1 4 1.5925926 1.3877551 #> 2 2 1 0.4444444 0.6693878 #> 3 example tweet 3 3 0.8888889 1.0612245 #> 4 example tweet 4 3 1.0740741 0.8816327