Source code for pytextprep.generate_cloud

import matplotlib.pyplot as plt
from pytextprep.extract_hashtags import extract_hashtags
from wordcloud import WordCloud
import nltk
from nltk.corpus import stopwords

def generate_cloud(tweets, type="words"):
    """Create a word cloud of the most frequent words in a group of tweets.

    All tweets are joined into a single lower-cased text before the cloud
    is generated. At most 50 words are drawn, on a white background.

    Parameters
    ----------
    tweets : list
        Non-empty list of tweets; every element must be a ``str``.
    type : {"words", "hashtag", "stopwords"}, default="words"
        Type of content to show in the word cloud.

        * ``"words"`` shows all words (no stop-word filtering).
        * ``"hashtag"`` only shows what ``extract_hashtags`` returns for
          the tweets (no stop-word filtering).
        * ``"stopwords"`` drops the NLTK English stop words before
          generating the cloud; the ``WordCloud`` default stop-word
          setting is also left in place.

    Returns
    -------
    fig : matplotlib.figure.Figure
        Figure on which the word cloud is drawn, with the axes hidden.
    wordcloud : wordcloud.WordCloud
        The generated word cloud object.

    Raises
    ------
    TypeError
        If `tweets` is not a list.
    ValueError
        If `tweets` is empty, contains a non-string element, or `type`
        is not one of the accepted values.

    Examples
    --------
    >>> from pytextprep.generate_cloud import generate_cloud
    >>> import matplotlib.pyplot as plt
    >>> tweets = [
    ...     "Make America Great Again! @DonaldTrump #America",
    ...     "It's rocket-science tier investment~~ #LoveElonMusk",
    ...     "America America America #USA #USA"
    ... ]
    >>> fig, wc = generate_cloud(tweets)
    >>> plt.show()
    """
    # Validate every argument before doing any work (or any download), so
    # bad input fails fast and without side effects.
    if not isinstance(tweets, list):
        raise TypeError("Argument tweets should be of type list.")
    if len(tweets) < 1:
        raise ValueError("Make sure argument tweets contains at least one message")
    if not all(isinstance(tweet, str) for tweet in tweets):
        raise ValueError("Make sure argument tweets only contains strings")
    if type not in {"words", "hashtag", "stopwords"}:
        raise ValueError("Make sure the argument type is one of the accepted values")

    # Settings shared by every cloud type.
    cloud_options = {"max_words": 50, "background_color": "white"}

    if type == "hashtag":
        tweets = extract_hashtags(tweets)
    text = " ".join(tweets).lower()

    if type == "stopwords":
        # Fetch the corpus only when it is actually needed and missing,
        # instead of hitting the downloader on every call.
        try:
            english_stopwords = set(stopwords.words("english"))
        except LookupError:
            nltk.download("stopwords")
            english_stopwords = set(stopwords.words("english"))
        # The set is built once, so each token costs a single O(1) lookup.
        text = " ".join(
            word for word in text.split() if word not in english_stopwords
        )
    else:
        # An explicit empty collection disables WordCloud's built-in
        # stop-word filtering for the "words" and "hashtag" modes.
        cloud_options["stopwords"] = set()

    wordcloud = WordCloud(**cloud_options).generate(text)

    # Draw on an explicit axes owned by the returned figure rather than
    # relying on pyplot's implicit "current axes" state.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(wordcloud, interpolation="bilinear")
    ax.axis("off")
    return fig, wordcloud