Source code for pytextprep.extract_ngram

def extract_ngram(tweets, n):
    """Extract word-level n-grams from the input tweet data.

    All tweets are concatenated (separated by a single space) into one
    word sequence before the n-grams are built, so an n-gram may span the
    boundary between two consecutive tweets.

    Parameters
    ----------
    tweets : list of str
        List of tweets. Any other container type is rejected.
    n : int
        Length (in words) of the n-grams to be created.

    Returns
    -------
    list of str
        The n-grams in order of appearance, each one a string of ``n``
        words joined by single spaces. The list is empty when ``n`` is
        less than 1 or greater than the total number of words.

    Raises
    ------
    TypeError
        If ``tweets`` is not a ``list``.

    Examples
    --------
    >>> tweets_list = ["Make America Great Again DonaldTrump"]
    >>> extract_ngram(tweets=tweets_list, n=3)
    ['Make America Great', 'America Great Again', 'Great Again DonaldTrump']
    """
    # Check for correct input type
    if not isinstance(tweets, list):
        raise TypeError("'tweets' should be of type 'list'.")

    # Merge all tweets into one string, then break it into individual words.
    # ``str.split()`` with no argument splits on any run of whitespace
    # (spaces, tabs, newlines) and never yields empty strings, so words
    # separated by a line break inside a tweet are tokenised correctly.
    words = " ".join(tweets).split()

    # Zipping n copies of the word list, each shifted one position further,
    # yields every window of n consecutive words; zip stops at the shortest
    # copy, so no partial windows are produced.
    ngrams = zip(*(words[i:] for i in range(n)))

    # Concatenate each window back into a space-separated string
    return [" ".join(ngram) for ngram in ngrams]