# Remove special characters - Posts

from collections import Counter

import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from wordcloud import WordCloud
# Sample paragraph that the rest of the script tokenizes, counts, and plots.
# The value opens with a newline and keeps one line break after each fragment,
# exactly as the fragments are laid out below.
text = (
    "\n"
    "Consider any text\n"
    "paragraph. Remove the\n"
    "stopwords. Tokenize the\n"
    "paragraph to extract\n"
    "words and sentences.\n"
    "Calculate the word\n"
    "frequency distribution\n"
    "and plot the\n"
    "frequencies. Plot the\n"
    "wordcloud of the text.\n"
)
# Tokenize the paragraph twice: once into word-level tokens and once into
# sentences.
# NOTE: both tokenizers and the stop-word list rely on NLTK data packages
# being installed locally (see nltk.download).
words = word_tokenize(text)
sentences = sent_tokenize(text)

# A set keeps the per-token stop-word membership test cheap.
stop_words = set(stopwords.words("english"))

# Build the list of content words:
#   * tokens are lower-cased so that "Plot" and "plot" are counted together,
#     matching the case-insensitive stop-word test;
#   * stop words are dropped;
#   * tokens with no letter or digit at all (".", ",", quotes, ...) are
#     dropped, because the tokenizer emits punctuation as separate tokens and
#     they would otherwise dominate the frequency distribution.
filtered_words = [
    word.lower()
    for word in words
    if word.lower() not in stop_words and any(ch.isalnum() for ch in word)
]

# Frequency distribution: content word -> number of occurrences.
word_freq = Counter(filtered_words)
# Bar chart of the word-frequency distribution: one bar per distinct word,
# in the order the words were first counted.
plt.figure(figsize=(10, 5))
# Dict views are copied into lists so bar() receives ordinary sequences.
plt.bar(list(word_freq.keys()), list(word_freq.values()))
plt.xticks(rotation=45)
plt.xlabel("Words")
plt.ylabel("Frequency")
plt.title("Word Frequency Distribution")
# Re-fit the axes so the rotated tick labels are not cut off at the bottom.
plt.tight_layout()
plt.show()
# Render a word cloud directly from the raw paragraph on a white 800x400
# canvas, then display it as an image without axes.
wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
).generate(text)

plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.title("Wordcloud of the Text")
plt.show()