"""Filter the words of a text by how often they occur, using NLTK."""

from collections import Counter

import nltk
from nltk.chat.util import Chat, reflections
from nltk.corpus import stopwords

# Fetch the NLTK data used below: the English stop-word list and the
# tokenizer model behind ``nltk.word_tokenize``.
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases (3.9+) load the tokenizer from the 'punkt_tab'
# resource instead of 'punkt'; without it word_tokenize raises LookupError.
nltk.download('punkt_tab')


def filter_words_by_frequency(text: str, k: int) -> list:
    """Return the non-stop-words of *text* that occur at least *k* times.

    The text is tokenized with ``nltk.word_tokenize``; tokens that are not
    purely alphanumeric (punctuation, contractions, hyphenated words) are
    dropped, the rest are lowercased, and English stop words are removed
    before counting.

    Args:
        text: Input text. Leading and trailing whitespace is ignored.
        k: Minimum number of occurrences a word needs to be kept.

    Returns:
        A list of distinct lowercase words whose count is ``>= k``, in order
        of first occurrence in *text*.
    """
    tokens = nltk.word_tokenize(text.strip())
    stop_words = set(stopwords.words('english'))

    # Normalize first, then filter: the stop-word list is lowercase, so the
    # membership test must see the lowercased form of each token.
    word_counts = Counter(
        word
        for word in (token.lower() for token in tokens if token.isalnum())
        if word not in stop_words
    )

    # Counter preserves insertion order, so results follow first occurrence.
    return [word for word, count in word_counts.items() if count >= k]


if __name__ == "__main__":
    # Example usage: print the words that occur at least twice.
    input_text = "a mouse is smaller than a dog but a dog is stronger "
    k = 2
    result = filter_words_by_frequency(input_text, k)
    print(result)