#!/usr/bin/env python
# coding: utf-8
# In[12]:
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.probability import FreqDist
import string
# Uncomment on first run to download the required NLTK resources
# (newer NLTK releases may also need 'punkt_tab' for tokenization):
#nltk.download('punkt')
#nltk.download('stopwords')
#nltk.download('wordnet')
text = "Natural Language Processing (NLP) is a subfield of artificial intelligence and linguistics. It involves the interaction between computers and humans in natural language. NLP focuses on the interaction between computers and humans, as well as the processing and analysis of large amounts of natural language data."
print("The input text is: \n" + text)
tokens = word_tokenize(text)
print("\nThe tokens generated after tokenisation are: \n")
print(tokens)
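# Build a translation table that deletes punctuation, then keep only the
# tokens that still contain something once punctuation is stripped
# (i.e. drop tokens that are pure punctuation, such as '(' or '.')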
table = str.maketrans('', '', string.punctuation)
stripped_tokens = [word for word in tokens if word.translate(table).strip()]
print("\nThe tokens generated after removing punctuation are: \n")
print(stripped_tokens)
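# A variant (an assumption, not used above) would also strip punctuation
# from inside each surviving token rather than only dropping
# punctuation-only tokens:
# stripped_inside = [w.translate(table) for w in tokens if w.translate(table).strip()]

# Remove English stopwords; each token is lowercased only for the
# membership test, so the original casing is preserved in the output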
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in stripped_tokens if word.lower() not in stop_words]
print("\nThe tokens after removing stopwords are: \n")
print(filtered_tokens)
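# Stem with the Porter algorithm: a rule-based suffix stripper, so its
# output is not always a real word (e.g. 'language' -> 'languag')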
stemmer = PorterStemmer()
stemmed_tokens = [stemmer.stem(word) for word in filtered_tokens]
print("\nThe tokens after stemming are: \n")
print(stemmed_tokens)
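# Lemmatize with WordNet: reduces each token to its dictionary form;
# with no POS argument, lemmatize() treats every token as a noun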
lemmatizer = WordNetLemmatizer()
lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]
print("\nThe tokens after lemmatization are: \n")
print(lemmatized_tokens)
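# lemmatize() also accepts a WordNet POS tag, which can change the
# result; a minimal sketch (the word 'involves' is an illustrative pick
# from the input text, not part of the original script):
print("\n'involves' lemmatized as a verb: " + lemmatizer.lemmatize("involves", pos="v"))

# Count lemma frequencies and report the five most common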
freq_dist = FreqDist(lemmatized_tokens)
print("\nFrequency Distribution: \n")
print(freq_dist.most_common(5))
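# FreqDist is a Counter subclass, so single counts can be read with
# dict-style indexing (the token chosen here is illustrative):
print("\nOccurrences of the token 'NLP': " + str(freq_dist["NLP"]))

# Split the raw text into sentences with the Punkt sentence tokenizer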
sentences = sent_tokenize(text)
print("\nSentences: \n")
for sentence in sentences:
    print(sentence)
# In[ ]: