import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
from heapq import nlargest
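# The sentence tokenizer ('punkt') and the stopword corpus must be available;
# on a fresh NLTK install (an assumption about your environment), download them once:
# nltk.download('punkt')
# nltk.download('stopwords')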
# Load the contents of the file into a text variable
# (utf-8 assumed here, since the script targets French text with accented characters)
with open('path/to/your/file.txt', 'r', encoding='utf-8') as file:
    texte = file.read()
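# Split the text into sentences (for ranking) and into words (for frequency counts)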
sent_tokens = sent_tokenize(texte)
word_tokens = word_tokenize(texte)
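# French stopwords to match the input text; the punctuation list below is
# deliberately minimal (string.punctuation would cover more symbols)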
stop_words = set(stopwords.words('french'))
punctuations = ['.', ',', '!', '?', ';', ':']
# Keep only meaningful tokens, lowercased so the frequency lookups below match
filtered_words = [word.lower() for word in word_tokens
                  if word.lower() not in stop_words and word not in punctuations]
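# Frequency distribution over the filtered, lowercased words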
freq_dist = FreqDist(filtered_words)
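# Score each sentence by summing the frequencies of the words it contains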
ranking_sentences = []
for i, sentence in enumerate(sent_tokens):
    sentence_score = 0
    for word in word_tokenize(sentence):
        if word.lower() in freq_dist:
            sentence_score += freq_dist[word.lower()]
    ranking_sentences.append((sentence_score, i))
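# Target summary length: roughly one third of the original sentence count (a heuristic)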
n = max(1, len(sent_tokens) // 3)
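# Take the n highest-scoring sentences, then restore their original document order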
summary_sentences = nlargest(n, ranking_sentences)
summary_sentences.sort(key=lambda x: x[1])
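# Reassemble the selected sentences into the final summary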
summary = " ".join(sent_tokens[index] for _, index in summary_sentences)
print(summary)