-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathquality_tester.py
More file actions
43 lines (37 loc) · 1.32 KB
/
quality_tester.py
File metadata and controls
43 lines (37 loc) · 1.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from readability import Readability
import os
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np
from nltk.corpus import stopwords
import string
def flesch_score(text):
    """Return the Flesch readability result for *text*.

    The value is whatever ``Readability(text).flesch()`` produces; it is
    handed back to the caller unchanged.
    """
    return Readability(text).flesch()
def sentence_length(text):
    """Return the mean and median sentence length of *text*, in words.

    The text is split into sentences with ``sent_tokenize`` and every
    sentence into tokens with ``word_tokenize``. Only tokens for which
    ``str.isalnum()`` is true are counted as words.

    Returns:
        A ``(mean_length, median_length)`` tuple. When no sentences are
        found (for example, for empty text) both values are ``nan``.
    """
    # Count only alphanumeric tokens so punctuation is excluded.
    # NOTE: isalnum() rejects every token that contains any
    # non-alphanumeric character, not just pure punctuation tokens.
    sentence_lengths = [
        sum(1 for token in word_tokenize(sentence) if token.isalnum())
        for sentence in sent_tokenize(text)
    ]

    # np.mean / np.median on an empty sequence emit RuntimeWarnings and
    # return nan as a side effect; return nan deliberately and quietly so
    # the "no sentences" case is handled explicitly.
    if not sentence_lengths:
        return np.float64("nan"), np.float64("nan")

    mean_length = np.mean(sentence_lengths)
    median_length = np.median(sentence_lengths)
    return mean_length, median_length
if __name__ == '__main__':
    # Input document, resolved relative to the current working directory.
    file_path = os.path.join(os.getcwd(), "downloaded_files", "CONTRIBUTING_transformers.md")
    # filename = '/downloaded_files/flutter_CONTRIBUTING.md'

    # Decode explicitly as UTF-8 instead of the platform default encoding,
    # which can fail on or mis-decode non-ASCII characters.
    # NOTE(review): assumes the Markdown file is UTF-8 encoded - confirm.
    with open(file_path, 'r', encoding='utf-8') as text_file:
        content = text_file.read()

    print("Flesch Readability Score = ")
    print(flesch_score(content))
    print("Sentences length = ")
    print(sentence_length(content))