-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_remove_signature.py
More file actions
153 lines (144 loc) · 6.12 KB
/
test_remove_signature.py
File metadata and controls
153 lines (144 loc) · 6.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import unittest
from parameterized import parameterized
from unittest.mock import MagicMock, patch
from ai_data_preprocessing_queue.Pipeline import Pipeline
from ai_data_preprocessing_queue.Steps.remove_signature import (
step, remove_greetings_and_following_text, remove_newline)
class TestRemoveSignature(unittest.TestCase):
    """Tests for the ``remove_signature`` step and the helpers it exposes.

    Covers ``remove_newline``, ``remove_greetings_and_following_text``, the
    step when run through a ``Pipeline``, and direct calls to ``step``.
    """

    # Each case is (test-name suffix, input text, expected text).
    @parameterized.expand([  # type: ignore[misc]
        (
            "multiple_newlines",
            "Could you please review the attached document?\n\n\nI need your feedback by Friday.",
            "Could you please review the attached document? I need your feedback by Friday.",
        ),
        # NOTE(review): input and expected are identical in this case, so it does
        # not exercise space collapsing as written -- confirm the fixture text
        # was not whitespace-normalised at some point.
        (
            "multiple_spaces",
            "The meeting is scheduled for 3PM tomorrow.",
            "The meeting is scheduled for 3PM tomorrow.",
        ),
        (
            "mixed_whitespace",
            "Please find the report attached. \n\n The numbers look good \r\n\r\n for Q3!",
            "Please find the report attached. The numbers look good for Q3!",
        ),
        (
            "empty_string",
            "",
            "",
        ),
        (
            "trailing_whitespace",
            "I'll send the updated version tomorrow. \n\n ",
            "I'll send the updated version tomorrow.",
        ),
    ])
    def test_remove_newline(self, name: str, input_text: str, expected: str) -> None:
        # ``name`` is only consumed by parameterized to label the generated test.
        actual = remove_newline(input_text)
        self.assertEqual(actual, expected)

    # Each case is (test-name suffix, input text, expected text); the expected
    # text is whatever precedes the first greeting phrase.
    @parameterized.expand([  # type: ignore[misc]
        (
            "english_signature_basic",
            "Here's the project update. Sincerely, John Smith\nProject Manager",
            "Here's the project update.",
        ),
        (
            "english_signature_with_content",
            "Please review the attached documents. Best regards, Jane Doe\nSenior Developer\nTech Department",
            "Please review the attached documents.",
        ),
        (
            "english_signature_with_content_and_several_newlines",
            "Please review the attached documents. Best regards,\nJane Doe\n\nSenior Developer\n\nTech Department",
            "Please review the attached documents.",
        ),
        (
            "german_signature",
            "Die Unterlagen wurden aktualisiert. Mit freundlichen Grüßen, Hans Schmidt\nPhone: +49 123 456789",
            "Die Unterlagen wurden aktualisiert.",
        ),
        (
            "greeting_with_comma",
            "Meeting is scheduled for tomorrow. Kind regards, Sarah",
            "Meeting is scheduled for tomorrow.",
        ),
        (
            "mixed_case_greeting",
            "Report is ready. BEST REGARDS, Tom Wilson",
            "Report is ready.",
        ),
        (
            "multiple_greetings",
            "Hello team, here's the update. Best regards, Jim\nRegards, HR Team",
            "Hello team, here's the update.",
        ),
        (
            "empty_string",
            "",
            "",
        ),
        (
            "no_greetings",
            "This is a plain text without any greetings or signatures.",
            "This is a plain text without any greetings or signatures.",
        ),
    ])
    def test_remove_greetings_and_following_text(self, name: str, input_text: str, expected: str) -> None:
        # ``name`` is only consumed by parameterized to label the generated test.
        actual = remove_greetings_and_following_text(input_text)
        self.assertEqual(actual, expected)

    # End-to-end cases: (test-name suffix, input text, expected pipeline output).
    @parameterized.expand([  # type: ignore[misc]
        (
            "remove_signature_basic",
            "We're sending the final draft for review. Best regards, Alice Johnson\nProject Lead",
            "We're sending the final draft for review.",
        ),
        (
            "remove_signature_extended",
            "Order Mice/keyboard\nGoodmorning, Can you please order the following: 10 x Dell Laser Mouse IL3220 "
            "10 x Dell Business Keyboard AB322 (UK layout) Thx Best regards Jimmy B. "
            "| C Facilities & Reception Klaus+Andreas Nederland | Anonymstraat 47 | 1234 AJ Amsterdam | Netherlands "
            "Phone: +01 23 695 4567 | Mobile: +97 65 445 1234 | Fax: +31 35 695 8825 jim.anonymus@company.com "
            "| www.nl.somecompany.com",
            "Order Mice/keyboard Goodmorning, Can you please order the following: 10 x Dell Laser Mouse IL3220 "
            "10 x Dell Business Keyboard AB322 (UK layout) Thx",
        ),
        (
            "thanking_at_start",
            "Thank you very much for your support. "
            "I will prepare the contract and send it tomorrow.\n\nBest regards, Bob Brown",
            "I will prepare the contract and send it tomorrow.",
        ),
        (
            "thanking_in_middle",
            "Thank you very much for your support. "
            "I appreciate your support on this migration. Thanks a lot, I will share the logs shortly.",
            "I appreciate your support on this migration. I will share the logs shortly.",
        ),
        (
            "single_greeting_word_german",
            "The deliverables are ready. Grüße",
            "The deliverables are ready.",
        ),
        (
            "german_empty_result",
            "Vielen Dank für Ihre Hilfe. Mit freundlichen Grüßen, Lena Meyer "
            "Und hier kommt noch mehr Text.",
            "",
        ),
        (
            "no_change",
            "Please schedule the kickoff meeting for next Tuesday morning at 10:00.",
            "Please schedule the kickoff meeting for next Tuesday morning at 10:00.",
        ),
    ])
    def test_remove_signature(self, name: str, input_text: str, expected: str) -> None:
        # Run the text through a pipeline that contains only the
        # ``remove_signature`` step, configured with ``None``.
        actual = Pipeline({"remove_signature": None}).consume(input_text)
        self.assertEqual(expected, actual)

    def test_remove_signature_step_empty_item(self) -> None:
        # Calling the step directly with an empty item must yield an empty string.
        self.assertEqual(step("", {}, None, ""), "")

    @patch(
        "ai_data_preprocessing_queue.Steps.remove_signature.remove_greetings_and_following_text",
        side_effect=Exception("Test error"),
    )
    def test_remove_signature_step_error(self, _: MagicMock) -> None:
        # With the greeting helper patched to raise, the step is expected to
        # let an exception reach the caller.
        text = "Please schedule the kickoff meeting for next Tuesday morning at 10:00."
        with self.assertRaises(Exception):
            step(text, {}, None, "")
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()