Skip to content

Commit 643e5aa

Browse files
committed
KIT-4469: fixed; added more unit tests
1 parent 9db279c commit 643e5aa

2 files changed

Lines changed: 47 additions & 5 deletions

File tree

ai_data_preprocessing_queue/Steps/remove_signature.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import re
2+
from typing import Any
23

34

45
def remove_newline(text: str) -> str:
@@ -40,7 +41,7 @@ def remove_greetings_and_following_text(text: str) -> str:
4041
THANK_SUFFIXES = [
4142
r"(?:in advance(?: for (?:your|the) (?:help|support|understanding|assistance))?)",
4243
r"(?:for (?:your|the) (?:help|support|understanding|assistance))",
43-
r"(?:schon mal )?(?:im voraus)?(?: für (?:ihre|ihr|eure|die|den) (?:hilfe|support|verständnis))?",
44+
r"(?:schon mal\s+)?(?:im voraus\s+)?für\s+(?:ihre|ihr|eure|die|den)\s+(?:hilfe|support|verständnis)",
4445
r"vorab",
4546
r"kindly?"
4647
]
@@ -68,11 +69,11 @@ def remove_single_greeting_words(text: str, pattern: str) -> str:
6869
return re.sub(pattern, " ", text, flags=re.IGNORECASE | re.UNICODE)
6970

7071

71-
def step(text: str) -> str:
72-
if not text:
73-
return text
72+
def step(item: Any, item_state: dict[str, Any], global_state: dict[str, Any] | None, preprocessor_data: str) -> Any:
73+
if not item:
74+
return item
7475
try:
75-
text_greetings_removed = remove_greetings_and_following_text(text)
76+
text_greetings_removed = remove_greetings_and_following_text(item)
7677
thankyou_removed = remove_thanking_expressions(text_greetings_removed)
7778
single_greetings_removed = remove_single_greeting_words(thankyou_removed, single_greetings_pattern)
7879

tests/test_remove_signature.py

Lines changed: 41 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from parameterized import parameterized
44

5+
from ai_data_preprocessing_queue.Pipeline import Pipeline
56
from ai_data_preprocessing_queue.Steps.remove_signature import (
67
remove_greetings_and_following_text, remove_newline)
78

@@ -87,6 +88,46 @@ def test_remove_newline(self, name: str, input_text: str, expected: str) -> None
8788
def test_remove_greetings_and_following_text(self, name: str, input_text: str, expected: str) -> None:
8889
self.assertEqual(remove_greetings_and_following_text(input_text), expected)
8990

91+
@parameterized.expand([ # type: ignore[misc]
92+
(
93+
"remove_signature_basic",
94+
"We're sending the final draft for review. Best regards, Alice Johnson\nProject Lead",
95+
"We're sending the final draft for review.",
96+
),
97+
(
98+
"thanking_at_start",
99+
"Thank you very much for your support. "
100+
"I will prepare the contract and send it tomorrow.\n\nBest regards, Bob Brown",
101+
"I will prepare the contract and send it tomorrow.",
102+
),
103+
(
104+
"thanking_in_middle",
105+
"Thank you very much for your support. "
106+
"I appreciate your support on this migration. Thanks a lot, I will share the logs shortly.",
107+
"I appreciate your support on this migration. I will share the logs shortly.",
108+
),
109+
(
110+
"single_greeting_word_german",
111+
"The deliverables are ready. Grüße",
112+
"The deliverables are ready.",
113+
),
114+
(
115+
"german_empty_result",
116+
"Vielen Dank für Ihre Hilfe. Mit freundlichen Grüßen, Lena Meyer "
117+
"Und hier kommt noch mehr Text.",
118+
"",
119+
),
120+
(
121+
"no_change",
122+
"Please schedule the kickoff meeting for next Tuesday morning at 10:00.",
123+
"Please schedule the kickoff meeting for next Tuesday morning at 10:00.",
124+
),
125+
])
126+
def test_remove_signature_parameterized(self, name: str, input_text: str, expected: str) -> None:
127+
pipeline = Pipeline({"remove_signature": None})
128+
value = pipeline.consume(input_text)
129+
self.assertEqual(expected, value)
130+
90131

91132
if __name__ == "__main__":
92133
unittest.main()

0 commit comments

Comments
 (0)