|
29 | 29 | from mailparser.utils import ( |
30 | 30 | convert_mail_date, |
31 | 31 | fingerprints, |
| 32 | + get_addresses, |
32 | 33 | get_header, |
33 | 34 | get_mail_keys, |
34 | 35 | get_to_domains, |
|
62 | 63 | mail_test_16 = os.path.join(base_path, "mails", "mail_test_16") |
63 | 64 | mail_test_17 = os.path.join(base_path, "mails", "mail_test_17") |
64 | 65 | mail_test_18 = os.path.join(base_path, "mails", "mail_test_18") |
| 66 | +mail_test_19 = os.path.join(base_path, "mails", "mail_test_19") |
65 | 67 | mail_malformed_1 = os.path.join(base_path, "mails", "mail_malformed_1") |
66 | 68 | mail_malformed_2 = os.path.join(base_path, "mails", "mail_malformed_2") |
67 | 69 | mail_malformed_3 = os.path.join(base_path, "mails", "mail_malformed_3") |
@@ -1084,3 +1086,162 @@ def test_unicode_decode_error_in_payload(self): |
1084 | 1086 | mail = mailparser.parse_from_string(raw_mail) |
1085 | 1087 | # Should have parsed successfully and body contains the text |
1086 | 1088 | self.assertIn("hello", mail.body) |
| 1089 | + |
| 1090 | + |
class TestEmailAsDisplayName(unittest.TestCase):
    """
    Address-header parsing when the display name looks like an email address.

    Per RFC 5322 section 3.4 an unquoted "@" is not allowed inside the
    display-name phrase, which makes a header such as
    ``From: alice@example.com <bob@example.com>`` technically non-conforming.
    Python's strict parser (CVE-2023-27043 hardening) answers ``[('', '')]``
    for that input, silently hiding the real sender.

    mail-parser is a security/forensics tool, so it deliberately sidesteps
    that strictness and falls back to a regex, letting analysts see the
    address values actually present in the header.
    """

    # ------------------------------------------------------------------
    # Shared assertion helpers (names do not start with "test", so the
    # unittest loader does not collect them)
    # ------------------------------------------------------------------

    def _assert_entry(self, entry, expected_name, expected_addr):
        """Unpack one ``(name, address)`` entry and compare both halves."""
        display_name, address = entry
        self.assertEqual(address, expected_addr)
        self.assertEqual(display_name, expected_name)

    def _assert_sole_entry(self, entries, expected_name, expected_addr):
        """Require exactly one entry in *entries* and verify its content."""
        self.assertEqual(len(entries), 1)
        self._assert_entry(entries[0], expected_name, expected_addr)

    # ------------------------------------------------------------------
    # Tests driven by the mail_test_19 fixture file
    # ------------------------------------------------------------------

    def test_from_email_as_display_name(self):
        """From header whose display name is an email address parses correctly."""
        senders = mailparser.parse_from_file(mail_test_19).from_
        self.assertIsInstance(senders, list)
        self._assert_sole_entry(senders, "alice@example.com", "bob@example.com")

    def test_cc_email_as_display_name(self):
        """CC header whose display name is an email address parses correctly."""
        copied = mailparser.parse_from_file(mail_test_19).cc
        self.assertIsInstance(copied, list)
        self._assert_sole_entry(copied, "eve@example.com", "frank@example.com")

    def test_reply_to_email_as_display_name(self):
        """Reply-To header whose display name is an email address parses correctly."""
        reply_targets = mailparser.parse_from_file(mail_test_19).reply_to
        self.assertIsInstance(reply_targets, list)
        self._assert_sole_entry(
            reply_targets, "henry@example.com", "ivan@example.com"
        )

    def test_to_mixed_addresses(self):
        """To header mixing a quoted name and a bare address parses correctly."""
        recipients = mailparser.parse_from_file(mail_test_19).to
        self.assertIsInstance(recipients, list)
        self.assertEqual(len(recipients), 2)
        # First recipient: "Charlie Brown" <charlie@example.com>
        self._assert_entry(recipients[0], "Charlie Brown", "charlie@example.com")
        # Second recipient: dave@example.com (bare address, no display name)
        self._assert_entry(recipients[1], "", "dave@example.com")

    # ------------------------------------------------------------------
    # Edge cases fed through parse_from_string (no extra mail files needed)
    # ------------------------------------------------------------------

    def test_same_email_as_name_and_address_suppresses_name(self):
        """A display name identical to the address collapses to an empty name.

        Exercises ``From: bob@example.com <bob@example.com>``, which is both
        RFC non-compliant (unquoted @) and redundant. Once the regex fallback
        has recovered the address, the existing name-suppression rule
        (decoded_name == email_addr gives "") must still apply.
        """
        raw_message = "From: bob@example.com <bob@example.com>\nSubject: x\n\nBody"
        senders = mailparser.parse_from_string(raw_message).from_
        self._assert_sole_entry(senders, "", "bob@example.com")

    def test_quoted_email_as_display_name(self):
        """A properly quoted email-as-name (RFC-compliant) goes through the strict parser."""
        raw_message = 'From: "alice@example.com" <bob@example.com>\nSubject: x\n\nBody'
        senders = mailparser.parse_from_string(raw_message).from_
        self._assert_sole_entry(senders, "alice@example.com", "bob@example.com")

    def test_standard_display_name_unchanged(self):
        """The ordinary ``Name <email>`` form keeps working (regression guard)."""
        raw_message = "From: Alice Smith <alice@example.com>\nSubject: x\n\nBody"
        senders = mailparser.parse_from_string(raw_message).from_
        self._assert_sole_entry(senders, "Alice Smith", "alice@example.com")

    def test_bare_address_no_display_name(self):
        """A bare address without display name yields an empty name (regression guard)."""
        raw_message = "From: alice@example.com\nSubject: x\n\nBody"
        senders = mailparser.parse_from_string(raw_message).from_
        self._assert_sole_entry(senders, "", "alice@example.com")

    def test_empty_header_returns_empty_list(self):
        """An absent address header yields [] rather than a placeholder entry."""
        parsed = mailparser.parse_from_string("Subject: x\n\nBody")
        # Python's getaddresses("") produces [('', '')]; entries with an empty
        # address are filtered out so that absent headers never show up in
        # the parsed mail object.
        self.assertEqual(parsed.from_, [])

    # ------------------------------------------------------------------
    # Direct unit tests of the get_addresses() helper
    # ------------------------------------------------------------------

    def test_get_addresses_email_as_name(self):
        """get_addresses() fallback recovers the address when the name is an email."""
        expected = [("alice@example.com", "bob@example.com")]
        self.assertEqual(
            get_addresses("alice@example.com <bob@example.com>"), expected
        )

    def test_get_addresses_standard_format(self):
        """get_addresses() strict path handles a normal ``Name <email>`` value."""
        expected = [("Alice Smith", "alice@example.com")]
        self.assertEqual(get_addresses("Alice Smith <alice@example.com>"), expected)

    def test_get_addresses_bare_email(self):
        """get_addresses() handles a bare email address without display name."""
        expected = [("", "alice@example.com")]
        self.assertEqual(get_addresses("alice@example.com"), expected)

    def test_get_addresses_empty_header(self):
        """get_addresses() on an empty string returns [('', '')], the raw Python lib result.

        That ('', '') entry is filtered out in __getattr__ (core.py) so absent
        headers do not appear in the parsed mail output.
        """
        expected = [("", "")]
        self.assertEqual(get_addresses(""), expected)

    def test_get_addresses_multiple_with_email_as_name(self):
        """get_addresses() fallback copes with several addresses that all fail strict parsing."""
        pairs = get_addresses(
            "alice@example.com <bob@example.com>, eve@example.com <frank@example.com>"
        )
        expected_pairs = (
            ("alice@example.com", "bob@example.com"),
            ("eve@example.com", "frank@example.com"),
        )
        self.assertEqual(len(pairs), 2)
        for position, expected_pair in enumerate(expected_pairs):
            self.assertEqual(pairs[position], expected_pair)
0 commit comments