Handle HTML-only email

This commit is contained in:
j3d1 2024-01-15 23:38:03 +01:00
parent 892493a300
commit 5e1890e990
4 changed files with 75 additions and 2 deletions

View file

@@ -145,7 +145,17 @@ def parse_email_body(raw, log=None):
else:
log.info("Attachment", ctype, cdispo)
else:
body = parsed.get_payload(decode=True).decode('utf-8')
if parsed.get_content_type() == 'text/plain':
body = parsed.get_payload(decode=True).decode('utf-8')
elif parsed.get_content_type() == 'text/html':
from bs4 import BeautifulSoup
import re
body = parsed.get_payload(decode=True).decode('utf-8')
soup = BeautifulSoup(body, 'html.parser')
body = re.sub(r'([\r\n]+.?)*[\r\n]', r'\n', soup.get_text()).strip('\n')
else:
log.warning("Unknown content type", parsed.get_content_type())
body = "Unknown content type"
body = unescape_and_decode_quoted_printable(body)
body = unescape_and_decode_base64(body)
log.debug(body)
@@ -250,5 +260,5 @@ class LMTPHandler:
return '250 Message accepted for delivery'
except Exception as e:
log.error(e)
log.error(type(e), e)
return '451 Internal server error'