From 734af10525040170a9a00810b844d005ba42d45c Mon Sep 17 00:00:00 2001 From: jedi Date: Tue, 9 Jan 2024 22:42:47 +0100 Subject: [PATCH] parse and save email attachments --- core/core/settings.py | 21 +-- core/core/test_runner.py | 12 ++ core/files/models.py | 25 ++- core/files/tests/v1/test_files.py | 19 +++ core/mail/migrations/0003_emailattachment.py | 59 ++++++++ core/mail/models.py | 6 + core/mail/protocol.py | 151 ++++++++++++------- core/mail/tests/v2/test_mails.py | 140 ++++++++++++++++- 8 files changed, 357 insertions(+), 76 deletions(-) create mode 100644 core/mail/migrations/0003_emailattachment.py diff --git a/core/core/settings.py b/core/core/settings.py index 295c5af..a6f7ce6 100644 --- a/core/core/settings.py +++ b/core/core/settings.py @@ -197,21 +197,14 @@ DATA_UPLOAD_MAX_MEMORY_SIZE = 1024 * 1024 * 128 # 128 MB DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField' -if 'test' in sys.argv: - CHANNEL_LAYERS = { - 'default': { - 'BACKEND': 'channels.layers.InMemoryChannelLayer' - } +CHANNEL_LAYERS = { + 'default': { + 'BACKEND': 'channels_redis.core.RedisChannelLayer', + 'CONFIG': { + 'hosts': [('localhost', 6379)], + }, } -else: - CHANNEL_LAYERS = { - 'default': { - 'BACKEND': 'channels_redis.core.RedisChannelLayer', - 'CONFIG': { - 'hosts': [('localhost', 6379)], - }, - } - } +} TEST_RUNNER = 'core.test_runner.FastTestRunner' diff --git a/core/core/test_runner.py b/core/core/test_runner.py index fb131a9..bd7f9eb 100644 --- a/core/core/test_runner.py +++ b/core/core/test_runner.py @@ -19,3 +19,15 @@ class FastTestRunner(DiscoverRunner): settings.PASSWORD_HASHERS = ( 'django.contrib.auth.hashers.MD5PasswordHasher', ) + + settings.CHANNEL_LAYERS = { + 'default': { + 'BACKEND': 'channels.layers.InMemoryChannelLayer' + } + } + settings.DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.sqlite3', + 'NAME': ':memory:', + } + } diff --git a/core/files/models.py b/core/files/models.py index d417790..c16c417 100644 --- a/core/files/models.py +++ b/core/files/models.py @@ -27,12 +27,16 @@ class FileManager(models.Manager): kwargs['file'] = ContentFile(content, content_hash) kwargs['hash'] = content_hash kwargs['mime_type'] = mime_type + elif 'file' in kwargs and 'hash' in kwargs and type(kwargs['file']) == ContentFile and 'mime_type' in kwargs: + pass else: raise ValueError('data must be a base64 encoded string or file and hash must be provided') try: return self.get(hash=kwargs['hash']), False except self.model.DoesNotExist: - return self.create(**kwargs), True + obj = super().create(**kwargs) + obj.file.save(content=kwargs['file'], name=kwargs['hash']) + return obj, True def create(self, **kwargs): if 'data' in kwargs and type(kwargs['data']) == str: @@ -51,23 +55,32 @@ class FileManager(models.Manager): kwargs['file'] = ContentFile(content, content_hash) kwargs['hash'] = content_hash kwargs['mime_type'] = mime_type - elif 'file' in kwargs and 'hash' in kwargs and type(kwargs['file']) == ContentFile: + elif 'file' in kwargs and 'hash' in kwargs and type(kwargs['file']) == ContentFile and 'mime_type' in kwargs: pass else: raise ValueError('data must be a base64 encoded string or file and hash must be provided') if not self.filter(hash=kwargs['hash']).exists(): - return super().create(**kwargs) + obj = super().create(**kwargs) + obj.file.save(content=kwargs['file'], name=kwargs['hash']) + return obj else: raise IntegrityError('File with this hash already exists') -class File(models.Model): - item = models.ForeignKey(Item, models.CASCADE, db_column='iid', null=True, blank=True, related_name='files') +class AbstractFile(models.Model): created_at = models.DateTimeField(blank=True, null=True) updated_at = models.DateTimeField(blank=True, null=True) deleted_at = models.DateTimeField(blank=True, null=True) - file = models.ImageField(upload_to=hash_upload) + file = models.FileField(upload_to=hash_upload) mime_type = models.CharField(max_length=255, null=False, blank=False) hash = models.CharField(max_length=64, null=False, blank=False, unique=True) objects = FileManager() + + class Meta: + abstract = True + + +class File(AbstractFile): + item = models.ForeignKey(Item, models.CASCADE, db_column='iid', null=True, blank=True, related_name='files') + pass diff --git a/core/files/tests/v1/test_files.py b/core/files/tests/v1/test_files.py index fd05b95..ce59b2c 100644 --- a/core/files/tests/v1/test_files.py +++ b/core/files/tests/v1/test_files.py @@ -1,4 +1,5 @@ from django.test import TestCase, Client +from django.core.files.base import ContentFile from files.models import File from inventory.models import Event, Container, Item @@ -13,13 +14,27 @@ class FileTestCase(TestCase): self.event = Event.objects.create(slug='EVENT', name='Event') self.box = Container.objects.create(name='BOX') + def test_create_file_raw(self): + from hashlib import sha256 + content = b"foo" + chash = sha256(content).hexdigest() + item = Item.objects.create(container=self.box, event=self.event, description='1') + file = File.objects.create(file=ContentFile(b"foo"), mime_type='text/plain', hash=chash, item=item) + file.save() + self.assertEqual(1, len(File.objects.all())) + self.assertEqual(content, File.objects.all()[0].file.read()) + self.assertEqual(chash, File.objects.all()[0].hash) + def test_list_files(self): import base64 + item = File.objects.create(data="data:text/plain;base64," + base64.b64encode(b"foo").decode('utf-8')) response = client.get('/api/1/files') self.assertEqual(response.status_code, 200) self.assertEqual(response.json()[0]['hash'], item.hash) self.assertEqual(len(response.json()[0]['hash']), 64) + self.assertEqual(len(File.objects.all()), 1) + self.assertEqual(File.objects.all()[0].file.read(), b"foo") def test_one_file(self): import base64 @@ -28,6 +43,8 @@ class FileTestCase(TestCase): self.assertEqual(response.status_code, 200) self.assertEqual(response.json()['hash'], item.hash) self.assertEqual(len(response.json()['hash']), 64) + self.assertEqual(len(File.objects.all()), 1) + self.assertEqual(File.objects.all()[0].file.read(), b"foo") def test_create_file(self): import base64 @@ -38,6 +55,8 @@ class FileTestCase(TestCase): content_type='application/json') self.assertEqual(response.status_code, 201) self.assertEqual(len(response.json()['hash']), 64) + self.assertEqual(len(File.objects.all()), 1) + self.assertEqual(File.objects.all()[0].file.read(), b"foo") def test_delete_file(self): import base64 diff --git a/core/mail/migrations/0003_emailattachment.py b/core/mail/migrations/0003_emailattachment.py new file mode 100644 index 0000000..f3c7281 --- /dev/null +++ b/core/mail/migrations/0003_emailattachment.py @@ -0,0 +1,59 @@ +# Generated by Django 4.2.7 on 2024-01-09 20:56 + +from django.db import migrations, models +import django.db.models.deletion +import files.models +from mail.models import Email +from mail.protocol import parse_email_body + + +class NullLogger: + def info(self, *args, **kwargs): + pass + + def warning(self, *args, **kwargs): + pass + + def debug(self, *args, **kwargs): + pass + + +class Migration(migrations.Migration): + dependencies = [ + ('mail', '0002_printed_quotable'), + ] + + def generate_email_attachments(apps, schema_editor): + for email in Email.objects.all(): + raw = email.raw + if raw is None: + continue + parsed, body, attachments = parse_email_body(raw.encode('utf-8'), NullLogger()) + email.attachments.clear() + for attachment in attachments: + email.attachments.add(attachment) + email.body = body + email.save() + + operations = [ + migrations.CreateModel( + name='EmailAttachment', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(blank=True, null=True)), + ('updated_at', models.DateTimeField(blank=True, null=True)), + ('deleted_at', models.DateTimeField(blank=True, null=True)), + ('file', models.ImageField(upload_to=files.models.hash_upload)), + ('mime_type', models.CharField(max_length=255)), + ('hash', models.CharField(max_length=64, unique=True)), + ('name', models.CharField(max_length=255)), + ('email', + models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='attachments', + to='mail.email')), + ], + options={ + 'abstract': False, + }, + ), + migrations.RunPython(generate_email_attachments), + ] diff --git a/core/mail/models.py b/core/mail/models.py index 50e2367..4bd0973 100644 --- a/core/mail/models.py +++ b/core/mail/models.py @@ -4,6 +4,7 @@ from django.db import models from django_softdelete.models import SoftDeleteModel from core.settings import MAIL_DOMAIN +from files.models import AbstractFile from inventory.models import Event from tickets.models import IssueThread @@ -32,3 +33,8 @@ class EventAddress(models.Model): id = models.AutoField(primary_key=True) event = models.ForeignKey(Event, models.SET_NULL, null=True) address = models.CharField(max_length=255) + + +class EmailAttachment(AbstractFile): + email = models.ForeignKey(Email, models.CASCADE, related_name='attachments', null=True) + name = models.CharField(max_length=255) diff --git a/core/mail/protocol.py b/core/mail/protocol.py index a57d2ff..7d5c68f 100644 --- a/core/mail/protocol.py +++ b/core/mail/protocol.py @@ -3,10 +3,11 @@ import logging import aiosmtplib from asgiref.sync import sync_to_async from channels.layers import get_channel_layer +from django.core.files.base import ContentFile -from mail.models import Email, EventAddress +from mail.models import Email, EventAddress, EmailAttachment from notify_sessions.models import SystemEvent -from tickets.models import IssueThread, StateChange +from tickets.models import IssueThread def find_quoted_printable(s, marker): @@ -99,6 +100,96 @@ def find_target_event(address): pass return None + +def parse_email_body(raw, log=None): + import email + from hashlib import sha256 + + attachments = [] + + parsed = email.message_from_bytes(raw) + body = "" + if parsed.is_multipart(): + for part in parsed.walk(): + ctype = part.get_content_type() + cdispo = str(part.get('Content-Disposition')) + + # skip any text/plain (txt) attachments + if ctype == 'text/plain' and 'attachment' not in cdispo: + segment = part.get_payload(decode=True).decode('utf-8') + segment = unescape_and_decode_quoted_printable(segment) + segment = unescape_and_decode_base64(segment) + log.debug(segment) + body = body + segment + elif 'attachment' in cdispo or 'inline' in cdispo: + file = ContentFile(part.get_payload(decode=True)) + chash = sha256(file.read()).hexdigest() + name = part.get_filename() + if name is None: + name = "unnamed" + attachment, _ = EmailAttachment.objects.get_or_create( + name=name, mime_type=ctype, file=file, hash=chash) + attachment.save() + attachments.append(attachment) + if 'inline' in cdispo: + body = body + f'' + log.info("Image", ctype, attachment.id) + else: + log.info("Attachment", ctype, cdispo) + else: + body = parsed.get_payload(decode=True).decode('utf-8') + + return parsed, body, attachments + + +def receive_email(envelope, log=None): + parsed, body, attachments = parse_email_body(envelope.content, log) + + header_from = parsed.get('From') + header_to = parsed.get('To') + header_in_reply_to = parsed.get('In-Reply-To') + header_message_id = parsed.get('Message-ID') + + if header_from != envelope.mail_from: + log.warning("Header from does not match envelope from") + log.info(f"Header from: {header_from}, envelope from: {envelope.mail_from}") + + if header_to != envelope.rcpt_tos[0]: + log.warning("Header to does not match envelope to") + log.info(f"Header to: {header_to}, envelope to: {envelope.rcpt_tos[0]}") + + recipient = envelope.rcpt_tos[0].lower() + sender = envelope.mail_from + subject = parsed.get('Subject') + subject = unescape_and_decode_quoted_printable(subject) + subject = unescape_and_decode_base64(subject) + target_event = find_target_event(recipient) + + active_issue_thread, new = find_active_issue_thread(header_in_reply_to, subject) + body_decoded = body + body_decoded = unescape_and_decode_quoted_printable(body_decoded) + body_decoded = unescape_and_decode_base64(body_decoded) + + email = Email.objects.create( + sender=sender, recipient=recipient, body=body_decoded, subject=subject, reference=header_message_id, + in_reply_to=header_in_reply_to, raw=envelope.content.decode('utf-8'), event=target_event, + issue_thread=active_issue_thread) + for attachment in attachments: + email.attachments.add(attachment) + email.save() + + reply = None + if new: + references = collect_references(active_issue_thread) + + reply_email = Email.objects.create( + sender=recipient, recipient=sender, body="Thank you for your message.", subject="Message received", + in_reply_to=header_message_id, event=target_event, issue_thread=active_issue_thread) + reply = make_reply(reply_email, references) + + return email, new, reply + + class LMTPHandler: async def handle_RCPT(self, server, session, envelope, address, rcpt_options): from core.settings import MAIL_DOMAIN @@ -109,7 +200,6 @@ class LMTPHandler: return '250 OK' async def handle_DATA(self, server, session, envelope): - import email log = logging.getLogger('mail.log') log.setLevel(logging.DEBUG) log.info('Message from %s' % envelope.mail_from) @@ -117,51 +207,7 @@ class LMTPHandler: log.info('Message data:\n') try: - parsed = email.message_from_bytes(envelope.content) - body = "" - if parsed.is_multipart(): - for part in parsed.walk(): - ctype = part.get_content_type() - cdispo = str(part.get('Content-Disposition')) - - # skip any text/plain (txt) attachments - if ctype == 'text/plain' and 'attachment' not in cdispo: - body = part.get_payload(decode=True) - else: - log.info("Attachment", ctype, cdispo) - else: - body = parsed.get_payload(decode=True) - log.info(body) - - header_from = parsed.get('From') - header_to = parsed.get('To') - header_in_reply_to = parsed.get('In-Reply-To') - header_message_id = parsed.get('Message-ID') - - if header_from != envelope.mail_from: - log.warning("Header from does not match envelope from") - log.info(f"Header from: {header_from}, envelope from: {envelope.mail_from}") - - if header_to != envelope.rcpt_tos[0]: - log.warning("Header to does not match envelope to") - log.info(f"Header to: {header_to}, envelope to: {envelope.rcpt_tos[0]}") - - recipient = envelope.rcpt_tos[0].lower() - sender = envelope.mail_from - subject = parsed.get('Subject') - subject = unescape_and_decode_quoted_printable(subject) - subject = unescape_and_decode_base64(subject) - target_event = await sync_to_async(find_target_event)(recipient) - - active_issue_thread, new = await sync_to_async(find_active_issue_thread)(header_in_reply_to, subject) - body_decoded = body.decode('utf-8') - body_decoded = unescape_and_decode_quoted_printable(body_decoded) - body_decoded = unescape_and_decode_base64(body_decoded) - - email = await sync_to_async(Email.objects.create)( - sender=sender, recipient=recipient, body=body_decoded, subject=subject, reference=header_message_id, - in_reply_to=header_in_reply_to, raw=envelope.content.decode('utf-8'), event=target_event, - issue_thread=active_issue_thread) + email, new, reply = await sync_to_async(receive_email)(envelope, log) log.info(f"Created email {email.id}") systemevent = await sync_to_async(SystemEvent.objects.create)(type='email received', reference=email.id) log.info(f"Created system event {systemevent.id}") @@ -172,15 +218,10 @@ class LMTPHandler: ) log.info(f"Sent message to frontend") if new: - references = await sync_to_async(collect_references)(active_issue_thread) - - reply_email = await sync_to_async(Email.objects.create)( - sender=recipient, recipient=sender, body="Thank you for your message.", subject="Message received", - in_reply_to=header_message_id, event=target_event, issue_thread=active_issue_thread) - await send_smtp(make_reply(reply_email, references), log) + await send_smtp(reply, log) log.info("Sent auto reply") return '250 Message accepted for delivery' except Exception as e: log.error(e) - return '550 Message rejected' + return '451 Internal server error' diff --git a/core/mail/tests/v2/test_mails.py b/core/mail/tests/v2/test_mails.py index 939365b..84aa7ec 100644 --- a/core/mail/tests/v2/test_mails.py +++ b/core/mail/tests/v2/test_mails.py @@ -8,7 +8,7 @@ from knox.models import AuthToken from authentication.models import ExtendedUser from core.settings import MAIL_DOMAIN from inventory.models import Event -from mail.models import Email, EventAddress +from mail.models import Email, EventAddress, EmailAttachment from mail.protocol import LMTPHandler from tickets.models import IssueThread, StateChange @@ -301,3 +301,141 @@ class LMTPHandlerTestCase(TestCase): # TODO replace with less hacky test states = StateChange.objects.filter(issue_thread=IssueThread.objects.all()[0]) self.assertEqual(1, len(states)) self.assertEqual('pending_new', states[0].state) + + def test_split_text_inline_image(self): + from aiosmtpd.smtp import Envelope + from asgiref.sync import async_to_sync + import aiosmtplib + aiosmtplib.send = make_mocked_coro() + handler = LMTPHandler() + server = mock.Mock() + session = mock.Mock() + envelope = Envelope() + envelope.mail_from = 'test1@test' + envelope.rcpt_tos = ['test2@test'] + envelope.content = b'''Subject: test +From: test1@test +To: test2@test +Message-ID: <1@test> +Content-Type: multipart/alternative; boundary="abc" + +--abc +Content-Type: text/plain; charset=utf-8 + +test1 + +--abc +Content-Type: image/jpeg; name="test.jpg" +Content-Disposition: inline; filename="test.jpg" +Content-Transfer-Encoding: base64 +Content-ID: <1> +X-Attachment-Id: 1 + +dGVzdGltYWdl + +--abc +Content-Type: text/plain; charset=utf-8 + +test2 + +--abc--''' + + result = async_to_sync(handler.handle_DATA)(server, session, envelope) + self.assertEqual(result, '250 Message accepted for delivery') + self.assertEqual(len(Email.objects.all()), 2) + self.assertEqual(len(IssueThread.objects.all()), 1) + aiosmtplib.send.assert_called_once() + self.assertEqual('test', Email.objects.all()[0].subject) + self.assertEqual('test1@test', Email.objects.all()[0].sender) + self.assertEqual('test2@test', Email.objects.all()[0].recipient) + self.assertEqual('test1\ntest2\n', Email.objects.all()[0].body) + self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[0].issue_thread) + self.assertEqual('<1@test>', Email.objects.all()[0].reference) + self.assertEqual(None, Email.objects.all()[0].in_reply_to) + self.assertEqual('Message received', Email.objects.all()[1].subject) + self.assertEqual('test2@test', Email.objects.all()[1].sender) + self.assertEqual('test1@test', Email.objects.all()[1].recipient) + self.assertEqual('Thank you for your message.', Email.objects.all()[1].body) + self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[1].issue_thread) + self.assertTrue(Email.objects.all()[1].reference.startswith("<")) + self.assertTrue(Email.objects.all()[1].reference.endswith("@localhost>")) + self.assertEqual("<1@test>", Email.objects.all()[1].in_reply_to) + self.assertEqual('test', IssueThread.objects.all()[0].name) + self.assertEqual('pending_new', IssueThread.objects.all()[0].state) + self.assertEqual(None, IssueThread.objects.all()[0].assigned_to) + states = StateChange.objects.filter(issue_thread=IssueThread.objects.all()[0]) + self.assertEqual(1, len(states)) + self.assertEqual('pending_new', states[0].state) + self.assertEqual(1, len(EmailAttachment.objects.all())) + self.assertEqual(1, EmailAttachment.objects.all()[0].id) + self.assertEqual('image/jpeg', EmailAttachment.objects.all()[0].mime_type) + self.assertEqual('test.jpg', EmailAttachment.objects.all()[0].name) + file_content = EmailAttachment.objects.all()[0].file.read() + self.assertEqual(b'testimage', file_content) + + def test_text_with_attachment(self): + from aiosmtpd.smtp import Envelope + from asgiref.sync import async_to_sync + import aiosmtplib + aiosmtplib.send = make_mocked_coro() + handler = LMTPHandler() + server = mock.Mock() + session = mock.Mock() + envelope = Envelope() + envelope.mail_from = 'test1@test' + envelope.rcpt_tos = ['test2@test'] + envelope.content = b'''Subject: test +From: test1@test +To: test2@test +Message-ID: <1@test> +Content-Type: multipart/mixed; boundary="abc" + +--abc +Content-Type: text/plain; charset=utf-8 + +test1 + +--abc +Content-Type: image/jpeg; name="test.jpg" +Content-Disposition: attachment; filename="test.jpg" +Content-Transfer-Encoding: base64 +Content-ID: <1> +X-Attachment-Id: 1 + +dGVzdGltYWdl + +--abc--''' + + result = async_to_sync(handler.handle_DATA)(server, session, envelope) + self.assertEqual(result, '250 Message accepted for delivery') + self.assertEqual(len(Email.objects.all()), 2) + self.assertEqual(len(IssueThread.objects.all()), 1) + aiosmtplib.send.assert_called_once() + self.assertEqual('test', Email.objects.all()[0].subject) + self.assertEqual('test1@test', Email.objects.all()[0].sender) + self.assertEqual('test2@test', Email.objects.all()[0].recipient) + self.assertEqual('test1\n', Email.objects.all()[0].body) + self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[0].issue_thread) + self.assertEqual('<1@test>', Email.objects.all()[0].reference) + self.assertEqual(None, Email.objects.all()[0].in_reply_to) + self.assertEqual('Message received', Email.objects.all()[1].subject) + self.assertEqual('test2@test', Email.objects.all()[1].sender) + self.assertEqual('test1@test', Email.objects.all()[1].recipient) + self.assertEqual('Thank you for your message.', Email.objects.all()[1].body) + self.assertEqual(IssueThread.objects.all()[0], Email.objects.all()[1].issue_thread) + self.assertTrue(Email.objects.all()[1].reference.startswith("<")) + self.assertTrue(Email.objects.all()[1].reference.endswith("@localhost>")) + self.assertEqual("<1@test>", Email.objects.all()[1].in_reply_to) + self.assertEqual('test', IssueThread.objects.all()[0].name) + self.assertEqual('pending_new', IssueThread.objects.all()[0].state) + self.assertEqual(None, IssueThread.objects.all()[0].assigned_to) + states = StateChange.objects.filter(issue_thread=IssueThread.objects.all()[0]) + self.assertEqual(1, len(states)) + self.assertEqual('pending_new', states[0].state) + self.assertEqual(1, len(EmailAttachment.objects.all())) + self.assertEqual(1, EmailAttachment.objects.all()[0].id) + self.assertEqual('image/jpeg', EmailAttachment.objects.all()[0].mime_type) + self.assertEqual('test.jpg', EmailAttachment.objects.all()[0].name) + file_content = EmailAttachment.objects.all()[0].file.read() + self.assertEqual(b'testimage', file_content) + -- 2.39.5