#!/usr/bin/env python
"""
mraptor_milter

mraptor_milter is a milter script for the Sendmail and Postfix e-mail
servers. It parses MS Office documents (e.g. Word, Excel) to detect
malicious macros. Documents with malicious macros are removed and
replaced by harmless text files.

Supported formats:
- Word 97-2003 (.doc, .dot), Word 2007+ (.docm, .dotm)
- Excel 97-2003 (.xls), Excel 2007+ (.xlsm, .xlsb)
- PowerPoint 97-2003 (.ppt), PowerPoint 2007+ (.pptm, .ppsm)
- Word 2003 XML (.xml)
- Word/Excel Single File Web Page / MHTML (.mht)
- Publisher (.pub)

Author: Philippe Lagadec - http://www.decalage.info
License: BSD, see source code or documentation

mraptor_milter is part of the python-oletools package:
http://www.decalage.info/python/oletools
"""

# === LICENSE ==================================================================

# mraptor_milter is copyright (c) 2016-2017 Philippe Lagadec (http://www.decalage.info)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice, this
#    list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# --- CHANGELOG --------------------------------------------------------------
# 2016-08-08 v0.01 PL: - first version
# 2016-08-12 v0.02 PL: - added logging to file with time rotation
#                      - archive each e-mail to a file before filtering
# 2016-08-30 v0.03 PL: - added daemonize to run as a Unix daemon
# 2016-09-06 v0.50 PL: - fixed issue #20, is_zipfile on Python 2.6
# 2017-04-26 v0.51 PL: - fixed absolute imports (issue #141)

__version__ = '0.51'

# --- TODO -------------------------------------------------------------------

# TODO: option to run in the foreground for troubleshooting
# TODO: option to write logs to the console
# TODO: options to set listening port and interface
# TODO: config file for all parameters
# TODO: option to run as a non-privileged user
# TODO: handle files in archives


# --- IMPORTS ----------------------------------------------------------------

import Milter  # not part of requirements, therefore: # pylint: disable=import-error
import io
import time
import email
import sys
import os
import logging
import logging.handlers
import datetime
import StringIO  # not part of requirements, therefore: # pylint: disable=import-error

from socket import AF_INET6

# IMPORTANT: it should be possible to run oletools directly as scripts
# in any directory without installing them with pip or setup.py.
# In that case, relative imports are NOT usable.
# And to enable Python 2+3 compatibility, we need to use absolute imports,
# so we add the oletools parent folder to sys.path (absolute+normalized path):
_thismodule_dir = os.path.normpath(os.path.abspath(os.path.dirname(__file__)))
# print('_thismodule_dir = %r' % _thismodule_dir)
_parent_dir = os.path.normpath(os.path.join(_thismodule_dir, '..'))
# print('_parent_dir = %r' % _parent_dir)
# The sys.path tweak must run before the oletools import just below.
if _parent_dir not in sys.path:
    sys.path.insert(0, _parent_dir)

from oletools import olevba, mraptor

from Milter.utils import parse_addr  # not part of requirements, therefore: # pylint: disable=import-error

from zipfile import is_zipfile


# --- CONSTANTS --------------------------------------------------------------

# TODO: read parameters from a config file
# at postfix smtpd_milters = inet:127.0.0.1:25252
SOCKET = "inet:25252@127.0.0.1"  # bind to unix or tcp socket "inet:port@ip" or "/<path>/<to>/<something>.sock"
TIMEOUT = 30  # Milter timeout in seconds
# CFG_DIR = "/etc/macromilter/"
# LOG_DIR = "/var/log/macromilter/"

# TODO: different path on Windows:
LOGFILE_DIR = '/var/log/mraptor_milter'
# LOGFILE_DIR = '.'
LOGFILE_NAME = 'mraptor_milter.log'
LOGFILE_PATH = os.path.join(LOGFILE_DIR, LOGFILE_NAME)

# Directory where to save a copy of each received e-mail:
ARCHIVE_DIR = '/var/log/mraptor_milter'
# ARCHIVE_DIR = '.'

# file to store PID for daemonize
PIDFILE = "/tmp/mraptor_milter.pid"


# === LOGGING ================================================================

# Set up a specific logger with our desired output level
log = logging.getLogger('MRMilter')

# disable logging by default - enable it in main app:
log.setLevel(logging.CRITICAL+1)

# NOTE: all logging config is done in the main app, not here.
# === CLASSES ================================================================

# Inspired from https://github.com/jmehnle/pymilter/blob/master/milter-template.py
# TODO: check https://github.com/sdgathman/pymilter which looks more recent

class MacroRaptorMilter(Milter.Base):
    '''
    Milter that scans e-mail attachments for suspicious VBA macros.

    The message is buffered in memory while the MTA streams it (envelope
    sender, headers, body chunks). At end-of-message a copy is saved to
    ARCHIVE_DIR, then every non-multipart MIME part that looks like a
    supported document is parsed with olevba and scanned with MacroRaptor.
    Parts flagged as suspicious are replaced by a short plain-text notice.
    '''

    def __init__(self):
        # A new instance with each new connection.
        # each connection runs in its own thread and has its own myMilter
        # instance.  Python code must be thread safe.  This is trivial if only stuff
        # in myMilter instances is referenced.
        self.id = Milter.uniqueID()  # Integer incremented with each call.
        self.message = None     # in-memory copy of the current message
        self.canon_from = None  # envelope sender, set in envfrom()
        self.IP = None
        self.port = None
        self.flow = None
        self.scope = None
        self.IPname = None  # Name from a reverse IP lookup

    @Milter.noreply
    def connect(self, IPname, family, hostaddr):
        '''
        New connection (may contain several messages)

        :param IPname: Name from a reverse IP lookup
        :param family: IP version 4 (AF_INET) or 6 (AF_INET6)
        :param hostaddr: tuple (IP, port [, flow, scope])
        :return: Milter.CONTINUE
        '''
        # Examples:
        # (self, 'ip068.subnet71.example.com', AF_INET, ('215.183.71.68', 4720) )
        # (self, 'ip6.mxout.example.com', AF_INET6,
        #   ('3ffe:80e8:d8::1', 4720, 1, 0) )
        self.IP = hostaddr[0]
        self.port = hostaddr[1]
        if family == AF_INET6:
            self.flow = hostaddr[2]
            self.scope = hostaddr[3]
        else:
            self.flow = None
            self.scope = None
        self.IPname = IPname  # Name from a reverse IP lookup
        self.message = None  # content
        log.info("[%d] connect from host %s at %s", self.id, IPname, hostaddr)
        return Milter.CONTINUE

    @Milter.noreply
    def envfrom(self, mailfrom, *rest):
        '''
        Mail From - Called at the beginning of each message within a connection

        Starts a fresh in-memory copy of the message, beginning with an
        mbox-style "From " line.

        :param mailfrom: envelope sender as passed by the MTA
        :param rest: additional arguments passed by the MTA (only logged)
        :return: Milter.CONTINUE
        '''
        self.message = io.BytesIO()
        # NOTE: self.message is only an *internal* copy of message data.  You
        # must use addheader, chgheader, replacebody to change the message
        # on the MTA.
        self.canon_from = '@'.join(parse_addr(mailfrom))
        timestamp = time.ctime()
        self.message.write('From %s %s\n' % (self.canon_from, timestamp))
        log.debug('[%d] Mail From %s %s\n', self.id, self.canon_from, timestamp)
        log.debug('[%d] mailfrom=%r, rest=%r', self.id, mailfrom, rest)
        return Milter.CONTINUE

    @Milter.noreply
    def envrcpt(self, to, *rest):
        '''
        RCPT TO - the recipient is only logged

        :param to: envelope recipient as passed by the MTA
        :param rest: additional arguments passed by the MTA (only logged)
        :return: Milter.CONTINUE
        '''
        log.debug('[%d] RCPT TO %r, rest=%r\n', self.id, to, rest)
        return Milter.CONTINUE

    @Milter.noreply
    def header(self, header_field, header_field_value):
        '''
        Append one header line to the internal copy of the message

        :param header_field: header name
        :param header_field_value: header value
        :return: Milter.CONTINUE
        '''
        self.message.write("%s: %s\n" % (header_field, header_field_value))
        return Milter.CONTINUE

    @Milter.noreply
    def eoh(self):
        '''
        End of headers - writes the blank line separating headers from body

        :return: Milter.CONTINUE
        '''
        self.message.write("\n")
        return Milter.CONTINUE

    @Milter.noreply
    def body(self, chunk):
        '''
        Message body (chunked) - appends the chunk to the internal copy

        :param chunk: piece of the message body
        :return: Milter.CONTINUE
        '''
        self.message.write(chunk)
        return Milter.CONTINUE

    def close(self):
        '''
        End of the connection - nothing to clean up

        :return: Milter.CONTINUE
        '''
        return Milter.CONTINUE

    def abort(self):
        '''
        Clean up if the connection is closed by client

        :return: Milter.CONTINUE
        '''
        return Milter.CONTINUE

    def archive_message(self):
        '''
        Save a copy of the current message in its original form to a file
        in ARCHIVE_DIR.

        :return: nothing
        :raises IOError: if the file cannot be created or written
        '''
        date_time = datetime.datetime.utcnow().isoformat('_')
        # assumption: by combining datetime + milter id, the filename should be unique:
        # (the only case for duplicates is when restarting the milter twice in less than a second)
        fname = 'mail_%s_%d.eml' % (date_time, self.id)
        fname = os.path.join(ARCHIVE_DIR, fname)
        log.debug('[%d] Saving a copy of the original message to file %r', self.id, fname)
        # the context manager closes the file even if the write fails
        with open(fname, 'wb') as archive_file:
            archive_file.write(self.message.getvalue())

    def eom(self):
        '''
        This method is called when the end of the email message has been reached.
        This event also triggers the milter specific actions

        :return: the result of check_mraptor() (Milter.ACCEPT, or
            Milter.DISCARD if the scan failed); Milter.ACCEPT if an
            unexpected error occurs outside of the scan
        '''
        try:
            # set data pointer back to 0
            self.message.seek(0)
            # Archiving is best-effort: a full disk or a missing directory
            # must not cause the message to be accepted without being scanned.
            try:
                self.archive_message()
            except (IOError, OSError):
                log.exception('[%d] Could not archive the message - scanning it anyway', self.id)
            result = self.check_mraptor()
            if result is not None:
                return result
            return Milter.ACCEPT
        # if error make a fall-back to accept
        except Exception:
            exc_type, _exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            log.exception("[%d] Unexpected error - fall back to ACCEPT: %s %s %s",
                          self.id, exc_type, fname, exc_tb.tb_lineno)
            return Milter.ACCEPT

    def check_mraptor(self):
        '''
        Check the attachments of a message using mraptor.
        If an attachment is identified as suspicious, it is replaced by a simple text file.

        The message on the MTA is only modified when at least one part has
        been replaced.

        :return: Milter.ACCEPT or Milter.DISCARD if processing error
        '''
        msg = email.message_from_string(self.message.getvalue())
        result = Milter.ACCEPT
        modified = False
        top_level_modified = False
        try:
            for part in msg.walk():
                content_type = part.get_content_type()
                log.debug('[%d] Content-type: %r', self.id, content_type)
                # TODO: handle any content-type, but check the file magic?
                if content_type.startswith('multipart'):
                    continue
                filename = part.get_filename(None)
                log.debug('[%d] Analyzing attachment %r', self.id, filename)
                attachment = part.get_payload(decode=True)
                if attachment is None:
                    # Container parts that are not "multipart/*" (for example
                    # message/rfc822) have no payload of their own; their
                    # sub-parts are visited by walk().
                    continue
                # check if this is a supported file type (if not, just skip it)
                if not self._is_supported_attachment(attachment):
                    continue
                if self._has_suspicious_macro(attachment):
                    log.warning('[%d] The attachment %r contains a suspicious macro: replace it with a text file',
                                self.id, filename)
                    self._neutralize_part(part)
                    modified = True
                    if part is msg:
                        top_level_modified = True
                    # TODO: archive filtered e-mail to a file
                else:
                    log.debug('[%d] The attachment %r is clean.', self.id, filename)
        except Exception:
            log.exception('[%d] Error while processing the message', self.id)
            # TODO: depending on error, decide to forward the e-mail as-is or not
            result = Milter.DISCARD
        if modified:
            self._push_changes_to_mta(msg, top_level_modified)
        log.info('[%d] Message relayed', self.id)
        return result

    @staticmethod
    def _is_supported_attachment(data):
        '''
        Tell whether raw attachment data looks like a format olevba can parse
        (OLE, zip/OpenXML, Word 2003 XML or MHTML).

        :param data: decoded attachment content (str)
        :return: True if the data should be scanned
        '''
        # TODO: this function should be provided by olevba
        if data.startswith(olevba.olefile.MAGIC):
            return True
        if is_zipfile(StringIO.StringIO(data)):
            return True
        if 'http://schemas.microsoft.com/office/word/2003/wordml' in data:
            return True
        lowered = data.lower()
        return 'mime' in lowered and 'version' in lowered and 'multipart' in lowered

    @staticmethod
    def _has_suspicious_macro(data):
        '''
        Extract all VBA macros from the data and scan them with MacroRaptor.

        :param data: decoded attachment content (str)
        :return: the "suspicious" flag set by MacroRaptor.scan()
        '''
        vba_parser = olevba.VBA_Parser(filename='message', data=data)
        try:
            modules = [vba_code for (_subfilename, _stream_path, _vba_filename, vba_code)
                       in vba_parser.extract_all_macros()]
        finally:
            # release the resources held by the parser, even on parsing errors
            vba_parser.close()
        vba_code_all_modules = ''.join(code + '\n' for code in modules)
        scanner = mraptor.MacroRaptor(vba_code_all_modules)
        scanner.scan()
        return scanner.suspicious

    @staticmethod
    def _neutralize_part(part):
        '''
        Replace the content of a MIME part by a plain-text notice.

        :param part: email.message.Message object, modified in place
        :return: nothing
        '''
        part.set_payload('This attachment has been removed because it contains a suspicious macro.')
        part.set_type('text/plain')
        # replace_header() raises KeyError when the header is absent,
        # so add the header in that case
        if 'Content-Transfer-Encoding' in part:
            part.replace_header('Content-Transfer-Encoding', '7bit')
        else:
            part.add_header('Content-Transfer-Encoding', '7bit')

    def _push_changes_to_mta(self, msg, top_level_modified):
        '''
        Send the modified message back to the MTA and refresh the internal copy.

        :param msg: the modified email.message.Message object
        :param top_level_modified: True if the top-level part itself was
            replaced, in which case its MIME headers changed too
        :return: nothing
        '''
        self.message = io.BytesIO(str(msg))
        if top_level_modified:
            # The headers held by the MTA still describe the removed
            # attachment: bring them in line with the replacement text.
            # NOTE(review): relies on pymilter's chgheader(field, idx, value)
            # with a 1-based index - confirm against the installed version.
            for field in ('Content-Type', 'Content-Transfer-Encoding'):
                self.chgheader(field, 1, msg[field])
        # Only the body is sent: the headers are kept by the MTA, so sending
        # them again through replacebody() would duplicate them inside the body.
        flattened = msg.as_string()
        if flattened.startswith('\n'):
            # no header at all: the text starts with the separator line
            body = flattened[1:]
        else:
            # the first empty line separates the headers from the body
            _headers, _separator, body = flattened.partition('\n\n')
        self.replacebody(body)


# === MAIN ===================================================================

def main():
    '''
    Configure logging to a daily-rotated file, register the milter class
    and run the milter until it is stopped.
    '''
    # banner
    print('mraptor_milter v%s - http://decalage.info/python/oletools' % __version__)
    print('logging to file %s' % LOGFILE_PATH)
    print('Press Ctrl+C to stop.')

    # make sure the log directory exists:
    try:
        os.makedirs(LOGFILE_DIR)
    except OSError:
        # Usually the directory already exists. Any other problem
        # (e.g. permissions) shows up when the log file is opened below.
        pass
    # Add the log message handler to the logger
    # log to files rotating once a day:
    handler = logging.handlers.TimedRotatingFileHandler(LOGFILE_PATH, when='D', encoding='utf8')
    # create formatter and add it to the handlers
    formatter = logging.Formatter('%(asctime)s - %(levelname)8s: %(message)s')
    handler.setFormatter(formatter)
    log.addHandler(handler)
    # enable logging:
    log.setLevel(logging.DEBUG)

    log.info('Starting mraptor_milter v%s - listening on %s', __version__, SOCKET)
    log.debug('Python version: %s', sys.version)

    # Register to have the Milter factory create instances of the class:
    Milter.factory = MacroRaptorMilter
    flags = Milter.CHGBODY + Milter.CHGHDRS + Milter.ADDHDRS
    flags += Milter.ADDRCPT
    flags += Milter.DELRCPT
    Milter.set_flags(flags)  # tell Sendmail which features we use
    # set the "last" fall back to ACCEPT if exception occur
    Milter.set_exception_policy(Milter.ACCEPT)
    # start the milter
    Milter.runmilter("mraptor_milter", SOCKET, TIMEOUT)
    log.info('Stopping mraptor_milter.')


if __name__ == "__main__":

    # Using daemonize:
    # See http://daemonize.readthedocs.io/en/latest/
    from daemonize import Daemonize  # not part of requirements, therefore: # pylint: disable=import-error
    daemon = Daemonize(app="mraptor_milter", pid=PIDFILE, action=main)
    daemon.start()

    # Using python-daemon - Does not work as-is, need to create the PID file
    # See https://pypi.org/project/python-daemon/
    # See PEP-3143: https://www.python.org/dev/peps/pep-3143/
    # import daemon
    # import lockfile
    # with daemon.DaemonContext(pidfile=lockfile.FileLock(PIDFILE)):
    #     main()
# Memory