#!/usr/bin/env python
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
import os
import sys
import argparse
import itertools
try:
import win_unicode_console
WIN_UNICODE_CONSOLE = True
except ImportError:
WIN_UNICODE_CONSOLE = False
from struct import unpack_from
from oletools.olevba import VBA_Parser, decompress_stream
from oletools.common import codepages
# True when running under a Python 2 interpreter.
PYTHON2 = sys.version_info[0] < 3

# Codec used by decode(); assume 'latin1' unless redefined by the 'dir' stream.
codec = 'latin1'

if not PYTHON2:
    # Python 3 has no xrange(); range() is already lazy there.
    xrange = range

    def ord(x):
        """Return *x* unchanged (deliberately shadows the built-in ord()).

        Indexing a bytes object on Python 3 already yields an integer, so
        code written as ord(buffer[i]) keeps working on both versions.
        """
        return x

    def decode(x):
        """Decode the bytes *x* with the current module-level codec."""
        return x.decode(codec, errors='replace')
else:
    def decode(x):
        """Decode *x* with the current codec and re-encode it as UTF-8 bytes."""
        return x.decode(codec, errors='replace').encode('utf-8')
# Module metadata: description, license, project URL, version and author contact.
__description__ = 'A VBA p-code disassembler'
__license__ = 'GPL'
__uri__ = 'https://github.com/bontchev/pcodedmp'
__VERSION__ = '1.2.6'
__author__ = 'Vesselin Bontchev'
__email__ = 'vbontchev@yahoo.com'
def hexdump(buffer, length=16):
    """Return a printable hex dump of *buffer*.

    The dump has one text line per *length* bytes. Each line holds the
    offset of its first byte (8 hex digits), the bytes as space-separated
    two-digit hex values (padded so that the last column lines up), and the
    same bytes as text, where anything outside the printable ASCII range
    32..126 is shown as '.'. Every line ends with a newline; an empty
    buffer gives an empty string.
    """
    rows = []
    for offset in range(0, len(buffer), length):
        # bytearray() iterates as integers on both Python 2 and Python 3, so
        # no version-specific ord() handling is needed here.
        data = bytearray(buffer[offset:offset + length])
        theHex = ' '.join('{:02X}'.format(value) for value in data)
        theStr = ''.join(chr(value) if 31 < value < 127 else '.' for value in data)
        rows.append('{:08X} {:{}} {}\n'.format(offset, theHex, length * 3 - 1, theStr))
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(rows)
def getWord(buffer, offset, endian):
    """Read the unsigned 16-bit value stored at *offset* in *buffer*.

    *endian* is the struct byte-order prefix ('<' or '>').
    """
    (value,) = unpack_from(endian + 'H', buffer, offset)
    return value
def getDWord(buffer, offset, endian):
    """Read the unsigned 32-bit value stored at *offset* in *buffer*.

    *endian* is the struct byte-order prefix ('<' or '>').
    """
    (value,) = unpack_from(endian + 'L', buffer, offset)
    return value
def skipStructure(buffer, offset, endian, isLengthDW, elementSize, checkForMinusOne):
    """Return the offset just past a length-prefixed structure.

    The structure starts at *offset* with an element count, stored as a
    dword when *isLengthDW* is true and as a word otherwise, followed by
    that many elements of *elementSize* bytes each. When *checkForMinusOne*
    is true, a count with all bits set means that no elements follow and
    only the count itself is skipped.
    """
    if isLengthDW:
        count = getDWord(buffer, offset, endian)
        prefixSize, minusOne = 4, 0xFFFFFFFF
    else:
        count = getWord(buffer, offset, endian)
        prefixSize, minusOne = 2, 0xFFFF
    offset += prefixSize
    if checkForMinusOne and (count == minusOne):
        return offset
    return offset + count * elementSize
def getVar(buffer, offset, endian, isDWord):
    """Read a word or a dword at *offset* and step past it.

    Returns the tuple (new offset, value); a dword (4 bytes) is read when
    *isDWord* is true, a word (2 bytes) otherwise.
    """
    if isDWord:
        return offset + 4, getDWord(buffer, offset, endian)
    return offset + 2, getWord(buffer, offset, endian)
def getTypeAndLength(buffer, offset, endian):
    """Return the (type, length) byte pair stored at *offset* in *buffer*.

    For big-endian data ('>') the type byte comes first and the length byte
    second; for any other *endian* value the two bytes are swapped.
    """
    if endian == '>':
        typeIndex, lengthIndex = offset, offset + 1
    else:
        typeIndex, lengthIndex = offset + 1, offset
    return ord(buffer[typeIndex]), ord(buffer[lengthIndex])
def processDir(vbaParser, dirPath, args, output_file=sys.stdout):
    """Decompress and parse the 'dir' stream of a VBA project.

    The stream is read from ``vbaParser.ole_file`` at *dirPath* and
    decompressed. Unless ``args.disasmOnly`` is set, its size, a hex dump of
    it (only with ``args.verbose``) and every record found in it are printed
    to *output_file*.

    Side effect: a PROJ_CODEPAGE record replaces the module-level ``codec``
    with the codec matching the announced code page.

    Parsing is best-effort: the first error while reading a record stops
    the scan and whatever was collected so far is returned.

    Returns the tuple ``(dirData, codeModules, is64bit)``: the decompressed
    stream, the stream names taken from the MOD_UNICODESTREAM records, and
    whether a PROJ_SYSKIND record announced the value 3 (Win64).
    """
    global codec
    tags = {
        1 : 'PROJ_SYSKIND', # 0 - Win16, 1 - Win32, 2 - Mac, 3 - Win64
        2 : 'PROJ_LCID',
        3 : 'PROJ_CODEPAGE',
        4 : 'PROJ_NAME',
        5 : 'PROJ_DOCSTRING',
        6 : 'PROJ_HELPFILE',
        7 : 'PROJ_HELPCONTEXT',
        8 : 'PROJ_LIBFLAGS',
        9 : 'PROJ_VERSION',
        10 : 'PROJ_GUID',
        11 : 'PROJ_PROPERTIES',
        12 : 'PROJ_CONSTANTS',
        13 : 'PROJ_LIBID_REGISTERED',
        14 : 'PROJ_LIBID_PROJ',
        15 : 'PROJ_MODULECOUNT',
        16 : 'PROJ_EOF',
        17 : 'PROJ_TYPELIB_VERSION',
        18 : 'PROJ_COMPAT_EXE',
        19 : 'PROJ_COOKIE',
        20 : 'PROJ_LCIDINVOKE',
        21 : 'PROJ_COMMAND_LINE',
        22 : 'PROJ_REFNAME_PROJ',
        25 : 'MOD_NAME',
        26 : 'MOD_STREAM',
        28 : 'MOD_DOCSTRING',
        29 : 'MOD_HELPFILE',
        30 : 'MOD_HELPCONTEXT',
        32 : 'MOD_PROPERTIES',
        33 : 'MOD_FBASMOD_StdMods',
        34 : 'MOD_FBASMOD_Classes',
        35 : 'MOD_FBASMOD_Creatable',
        36 : 'MOD_FBASMOD_NoDisplay',
        37 : 'MOD_FBASMOD_NoEdit',
        38 : 'MOD_FBASMOD_RefLibs',
        39 : 'MOD_FBASMOD_NonBasic',
        40 : 'MOD_FBASMOD_Private',
        41 : 'MOD_FBASMOD_Internal',
        42 : 'MOD_FBASMOD_AllModTypes',
        43 : 'MOD_END',
        44 : 'MOD_COOKIETYPE',
        45 : 'MOD_BASECLASSNULL',
        46 : 'MOD_BASECLASS',
        47 : 'PROJ_LIBID_TWIDDLED',
        48 : 'PROJ_LIBID_EXTENDED',
        49 : 'MOD_TEXTOFFSET',
        50 : 'MOD_UNICODESTREAM',
        60 : 'PROJ_UNICODE_CONSTANTS',
        61 : 'PROJ_UNICODE_HELPFILE',
        62 : 'PROJ_UNICODE_REFNAME_PROJ',
        63 : 'PROJ_UNICODE_COMMAND_LINE',
        64 : 'PROJ_UNICODE_DOCSTRING',
        71 : 'MOD_UNICODE_NAME',
        72 : 'MOD_UNICODE_DOCSTRING',
        73 : 'MOD_UNICODE_HELPFILE'
    }
    if not args.disasmOnly:
        print('-' * 79, file=output_file)
        print('dir stream after decompression:', file=output_file)
    is64bit = False
    dirDataCompressed = vbaParser.ole_file.openstream(dirPath).read()
    dirData = decompress_stream(dirDataCompressed)
    streamSize = len(dirData)
    codeModules = []
    if not args.disasmOnly:
        print('{:d} bytes'.format(streamSize), file=output_file)
        if args.verbose:
            print(hexdump(dirData), file=output_file)
        print('dir stream parsed:', file=output_file)
    offset = 0
    # The "dir" stream is ALWAYS in little-endian format, even on a Mac
    while offset < streamSize:
        try:
            tag = getWord(dirData, offset, '<')
            wLength = getWord(dirData, offset + 2, '<')
            # The following idiocy is because Microsoft can't stick
            # to their own format specification
            if tag == 9:
                wLength = 6
            elif tag == 3:
                wLength = 2
            # End of the idiocy
            tagName = tags.get(tag, 'UNKNOWN')
            if not args.disasmOnly:
                print('{:08X}: {}'.format(offset, tagName), end='', file=output_file)
            # Step over the record header; the payload starts right after it.
            offset += 6
            if wLength:
                if not args.disasmOnly:
                    print(':', file=output_file)
                    print(hexdump(dirData[offset:offset + wLength]), file=output_file)
                if tagName == 'PROJ_CODEPAGE':
                    codepage = getWord(dirData, offset, '<')
                    codec = codepages.codepage2codec(codepage)
                elif tagName == 'MOD_UNICODESTREAM':
                    # Convert the stream name from UTF-16-LE to Unicode:
                    stream_name_unicode = dirData[offset:offset + wLength].decode('utf_16_le', errors='replace')
                    if PYTHON2:
                        # On Python 2 only, convert it to bytes in UTF-8, so that it is a native str:
                        stream_name = stream_name_unicode.encode('utf8', errors='replace')
                    else:
                        # On Python 3, native str are unicode
                        stream_name = stream_name_unicode
                    codeModules.append(stream_name)
                elif tagName == 'PROJ_SYSKIND':
                    sysKind = getDWord(dirData, offset, '<')
                    is64bit = sysKind == 3
                offset += wLength
            elif not args.disasmOnly:
                # Terminate the line left open by the tag name printed above.
                print('', file=output_file)
        except Exception:
            # Best-effort parsing: stop at the first malformed record, but let
            # KeyboardInterrupt and SystemExit propagate.
            break
    return dirData, codeModules, is64bit
def process_VBA_PROJECT(vbaParser, vbaProjectPath, args, output_file=sys.stdout):
    """Read and return the raw contents of the _VBA_PROJECT stream.

    Unless ``args.disasmOnly`` is set, a header and the size of the stream
    are printed to *output_file*, followed by a hex dump of the stream when
    ``args.verbose`` is set.
    """
    stream = vbaParser.ole_file.openstream(vbaProjectPath)
    vbaProjectData = stream.read()
    if not args.disasmOnly:
        print('-' * 79, file=output_file)
        print('_VBA_PROJECT stream:', file=output_file)
        print('{:d} bytes'.format(len(vbaProjectData)), file=output_file)
        if args.verbose:
            print(hexdump(vbaProjectData), file=output_file)
    return vbaProjectData
def getTheIdentifiers(vbaProjectData):
    """Extract the identifier names stored in the _VBA_PROJECT stream.

    Walks the stream from its header, skipping the reference records, the
    project-level tables and the module descriptors, until it reaches the
    identifier area, then decodes every identifier found there with
    decode().

    Returns the list of identifiers. The list is empty when the stream does
    not start with the magic word 0x61CC. If anything goes wrong while
    parsing, the error is reported on stderr and the identifiers collected
    up to that point are returned.
    """
    identifiers = []
    try:
        magic = getWord(vbaProjectData, 0, '<')
        if magic != 0x61CC:
            return identifiers
        version = getWord(vbaProjectData, 2, '<')
        # The layout of several records depends on the version word read above.
        unicodeRef = (version >= 0x5B) and (not version in [0x60, 0x62, 0x63]) or (version == 0x4E)
        unicodeName = (version >= 0x59) and (not version in [0x60, 0x62, 0x63]) or (version == 0x4E)
        # NOTE(review): '0x5F > version > 0x6B' can never be true (no value is
        # both below 0x5F and above 0x6B), so that term is always False -
        # confirm the intended version range.
        nonUnicodeName = ((version <= 0x59) and (version != 0x4E)) or (0x5F > version > 0x6B)
        # The word at offset 5 selects the byte order used for the rest of the
        # stream: 0x000E means big-endian, anything else little-endian.
        word = getWord(vbaProjectData, 5, '<')
        if word == 0x000E:
            endian = '>'
        else:
            endian = '<'
        offset = 0x1E
        offset, numRefs = getVar(vbaProjectData, offset, endian, False)
        offset += 2
        # Skip the reference records
        for _ in itertools.repeat(None, numRefs):
            offset, refLength = getVar(vbaProjectData, offset, endian, False)
            if refLength == 0:
                offset += 6
            else:
                if ((unicodeRef and (refLength < 5)) or ((not unicodeRef) and (refLength < 3))):
                    # Too short to hold the character examined below
                    offset += refLength
                else:
                    # Pick the third character of the reference (2 bytes per
                    # character when the references are stored as Unicode)
                    if unicodeRef:
                        c = vbaProjectData[offset + 4]
                    else:
                        c = vbaProjectData[offset + 2]
                    offset += refLength
                    # References whose third character is 'C' or 'D' are
                    # followed by one more length-prefixed structure
                    if chr(ord(c)) in ['C', 'D']:
                        offset = skipStructure(vbaProjectData, offset, endian, False, 1, False)
            offset += 10
            offset, word = getVar(vbaProjectData, offset, endian, False)
            if word:
                offset = skipStructure(vbaProjectData, offset, endian, False, 1, False)
                offset, wLength = getVar(vbaProjectData, offset, endian, False)
                if wLength:
                    offset += 2
                offset += wLength + 30
        # Number of entries in the class/user forms table
        offset = skipStructure(vbaProjectData, offset, endian, False, 2, False)
        # Number of compile-time identifier-value pairs
        offset = skipStructure(vbaProjectData, offset, endian, False, 4, False)
        offset += 2
        # Typeinfo typeID
        offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
        # Project description
        offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
        # Project help file name
        offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
        offset += 0x64
        # Skip the module descriptors
        offset, numProjects = getVar(vbaProjectData, offset, endian, False)
        for _ in itertools.repeat(None, numProjects):
            offset, wLength = getVar(vbaProjectData, offset, endian, False)
            # Code module name
            if unicodeName:
                offset += wLength
            if nonUnicodeName:
                if wLength:
                    offset, wLength = getVar(vbaProjectData, offset, endian, False)
                offset += wLength
            # Stream time
            offset = skipStructure(vbaProjectData, offset, endian, False, 1, False)
            offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
            offset, _ = getVar(vbaProjectData, offset, endian, False)
            if version >= 0x6B:
                offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
            offset = skipStructure(vbaProjectData, offset, endian, False, 1, True)
            offset += 2
            if version != 0x51:
                offset += 4
            offset = skipStructure(vbaProjectData, offset, endian, False, 8, False)
            offset += 11
        offset += 6
        offset = skipStructure(vbaProjectData, offset, endian, True, 1, False)
        offset += 6
        # Three words from which the numbers of junk and of real identifiers
        # are derived below
        offset, w0 = getVar(vbaProjectData, offset, endian, False)
        offset, numIDs = getVar(vbaProjectData, offset, endian, False)
        offset, w1 = getVar(vbaProjectData, offset, endian, False)
        offset += 4
        numJunkIDs = numIDs + w1 - w0
        numIDs = w0 - w1
        # Skip the junk IDs
        for _ in itertools.repeat(None, numJunkIDs):
            offset += 4
            idType, idLength = getTypeAndLength(vbaProjectData, offset, endian)
            offset += 2
            if idType > 0x7F:
                offset += 6
            offset += idLength
        # Now offset points to the start of the variable names area
        for _ in itertools.repeat(None, numIDs):
            isKwd = False
            ident = ''
            idType, idLength = getTypeAndLength(vbaProjectData, offset, endian)
            offset += 2
            # A zero type/length pair is followed, two bytes later, by the
            # real pair; such an entry is flagged as a keyword and is not
            # followed by the 4 trailing bytes skipped at the end of the loop
            if (idLength == 0) and (idType == 0):
                offset += 2
                idType, idLength = getTypeAndLength(vbaProjectData, offset, endian)
                offset += 2
                isKwd = True
            # Entries with the high bit set in the type carry 6 more bytes
            # before the name
            if idType & 0x80:
                offset += 6
            if idLength:
                ident = decode(vbaProjectData[offset:offset + idLength])
                identifiers.append(ident)
                offset += idLength
            if not isKwd:
                offset += 4
    except Exception as e:
        # Best effort: report the problem and return what was found so far
        print('Error: {}.'.format(e), file=sys.stderr)
    return identifiers
# Operand kinds that may appear in the 'args' lists of the table below:
#   'name', '0x', 'imp_', 'func_', 'var_', 'rec_', 'type_', 'context_'
#      2,     2,      2,       4,      4,      4,       4,          4
# NOTE(review): the second row presumably gives the size in bytes of each
# operand kind - confirm against the code that consumes this table.
#
# VBA7 opcodes; VBA3, VBA5 and VBA6 will be upconverted to these.
# Each entry maps an opcode number to its mnemonic ('mnem'), the list of its
# operand kinds ('args') and a 'varg' flag (presumably marking opcodes that
# are followed by a variable-length argument - TODO confirm).
opcodes = {
    0 : { 'mnem' : 'Imp', 'args' : [], 'varg' : False },
    1 : { 'mnem' : 'Eqv', 'args' : [], 'varg' : False },
    2 : { 'mnem' : 'Xor', 'args' : [], 'varg' : False },
    3 : { 'mnem' : 'Or', 'args' : [], 'varg' : False },
    4 : { 'mnem' : 'And', 'args' : [], 'varg' : False },
    5 : { 'mnem' : 'Eq', 'args' : [], 'varg' : False },
    6 : { 'mnem' : 'Ne', 'args' : [], 'varg' : False },
    7 : { 'mnem' : 'Le', 'args' : [], 'varg' : False },
    8 : { 'mnem' : 'Ge', 'args' : [], 'varg' : False },
    9 : { 'mnem' : 'Lt', 'args' : [], 'varg' : False },
    10 : { 'mnem' : 'Gt', 'args' : [], 'varg' : False },
    11 : { 'mnem' : 'Add', 'args' : [], 'varg' : False },
    12 : { 'mnem' : 'Sub', 'args' : [], 'varg' : False },
    13 : { 'mnem' : 'Mod', 'args' : [], 'varg' : False },
    14 : { 'mnem' : 'IDiv', 'args' : [], 'varg' : False },
    15 : { 'mnem' : 'Mul', 'args' : [], 'varg' : False },
    16 : { 'mnem' : 'Div', 'args' : [], 'varg' : False },
    17 : { 'mnem' : 'Concat', 'args' : [], 'varg' : False },
    18 : { 'mnem' : 'Like', 'args' : [], 'varg' : False },
    19 : { 'mnem' : 'Pwr', 'args' : [], 'varg' : False },
    20 : { 'mnem' : 'Is', 'args' : [], 'varg' : False },
    21 : { 'mnem' : 'Not', 'args' : [], 'varg' : False },
    22 : { 'mnem' : 'UMi', 'args' : [], 'varg' : False },
    23 : { 'mnem' : 'FnAbs', 'args' : [], 'varg' : False },
    24 : { 'mnem' : 'FnFix', 'args' : [], 'varg' : False },
    25 : { 'mnem' : 'FnInt', 'args' : [], 'varg' : False },
    26 : { 'mnem' : 'FnSgn', 'args' : [], 'varg' : False },
    27 : { 'mnem' : 'FnLen', 'args' : [], 'varg' : False },
    28 : { 'mnem' : 'FnLenB', 'args' : [], 'varg' : False },
    29 : { 'mnem' : 'Paren', 'args' : [], 'varg' : False },
    30 : { 'mnem' : 'Sharp', 'args' : [], 'varg' : False },
    31 : { 'mnem' : 'LdLHS', 'args' : ['name'], 'varg' : False },
    32 : { 'mnem' : 'Ld', 'args' : ['name'], 'varg' : False },
    33 : { 'mnem' : 'MemLd', 'args' : ['name'], 'varg' : False },
    34 : { 'mnem' : 'DictLd', 'args' : ['name'], 'varg' : False },
    35 : { 'mnem' : 'IndexLd', 'args' : ['0x'], 'varg' : False },
    36 : { 'mnem' : 'ArgsLd', 'args' : ['name', '0x'], 'varg' : False },
    37 : { 'mnem' : 'ArgsMemLd', 'args' : ['name', '0x'], 'varg' : False },
    38 : { 'mnem' : 'ArgsDictLd', 'args' : ['name', '0x'], 'varg' : False },
    39 : { 'mnem' : 'St', 'args' : ['name'], 'varg' : False },
    40 : { 'mnem' : 'MemSt', 'args' : ['name'], 'varg' : False },
    41 : { 'mnem' : 'DictSt', 'args' : ['name'], 'varg' : False },
    42 : { 'mnem' : 'IndexSt', 'args' : ['0x'], 'varg' : False },
    43 : { 'mnem' : 'ArgsSt', 'args' : ['name', '0x'], 'varg' : False },
    44 : { 'mnem' : 'ArgsMemSt', 'args' : ['name', '0x'], 'varg' : False },
    45 : { 'mnem' : 'ArgsDictSt', 'args' : ['name', '0x'], 'varg' : False },
    46 : { 'mnem' : 'Set', 'args' : ['name'], 'varg' : False },
    47 : { 'mnem' : 'Memset', 'args' : ['name'], 'varg' : False },
    48 : { 'mnem' : 'Dictset', 'args' : ['name'], 'varg' : False },
    49 : { 'mnem' : 'Indexset', 'args' : ['0x'], 'varg' : False },
    50 : { 'mnem' : 'ArgsSet', 'args' : ['name', '0x'], 'varg' : False },
    51 : { 'mnem' : 'ArgsMemSet', 'args' : ['name', '0x'], 'varg' : False },
    52 : { 'mnem' : 'ArgsDictSet', 'args' : ['name', '0x'], 'varg' : False },
    53 : { 'mnem' : 'MemLdWith', 'args' : ['name'], 'varg' : False },
    54 : { 'mnem' : 'DictLdWith', 'args' : ['name'], 'varg' : False },
    55 : { 'mnem' : 'ArgsMemLdWith', 'args' : ['name', '0x'], 'varg' : False },
    56 : { 'mnem' : 'ArgsDictLdWith', 'args' : ['name', '0x'], 'varg' : False },
    57 : { 'mnem' : 'MemStWith', 'args' : ['name'], 'varg' : False },
    58 : { 'mnem' : 'DictStWith', 'args' : ['name'], 'varg' : False },
    59 : { 'mnem' : 'ArgsMemStWith', 'args' : ['name', '0x'], 'varg' : False },
    60 : { 'mnem' : 'ArgsDictStWith', 'args' : ['name', '0x'], 'varg' : False },
    61 : { 'mnem' : 'MemSetWith', 'args' : ['name'], 'varg' : False },
    62 : { 'mnem' : 'DictSetWith', 'args' : ['name'], 'varg' : False },
    63 : { 'mnem' : 'ArgsMemSetWith', 'args' : ['name', '0x'], 'varg' : False },
    64 : { 'mnem' : 'ArgsDictSetWith', 'args' : ['name', '0x'], 'varg' : False },
    65 : { 'mnem' : 'ArgsCall', 'args' : ['name', '0x'], 'varg' : False },
    66 : { 'mnem' : 'ArgsMemCall', 'args' : ['name', '0x'], 'varg' : False },
    67 : { 'mnem' : 'ArgsMemCallWith', 'args' : ['name', '0x'], 'varg' : False },
    68 : { 'mnem' : 'ArgsArray', 'args' : ['name', '0x'], 'varg' : False },
    69 : { 'mnem' : 'Assert', 'args' : [], 'varg' : False },
    70 : { 'mnem' : 'BoS', 'args' : ['0x'], 'varg' : False },
    71 : { 'mnem' : 'BoSImplicit', 'args' : [], 'varg' : False },
    72 : { 'mnem' : 'BoL', 'args' : [], 'varg' : False },
    73 : { 'mnem' : 'LdAddressOf', 'args' : ['name'], 'varg' : False },
    74 : { 'mnem' : 'MemAddressOf', 'args' : ['name'], 'varg' : False },
    75 : { 'mnem' : 'Case', 'args' : [], 'varg' : False },
    76 : { 'mnem' : 'CaseTo', 'args' : [], 'varg' : False },
    77 : { 'mnem' : 'CaseGt', 'args' : [], 'varg' : False },
    78 : { 'mnem' : 'CaseLt', 'args' : [], 'varg' : False },
    79 : { 'mnem' : 'CaseGe', 'args' : [], 'varg' : False },
    80 : { 'mnem' : 'CaseLe', 'args' : [], 'varg' : False },
    81 : { 'mnem' : 'CaseNe', 'args' : [], 'varg' : False },
    82 : { 'mnem' : 'CaseEq', 'args' : [], 'varg' : False },
    83 : { 'mnem' : 'CaseElse', 'args' : [], 'varg' : False },
    84 : { 'mnem' : 'CaseDone', 'args' : [], 'varg' : False },
    85 : { 'mnem' : 'Circle', 'args' : ['0x'], 'varg' : False },
    86 : { 'mnem' : 'Close', 'args' : ['0x'], 'varg' : False },
    87 : { 'mnem' : 'CloseAll', 'args' : [], 'varg' : False },
    88 : { 'mnem' : 'Coerce', 'args' : [], 'varg' : False },
    89 : { 'mnem' : 'CoerceVar', 'args' : [], 'varg' : False },
    90 : { 'mnem' : 'Context', 'args' : ['context_'], 'varg' : False },
    91 : { 'mnem' : 'Debug', 'args' : [], 'varg' : False },
    92 : { 'mnem' : 'DefType', 'args' : ['0x', '0x'], 'varg' : False },
    93 : { 'mnem' : 'Dim', 'args' : [], 'varg' : False },
    94 : { 'mnem' : 'DimImplicit', 'args' : [], 'varg' : False },
    95 : { 'mnem' : 'Do', 'args' : [], 'varg' : False },
    96 : { 'mnem' : 'DoEvents', 'args' : [], 'varg' : False },
    97 : { 'mnem' : 'DoUnitil', 'args' : [], 'varg' : False },
    98 : { 'mnem' : 'DoWhile', 'args' : [], 'varg' : False },
    99 : { 'mnem' : 'Else', 'args' : [], 'varg' : False },
    100 : { 'mnem' : 'ElseBlock', 'args' : [], 'varg' : False },
    101 : { 'mnem' : 'ElseIfBlock', 'args' : [], 'varg' : False },
    102 : { 'mnem' : 'ElseIfTypeBlock', 'args' : ['imp_'], 'varg' : False },
    103 : { 'mnem' : 'End', 'args' : [], 'varg' : False },
    104 : { 'mnem' : 'EndContext', 'args' : [], 'varg' : False },
    105 : { 'mnem' : 'EndFunc', 'args' : [], 'varg' : False },
    106 : { 'mnem' : 'EndIf', 'args' : [], 'varg' : False },
    107 : { 'mnem' : 'EndIfBlock', 'args' : [], 'varg' : False },
    108 : { 'mnem' : 'EndImmediate', 'args' : [], 'varg' : False },
    109 : { 'mnem' : 'EndProp', 'args' : [], 'varg' : False },
    110 : { 'mnem' : 'EndSelect', 'args' : [], 'varg' : False },
    111 : { 'mnem' : 'EndSub', 'args' : [], 'varg' : False },
    112 : { 'mnem' : 'EndType', 'args' : [], 'varg' : False },
    113 : { 'mnem' : 'EndWith', 'args' : [], 'varg' : False },
    114 : { 'mnem' : 'Erase', 'args' : ['0x'], 'varg' : False },
    115 : { 'mnem' : 'Error', 'args' : [], 'varg' : False },
    116 : { 'mnem' : 'EventDecl', 'args' : ['func_'], 'varg' : False },
    117 : { 'mnem' : 'RaiseEvent', 'args' : ['name', '0x'], 'varg' : False },
    118 : { 'mnem' : 'ArgsMemRaiseEvent', 'args' : ['name', '0x'], 'varg' : False },
    119 : { 'mnem' : 'ArgsMemRaiseEventWith', 'args' : ['name', '0x'], 'varg' : False },
    120 : { 'mnem' : 'ExitDo', 'args' : [], 'varg' : False },
    121 : { 'mnem' : 'ExitFor', 'args' : [], 'varg' : False },
    122 : { 'mnem' : 'ExitFunc', 'args' : [], 'varg' : False },
    123 : { 'mnem' : 'ExitProp', 'args' : [], 'varg' : False },
    124 : { 'mnem' : 'ExitSub', 'args' : [], 'varg' : False },
    125 : { 'mnem' : 'FnCurDir', 'args' : [], 'varg' : False },
    126 : { 'mnem' : 'FnDir', 'args' : [], 'varg' : False },
    127 : { 'mnem' : 'Empty0', 'args' : [], 'varg' : False },
    128 : { 'mnem' : 'Empty1', 'args' : [], 'varg' : False },
    129 : { 'mnem' : 'FnError', 'args' : [], 'varg' : False },
    130 : { 'mnem' : 'FnFormat', 'args' : [], 'varg' : False },
    131 : { 'mnem' : 'FnFreeFile', 'args' : [], 'varg' : False },
    132 : { 'mnem' : 'FnInStr', 'args' : [], 'varg' : False },
    133 : { 'mnem' : 'FnInStr3', 'args' : [], 'varg' : False },
    134 : { 'mnem' : 'FnInStr4', 'args' : [], 'varg' : False },
    135 : { 'mnem' : 'FnInStrB', 'args' : [], 'varg' : False },
    136 : { 'mnem' : 'FnInStrB3', 'args' : [], 'varg' : False },
    137 : { 'mnem' : 'FnInStrB4', 'args' : [], 'varg' : False },
    138 : { 'mnem' : 'FnLBound', 'args' : ['0x'], 'varg' : False },
    139 : { 'mnem' : 'FnMid', 'args' : [], 'varg' : False },
    140 : { 'mnem' : 'FnMidB', 'args' : [], 'varg' : False },
    141 : { 'mnem' : 'FnStrComp', 'args' : [], 'varg' : False },
    142 : { 'mnem' : 'FnStrComp3', 'args' : [], 'varg' : False },
    143 : { 'mnem' : 'FnStringVar', 'args' : [], 'varg' : False },
    144 : { 'mnem' : 'FnStringStr', 'args' : [], 'varg' : False },
    145 : { 'mnem' : 'FnUBound', 'args' : ['0x'], 'varg' : False },
    146 : { 'mnem' : 'For', 'args' : [], 'varg' : False },
    147 : { 'mnem' : 'ForEach', 'args' : [], 'varg' : False },
    148 : { 'mnem' : 'ForEachAs', 'args' : ['imp_'], 'varg' : False },
    149 : { 'mnem' : 'ForStep', 'args' : [], 'varg' : False },
    150 : { 'mnem' : 'FuncDefn', 'args' : ['func_'], 'varg' : False },
    151 : { 'mnem' : 'FuncDefnSave', 'args' : ['func_'], 'varg' : False },
    152 : { 'mnem' : 'GetRec', 'args' : [], 'varg' : False },
    153 : { 'mnem' : 'GoSub', 'args' : ['name'], 'varg' : False },
    154 : { 'mnem' : 'GoTo', 'args' : ['name'], 'varg' : False },
    155 : { 'mnem' : 'If', 'args' : [], 'varg' : False },
    156 : { 'mnem' : 'IfBlock', 'args' : [], 'varg' : False },
    157 : { 'mnem' : 'TypeOf', 'args' : ['imp_'], 'varg' : False },
    158 : { 'mnem' : 'IfTypeBlock', 'args' : ['imp_'], 'varg' : False },
    159 : { 'mnem' : 'Implements', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    160 : { 'mnem' : 'Input', 'args' : [], 'varg' : False },
    161 : { 'mnem' : 'InputDone', 'args' : [], 'varg' : False },
    162 : { 'mnem' : 'InputItem', 'args' : [], 'varg' : False },
    163 : { 'mnem' : 'Label', 'args' : ['name'], 'varg' : False },
    164 : { 'mnem' : 'Let', 'args' : [], 'varg' : False },
    165 : { 'mnem' : 'Line', 'args' : ['0x'], 'varg' : False },
    166 : { 'mnem' : 'LineCont', 'args' : [], 'varg' : True },
    167 : { 'mnem' : 'LineInput', 'args' : [], 'varg' : False },
    168 : { 'mnem' : 'LineNum', 'args' : ['name'], 'varg' : False },
    169 : { 'mnem' : 'LitCy', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    170 : { 'mnem' : 'LitDate', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    171 : { 'mnem' : 'LitDefault', 'args' : [], 'varg' : False },
    172 : { 'mnem' : 'LitDI2', 'args' : ['0x'], 'varg' : False },
    173 : { 'mnem' : 'LitDI4', 'args' : ['0x', '0x'], 'varg' : False },
    174 : { 'mnem' : 'LitDI8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    175 : { 'mnem' : 'LitHI2', 'args' : ['0x'], 'varg' : False },
    176 : { 'mnem' : 'LitHI4', 'args' : ['0x', '0x'], 'varg' : False },
    177 : { 'mnem' : 'LitHI8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    178 : { 'mnem' : 'LitNothing', 'args' : [], 'varg' : False },
    179 : { 'mnem' : 'LitOI2', 'args' : ['0x'], 'varg' : False },
    180 : { 'mnem' : 'LitOI4', 'args' : ['0x', '0x'], 'varg' : False },
    181 : { 'mnem' : 'LitOI8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    182 : { 'mnem' : 'LitR4', 'args' : ['0x', '0x'], 'varg' : False },
    183 : { 'mnem' : 'LitR8', 'args' : ['0x', '0x', '0x', '0x'], 'varg' : False },
    184 : { 'mnem' : 'LitSmallI2', 'args' : [], 'varg' : False },
    185 : { 'mnem' : 'LitStr', 'args' : [], 'varg' : True },
    186 : { 'mnem' : 'LitVarSpecial', 'args' : [], 'varg' : False },
    187 : { 'mnem' : 'Lock', 'args' : [], 'varg' : False },
    188 : { 'mnem' : 'Loop', 'args' : [], 'varg' : False },
    189 : { 'mnem' : 'LoopUntil', 'args' : [], 'varg' : False },
    190 : { 'mnem' : 'LoopWhile', 'args' : [], 'varg' : False },
    191 : { 'mnem' : 'LSet', 'args' : [], 'varg' : False },
    192 : { 'mnem' : 'Me', 'args' : [], 'varg' : False },
    193 : { 'mnem' : 'MeImplicit', 'args' : [], 'varg' : False },
    194 : { 'mnem' : 'MemRedim', 'args' : ['name', '0x', 'type_'], 'varg' : False },
    195 : { 'mnem' : 'MemRedimWith', 'args' : ['name', '0x', 'type_'], 'varg' : False },
    196 : { 'mnem' : 'MemRedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False },
    197 : { 'mnem' : 'MemRedimAsWith', 'args' : ['name', '0x', 'type_'], 'varg' : False },
    198 : { 'mnem' : 'Mid', 'args' : [], 'varg' : False },
    199 : { 'mnem' : 'MidB', 'args' : [], 'varg' : False },
    200 : { 'mnem' : 'Name', 'args' : [], 'varg' : False },
    201 : { 'mnem' : 'New', 'args' : ['imp_'], 'varg' : False },
    202 : { 'mnem' : 'Next', 'args' : [], 'varg' : False },
    203 : { 'mnem' : 'NextVar', 'args' : [], 'varg' : False },
    204 : { 'mnem' : 'OnError', 'args' : ['name'], 'varg' : False },
    205 : { 'mnem' : 'OnGosub', 'args' : [], 'varg' : True },
    206 : { 'mnem' : 'OnGoto', 'args' : [], 'varg' : True },
    207 : { 'mnem' : 'Open', 'args' : ['0x'], 'varg' : False },
    208 : { 'mnem' : 'Option', 'args' : [], 'varg' : False },
    209 : { 'mnem' : 'OptionBase', 'args' : [], 'varg' : False },
    210 : { 'mnem' : 'ParamByVal', 'args' : [], 'varg' : False },
    211 : { 'mnem' : 'ParamOmitted', 'args' : [], 'varg' : False },
    212 : { 'mnem' : 'ParamNamed', 'args' : ['name'], 'varg' : False },
    213 : { 'mnem' : 'PrintChan', 'args' : [], 'varg' : False },
    214 : { 'mnem' : 'PrintComma', 'args' : [], 'varg' : False },
    215 : { 'mnem' : 'PrintEoS', 'args' : [], 'varg' : False },
    216 : { 'mnem' : 'PrintItemComma', 'args' : [], 'varg' : False },
    217 : { 'mnem' : 'PrintItemNL', 'args' : [], 'varg' : False },
    218 : { 'mnem' : 'PrintItemSemi', 'args' : [], 'varg' : False },
    219 : { 'mnem' : 'PrintNL', 'args' : [], 'varg' : False },
    220 : { 'mnem' : 'PrintObj', 'args' : [], 'varg' : False },
    221 : { 'mnem' : 'PrintSemi', 'args' : [], 'varg' : False },
    222 : { 'mnem' : 'PrintSpc', 'args' : [], 'varg' : False },
    223 : { 'mnem' : 'PrintTab', 'args' : [], 'varg' : False },
    224 : { 'mnem' : 'PrintTabComma', 'args' : [], 'varg' : False },
    225 : { 'mnem' : 'PSet', 'args' : ['0x'], 'varg' : False },
    226 : { 'mnem' : 'PutRec', 'args' : [], 'varg' : False },
    227 : { 'mnem' : 'QuoteRem', 'args' : ['0x'], 'varg' : True },
    228 : { 'mnem' : 'Redim', 'args' : ['name', '0x', 'type_'], 'varg' : False },
    229 : { 'mnem' : 'RedimAs', 'args' : ['name', '0x', 'type_'], 'varg' : False },
    230 : { 'mnem' : 'Reparse', 'args' : [], 'varg' : True },
    231 : { 'mnem' : 'Rem', 'args' : [], 'varg' : True },
    232 : { 'mnem' : 'Resume', 'args' : ['name'], 'varg' : False },
    233 : { 'mnem' : 'Return', 'args' : [], 'varg' : False },
    234 : { 'mnem' : 'RSet', 'args' : [], 'varg' : False },
    235 : { 'mnem' : 'Scale', 'args' : ['0x'], 'varg' : False },
    236 : { 'mnem' : 'Seek', 'args' : [], 'varg' : False },
    237 : { 'mnem' : 'SelectCase', 'args' : [], 'varg' : False },
    238 : { 'mnem' : 'SelectIs', 'args' : ['imp_'], 'varg' : False },
    239 : { 'mnem' : 'SelectType', 'args' : [], 'varg' : False },
    240 : { 'mnem' : 'SetStmt', 'args' : [], 'varg' : False },
    241 : { 'mnem' : 'Stack', 'args' : ['0x', '0x'], 'varg' : False },
    242 : { 'mnem' : 'Stop', 'args' : [], 'varg' : False },
    243 : { 'mnem' : 'Type', 'args' : ['rec_'], 'varg' : False },
    244 : { 'mnem' : 'Unlock', 'args' : [], 'varg' : False },
    245 : { 'mnem' : 'VarDefn', 'args' : ['var_'], 'varg' : False },
    246 : { 'mnem' : 'Wend', 'args' : [], 'varg' : False },
    247 : { 'mnem' : 'While', 'args' : [], 'varg' : False },
    248 : { 'mnem' : 'With', 'args' : [], 'varg' : False },
    249 : { 'mnem' : 'WriteChan', 'args' : [], 'varg' : False },
    250 : { 'mnem' : 'ConstFuncExpr', 'args' : [], 'varg' : False },
    251 : { 'mnem' : 'LbConst', 'args' : ['name'], 'varg' : False },
    252 : { 'mnem' : 'LbIf', 'args' : [], 'varg' : False },
    253 : { 'mnem' : 'LbElse', 'args' : [], 'varg' : False },
    254 : { 'mnem' : 'LbElseIf', 'args' : [], 'varg' : False },
    255 : { 'mnem' : 'LbEndIf', 'args' : [], 'varg' : False },
    256 : { 'mnem' : 'LbMark', 'args' : [], 'varg' : False },
    257 : { 'mnem' : 'EndForVariable', 'args' : [], 'varg' : False },
    258 : { 'mnem' : 'StartForVariable', 'args' : [], 'varg' : False },
    259 : { 'mnem' : 'NewRedim', 'args' : [], 'varg' : False },
    260 : { 'mnem' : 'StartWithExpr', 'args' : [], 'varg' : False },
    261 : { 'mnem' : 'SetOrSt', 'args' : ['name'], 'varg' : False },
    262 : { 'mnem' : 'EndEnum', 'args' : [], 'varg' : False },
    263 : { 'mnem' : 'Illegal', 'args' : [], 'varg' : False }
}
def translateOpcode(opcode, vbaVer, is64bit):
    """Convert an opcode of the given VBA version to its VBA7 numbering.

    VBA3 and VBA5 each have their own renumbering. Every other version
    uses the third renumbering unless *is64bit* is set, in which case the
    opcode is returned unchanged.
    """
    # Each table lists (first, last, shift) triples: an opcode inside the
    # inclusive range first..last is moved up by 'shift'. 'otherwise' is the
    # shift applied to any opcode that matches none of the ranges.
    if vbaVer == 3:
        ranges = (
            (0, 67, 0), (68, 70, 2), (71, 111, 4), (112, 150, 8),
            (151, 164, 9), (165, 166, 10), (167, 169, 11), (170, 238, 12),
        )
        otherwise = 24  # opcode == 239
    elif vbaVer == 5:
        ranges = (
            (0, 68, 0), (69, 71, 1), (72, 112, 3), (113, 151, 7),
            (152, 165, 8), (166, 167, 9), (168, 170, 10),
        )
        otherwise = 11  # 171 <= opcode <= 252
    elif not is64bit:
        ranges = ((0, 173, 0), (174, 175, 1), (176, 178, 2))
        otherwise = 3   # 179 <= opcode <= 260
    else:
        return opcode
    for first, last, shift in ranges:
        if first <= opcode <= last:
            return opcode + shift
    return opcode + otherwise
# Names of the built-in identifiers, indexed by their internal ID. Built once
# at import time instead of on every getID() call.
_INTERNAL_NAMES = (
    '<crash>', '0', 'Abs', 'Access', 'AddressOf', 'Alias', 'And', 'Any',
    'Append', 'Array', 'As', 'Assert', 'B', 'Base', 'BF', 'Binary',
    'Boolean', 'ByRef', 'Byte', 'ByVal', 'Call', 'Case', 'CBool', 'CByte',
    'CCur', 'CDate', 'CDec', 'CDbl', 'CDecl', 'ChDir', 'CInt', 'Circle',
    'CLng', 'Close', 'Compare', 'Const', 'CSng', 'CStr', 'CurDir', 'CurDir$',
    'CVar', 'CVDate', 'CVErr', 'Currency', 'Database', 'Date', 'Date$', 'Debug',
    'Decimal', 'Declare', 'DefBool', 'DefByte', 'DefCur', 'DefDate', 'DefDec', 'DefDbl',
    'DefInt', 'DefLng', 'DefObj', 'DefSng', 'DefStr', 'DefVar', 'Dim', 'Dir',
    'Dir$', 'Do', 'DoEvents', 'Double', 'Each', 'Else', 'ElseIf', 'Empty',
    'End', 'EndIf', 'Enum', 'Eqv', 'Erase', 'Error', 'Error$', 'Event',
    'WithEvents', 'Explicit', 'F', 'False', 'Fix', 'For', 'Format',
    'Format$', 'FreeFile', 'Friend', 'Function', 'Get', 'Global', 'Go', 'GoSub',
    'Goto', 'If', 'Imp', 'Implements', 'In', 'Input', 'Input$', 'InputB',
    'InputB', 'InStr', 'InputB$', 'Int', 'InStrB', 'Is', 'Integer', 'Left',
    'LBound', 'LenB', 'Len', 'Lib', 'Let', 'Line', 'Like', 'Load',
    'Local', 'Lock', 'Long', 'Loop', 'LSet', 'Me', 'Mid', 'Mid$',
    'MidB', 'MidB$', 'Mod', 'Module', 'Name', 'New', 'Next', 'Not',
    'Nothing', 'Null', 'Object', 'On', 'Open', 'Option', 'Optional', 'Or',
    'Output', 'ParamArray', 'Preserve', 'Print', 'Private', 'Property', 'PSet', 'Public',
    'Put', 'RaiseEvent', 'Random', 'Randomize', 'Read', 'ReDim', 'Rem', 'Resume',
    'Return', 'RGB', 'RSet', 'Scale', 'Seek', 'Select', 'Set', 'Sgn',
    'Shared', 'Single', 'Spc', 'Static', 'Step', 'Stop', 'StrComp', 'String',
    'String$', 'Sub', 'Tab', 'Text', 'Then', 'To', 'True', 'Type',
    'TypeOf', 'UBound', 'Unload', 'Unlock', 'Unknown', 'Until', 'Variant', 'WEnd',
    'While', 'Width', 'With', 'Write', 'Xor', '#Const', '#Else', '#ElseIf',
    '#End', '#If', 'Attribute', 'VB_Base', 'VB_Control', 'VB_Creatable', 'VB_Customizable', 'VB_Description',
    'VB_Exposed', 'VB_Ext_Key', 'VB_HelpID', 'VB_Invoke_Func', 'VB_Invoke_Property', 'VB_Invoke_PropertyPut', 'VB_Invoke_PropertyPutRef', 'VB_MemberFlags',
    'VB_Name', 'VB_PredecraredID', 'VB_ProcData', 'VB_TemplateDerived', 'VB_VarDescription', 'VB_VarHelpID', 'VB_VarMemberFlags', 'VB_VarProcData',
    'VB_UserMemID', 'VB_VarUserMemID', 'VB_GlobalNameSpace', ',', '.', '"', '_', '!',
    '#', '&', "'", '(', ')', '*', '+', '-',
    ' /', ':', ';', '<', '<=', '<>', '=', '=<',
    '=>', '>', '><', '>=', '?', '\\', '^', ':='
)


def getID(idCode, identifiers, vbaVer, is64bit):
    """Translate an identifier code into the name it stands for.

    The lowest bit of *idCode* is dropped. Codes that are then 0x100 or
    above index the project's own *identifiers* (after the adjustments made
    for VBA7 and for 64-bit projects); smaller codes index the table of
    built-in names.

    Returns the name, or the placeholder 'id_XXXX' (the original code in
    hex) when the code does not map to an entry. An index that turns
    negative after the adjustments also yields the placeholder, instead of
    silently picking an entry counted from the end of the table.
    """
    origCode = idCode
    idCode >>= 1
    try:
        if idCode >= 0x100:
            idCode -= 0x100
            if vbaVer >= 7:
                idCode -= 4
                if is64bit:
                    idCode -= 3
                if idCode > 0xBE:
                    idCode -= 1
            names = identifiers
        else:
            if vbaVer >= 7 and idCode >= 0xC3:
                idCode -= 1
            names = _INTERNAL_NAMES
        if idCode < 0:
            # A negative index would wrap around to the end of the table.
            raise IndexError(idCode)
        return names[idCode]
    except Exception:
        # Unknown or unusable code: fall back to a placeholder name.
        return 'id_{:04X}'.format(origCode)
def getName(buffer, identifiers, offset, endian, vbaVer, is64bit):
    """Return the name for the identifier code stored as a word at *offset*."""
    return getID(getWord(buffer, offset, endian), identifiers, vbaVer, is64bit)
def disasmName(word, identifiers, mnemonic, opType, vbaVer, is64bit):
    """Format the name operand of an instruction.

    The name comes from getID(). *opType* selects the type suffix appended
    to it, and the value 32 wraps the name in square brackets. For the
    'OnError' and 'Resume' mnemonics no suffix is used and some *opType*
    values replace the name with a fixed text. The result always ends with
    a space.
    """
    suffixes = ['', '?', '%', '&', '!', '#', '@', '?', '$', '?', '?', '?', '?', '?']
    name = getID(word, identifiers, vbaVer, is64bit)
    suffix = suffixes[opType] if opType < len(suffixes) else ''
    if opType == 32:
        name = '[' + name + ']'
    if mnemonic == 'OnError':
        suffix = ''
        name = {1: '(Resume Next)', 2: '(GoTo 0)'}.get(opType, name)
    elif mnemonic == 'Resume':
        suffix = ''
        if opType == 1:
            name = '(Next)'
        elif opType != 0:
            name = ''
    return name + suffix + ' '
def disasmImp(objectTable, identifiers, arg, word, mnemonic, endian, vbaVer, is64bit):
    """Format an operand that is either an object reference or an Open mode.

    For every mnemonic except 'Open': an 'imp_' operand whose entry fits
    inside *objectTable* is resolved to a name through getName(); anything
    else is shown as *arg* followed by *word* as four hex digits and a
    space.

    For 'Open', *word* is decoded as a bit field: the low byte selects the
    file mode, bits 8-11 the access mode and bits 12-15 the lock mode, and
    the result has the form '(For <mode>[ Access ...][ Shared| Lock ...])'.
    """
    if mnemonic != 'Open':
        if arg == 'imp_' and (len(objectTable) >= word + 8):
            return getName(objectTable, identifiers, word + 6, endian, vbaVer, is64bit)
        return '{}{:04X} '.format(arg, word)

    accessMode = ['Read', 'Write', 'Read Write']
    lockMode = ['Read Write', 'Write', 'Read']
    mode = word & 0x00FF
    access = (word & 0x0F00) >> 8
    lock = (word & 0xF000) >> 12

    parts = ['(For ']
    # The first flag that is set wins; 'Binary' needs an exact match.
    for mask, modeName in ((0x01, 'Input'), (0x02, 'Output'), (0x04, 'Random'), (0x08, 'Append')):
        if mode & mask:
            parts.append(modeName)
            break
    else:
        if mode == 0x20:
            parts.append('Binary')
    if access and (access <= len(accessMode)):
        parts.append(' Access ' + accessMode[access - 1])
    if lock:
        if lock & 0x04:
            parts.append(' Shared')
        elif lock <= len(lockMode):
            # Bound-check against the table that is actually indexed here.
            parts.append(' Lock ' + lockMode[lock - 1])
    parts.append(')')
    return ''.join(parts)
def disasmRec(indirectTable, identifiers, dword, endian, vbaVer, is64bit):
    """Render a 'rec_' argument from the record at *dword* in *indirectTable*.

    The record name is resolved from the word at dword + 2. When bit 0 of
    the options word at dword + 18 is clear, the name is prefixed with
    '(Private) '.
    """
    recName = getName(indirectTable, identifiers, dword + 2, endian, vbaVer, is64bit)
    recOptions = getWord(indirectTable, dword + 18, endian)
    if recOptions & 1:
        return recName
    return '(Private) ' + recName
def getTypeName(typeID):
    """Translate a type ID byte into its type name.

    Bits 0xE0 of *typeID* are flag bits and are masked off before the
    lookup; IDs outside the table yield an empty name. When flag bit 0x80
    is set, 'Ptr' is appended to the name.
    """
    baseNames = ('', 'Null', 'Integer', 'Long', 'Single', 'Double', 'Currency', 'Date', 'String',
                 'Object', 'Error', 'Boolean', 'Variant', '', 'Decimal', '', '', 'Byte')
    wantsPtr = bool(typeID & 0x80)
    baseID = typeID & ~0xE0
    name = baseNames[baseID] if baseID < len(baseNames) else ''
    if wantsPtr:
        name += 'Ptr'
    return name
def disasmType(indirectTable, dword):
    """Render a 'type_' argument from the record at *dword*.

    The type ID is the byte at dword + 6 of *indirectTable*. Known IDs map
    to their type name; unknown ones are rendered as 'type_' followed by
    the record offset in 8-digit hex.
    """
    knownTypes = ('', 'Null', 'Integer', 'Long', 'Single', 'Double', 'Currency', 'Date', 'String',
                  'Object', 'Error', 'Boolean', 'Variant', '', 'Decimal', '', '', 'Byte')
    typeID = ord(indirectTable[dword + 6])
    if typeID >= len(knownTypes):
        return 'type_{:08X}'.format(dword)
    return knownTypes[typeID]
def disasmObject(indirectTable, objectTable, identifiers, offset, endian, vbaVer, is64bit):
    """Resolve the type name of an object-typed variable declaration.

    The dword at *offset* in *indirectTable* is the offset of a type
    descriptor. If bit 0x02 of the descriptor's flags word is set, the
    type is resolved by disasmType(); otherwise the word at
    descriptor + 2 selects a 10-byte slot in *objectTable* whose word at
    slot + 6 is an identifier code resolved through getID().

    Returns an empty string for 64-bit documents (not supported yet), for
    a zero selector word, and when the selected slot does not fit inside
    *objectTable*.
    """
    # TODO - Dim declarations in 64-bit Office documents
    if is64bit:
        return ''
    typeDesc = getDWord(indirectTable, offset, endian)
    flags = getWord(indirectTable, typeDesc, endian)
    if flags & 0x02:
        return disasmType(indirectTable, typeDesc)
    word = getWord(indirectTable, typeDesc + 2, endian)
    if word == 0:
        return ''
    offs = (word >> 2) * 10
    # The identifier code is the 2-byte word at offs + 6, so the table must
    # hold at least offs + 8 bytes; a shorter table would make getWord raise.
    if offs + 8 > len(objectTable):
        return ''
    hlName = getWord(objectTable, offs + 6, endian)
    # TODO - When bit 0x02 of the flags word at objectTable[offs] is set,
    # hlName appears to be the offset of a counted list of name IDs rather
    # than a single ID. The logic that handled this was flawed and did not
    # always work, so the value is always treated as a single identifier.
    return getID(hlName, identifiers, vbaVer, is64bit)
def disasmVar(indirectTable, objectTable, identifiers, dword, endian, vbaVer, is64bit):
    """Render a 'var_' argument from the record at *dword*.

    Bit 0x20 of the first record byte marks an 'As' clause and bit 0x20 of
    the second byte marks 'New'. The variable name comes from the word at
    dword + 2. When either flag is set and a non-empty description can be
    built, it is appended to the name in parentheses, e.g.
    'x (New As Foo)'.
    """
    declaredWithAs = (ord(indirectTable[dword]) & 0x20) != 0
    declaredWithNew = (ord(indirectTable[dword + 1]) & 0x20) != 0
    result = getName(indirectTable, identifiers, dword + 2, endian, vbaVer, is64bit)
    if not (declaredWithNew or declaredWithAs):
        return result

    description = ''
    if declaredWithNew:
        # A separating space is only needed when an 'As' part may follow
        description = 'New ' if declaredWithAs else 'New'
    if declaredWithAs:
        # The type field sits 16 bytes into the record on 64-bit, else 12
        typeField = dword + (16 if is64bit else 12)
        if getWord(indirectTable, typeField + 2, endian) == 0xFFFF:
            # Built-in type: the ID is the first byte of the field
            typeName = getTypeName(ord(indirectTable[typeField]))
        else:
            typeName = disasmObject(indirectTable, objectTable, identifiers, typeField, endian, vbaVer, is64bit)
        if typeName:
            description += 'As ' + typeName
    if description:
        result += ' (' + description + ')'
    return result
def disasmArg(indirectTable, identifiers, argOffset, endian, vbaVer, is64bit):
    """Render one procedure argument from the record at *argOffset*.

    The name is resolved from the word at argOffset + 2. The type dword
    and the options word are read at +12 and +24, shifted by 4 bytes on
    64-bit. Options bits add the 'ByVal ', 'ByRef ' and 'Optional '
    prefixes; flag bit 0x0020 appends ' As ' plus the built-in type name
    when the upper half of the type dword is non-zero.
    """
    flags = getWord(indirectTable, argOffset, endian)
    shift = 4 if is64bit else 0
    text = getName(indirectTable, identifiers, argOffset + 2, endian, vbaVer, is64bit)
    typeField = getDWord(indirectTable, argOffset + shift + 12, endian)
    options = getWord(indirectTable, argOffset + shift + 24, endian)
    # Applied in this order, so a later keyword ends up further to the left
    for mask, keyword in ((0x0004, 'ByVal '), (0x0002, 'ByRef '), (0x0200, 'Optional ')):
        if options & mask:
            text = keyword + text
    # TODO - ParamArray arguments aren't disassembled properly
    # (a clear 0x0040 bit in flags was once rendered as 'ParamArray name()')
    if flags & 0x0020:
        text += ' As '
        if typeField & 0xFFFF0000:
            text += getTypeName(typeField & 0x000000FF)
        # TODO - Custom type arguments aren't disassembled properly
        # (their name was once looked up at typeField + 6 in indirectTable)
    return text
def disasmFunc(indirectTable, declarationTable, identifiers, dword, opType, endian, vbaVer, is64bit):
    """Render a 'func_' argument as a parenthesized procedure declaration.

    The function record starts at *dword* in *indirectTable*. Its flags
    word, name, argument chain, return type and declaration offset are
    read from fixed positions; positions past the name are shifted by 4
    bytes for VBA versions above 5 and by a further 16 bytes on 64-bit.

    The result looks like '(Private Function Foo(ByVal x As Long) As String)'.
    For 'Declare' statements the library name is read from
    *declarationTable*.
    """
    funcDecl = '('
    flags = getWord(indirectTable, dword, endian)
    subName = getName(indirectTable, identifiers, dword + 2, endian, vbaVer, is64bit)
    offs2 = 4 if vbaVer > 5 else 0
    if is64bit:
        offs2 += 16
    argOffset = getDWord(indirectTable, dword + offs2 + 36, endian)
    retType = getDWord(indirectTable, dword + offs2 + 40, endian)
    declOffset = getWord(indirectTable, dword + offs2 + 44, endian)
    cOptions = ord(indirectTable[dword + offs2 + 54])
    # The byte at dword + offs2 + 55 is presumably the argument count; unused
    newFlags = ord(indirectTable[dword + offs2 + 57])
    hasDeclare = False
    # TODO - 'Private' and 'Declare' for 64-bit Office
    if vbaVer > 5:
        if ((newFlags & 0x0002) == 0) and not is64bit:
            funcDecl += 'Private '
        if newFlags & 0x0004:
            funcDecl += 'Friend '
    else:
        if (flags & 0x0008) == 0:
            funcDecl += 'Private '
    if opType & 0x04:
        funcDecl += 'Public '
    if flags & 0x0080:
        funcDecl += 'Static '
    if ((cOptions & 0x90) == 0) and (declOffset != 0xFFFF) and not is64bit:
        hasDeclare = True
        funcDecl += 'Declare '
    if vbaVer > 5:
        if newFlags & 0x20:
            funcDecl += 'PtrSafe '
    hasAs = (flags & 0x0020) != 0
    if flags & 0x1000:
        if opType in (2, 6):
            funcDecl += 'Function '
        else:
            funcDecl += 'Sub '
    elif flags & 0x2000:
        funcDecl += 'Property Get '
    elif flags & 0x4000:
        funcDecl += 'Property Let '
    elif flags & 0x8000:
        funcDecl += 'Property Set '
    funcDecl += subName
    if hasDeclare:
        libName = getName(declarationTable, identifiers, declOffset + 2, endian, vbaVer, is64bit)
        funcDecl += ' Lib "' + libName + '" '

    # Walk the chain of argument records. Each record stores the offset of
    # the next one at +20; 0 or 0xFFFFFFFF ends the chain.
    argList = []
    tableLength = len(indirectTable)
    # disasmArg reads a word at +24, shifted by 4 bytes on 64-bit
    argRecordSize = 26 + (4 if is64bit else 0)
    visitedOffsets = set()
    while (argOffset != 0xFFFFFFFF) and (argOffset != 0) and (argOffset + 26 < tableLength):
        if argOffset + argRecordSize > tableLength:
            # Truncated record: stop instead of reading past the table
            break
        if argOffset in visitedOffsets:
            # The offsets come from the (untrusted) document; a chain that
            # loops back on itself would otherwise never terminate
            break
        visitedOffsets.add(argOffset)
        argName = disasmArg(indirectTable, identifiers, argOffset, endian, vbaVer, is64bit)
        argList.append(argName)
        argOffset = getDWord(indirectTable, argOffset + 20, endian)
    funcDecl += '(' + ', '.join(argList) + ')'

    if hasAs:
        funcDecl += ' As '
        if (retType & 0xFFFF0000) == 0xFFFF0000:
            # Built-in return type: the low byte is the type ID
            typeName = getTypeName(retType & 0x000000FF)
        else:
            # Otherwise retType is an offset into indirectTable
            typeName = getName(indirectTable, identifiers, retType + 6, endian, vbaVer, is64bit)
        funcDecl += typeName
    funcDecl += ')'
    return funcDecl
def disasmVarArg(moduleData, identifiers, offset, wLength, mnemonic, endian, vbaVer, is64bit):
    """Render the variable-length argument of an instruction.

    The argument occupies *wLength* bytes of *moduleData* starting at
    *offset*. The result always starts with the length in hex, followed by:

    - the decoded, double-quoted text for string-like mnemonics;
    - a comma-separated list of resolved identifiers for 'OnGosub' and
      'OnGoto' (one 16-bit code per entry);
    - a hex dump of the raw bytes for everything else.
    """
    payload = moduleData[offset:offset + wLength]
    rendered = '0x{:04X} '.format(wLength)
    if mnemonic in ('LitStr', 'QuoteRem', 'Rem', 'Reparse'):
        return rendered + '"' + decode(payload) + '"'
    if mnemonic in ('OnGosub', 'OnGoto'):
        labels = []
        cursor = offset
        remaining = wLength // 2
        while remaining > 0:
            cursor, code = getVar(moduleData, cursor, endian, False)
            labels.append(getID(code, identifiers, vbaVer, is64bit))
            remaining -= 1
        return rendered + ', '.join(labels) + ' '
    hexBytes = ['{:02X}'.format(ord(c)) for c in payload]
    return rendered + ' '.join(hexBytes)
def dumpLine(moduleData, lineStart, lineLength, endian, vbaVer, is64bit,
             identifiers, objectTable, indirectTable, declarationTable, verbose, line, output_file=sys.stdout):
    """Disassemble one p-code line and print it to *output_file*.

    The line occupies *lineLength* bytes of *moduleData* starting at
    *lineStart*. A 'Line #<n>:' header is always printed; in verbose mode
    it is preceded by the line's start offset and followed by a hex dump
    of the line's bytes. Each instruction is then printed on its own
    tab-indented line: mnemonic, an optional decoration derived from the
    upper 6 bits of the opcode word, the fixed arguments and finally the
    variable-length argument, if any.

    Disassembly of the line stops with a message at the first opcode that
    is not present in the opcode table.
    """
    varTypesLong = ['Var', '?', 'Int', 'Lng', 'Sng', 'Dbl', 'Cur', 'Date', 'Str', 'Obj', 'Err', 'Bool', 'Var']
    specials = ['False', 'True', 'Null', 'Empty']
    options = ['Base 0', 'Base 1', 'Compare Text', 'Compare Binary', 'Explicit', 'Private Module']

    if verbose and (lineLength > 0):
        print('{:04X}: '.format(lineStart), end='', file=output_file)
    print('Line #{:d}:'.format(line), file=output_file)
    if lineLength <= 0:
        return
    if verbose:
        print(hexdump(moduleData[lineStart:lineStart + lineLength]), file=output_file)

    offset = lineStart
    endOfLine = lineStart + lineLength
    while offset < endOfLine:
        # Each instruction word holds the opcode in its low 10 bits and a
        # per-instruction modifier (opType) in the bits above them
        offset, opcode = getVar(moduleData, offset, endian, False)
        opType = (opcode & ~0x03FF) >> 10
        opcode &= 0x03FF
        translatedOpcode = translateOpcode(opcode, vbaVer, is64bit)
        if translatedOpcode not in opcodes:
            print('Unrecognized opcode 0x{:04X} at offset 0x{:08X}.'.format(opcode, offset), file=output_file)
            return
        instruction = opcodes[translatedOpcode]
        mnemonic = instruction['mnem']
        print('\t', end='', file=output_file)
        if verbose:
            print('{:04X} '.format(opcode), end='', file=output_file)
        print('{} '.format(mnemonic), end='', file=output_file)

        # Decoration derived from opType
        if mnemonic in ('Coerce', 'CoerceVar', 'DefType'):
            if opType < len(varTypesLong):
                print('({}) '.format(varTypesLong[opType]), end='', file=output_file)
            elif opType == 17:
                print('(Byte) ', end='', file=output_file)
            else:
                print('({:d}) '.format(opType), end='', file=output_file)
        elif mnemonic in ('Dim', 'DimImplicit', 'Type'):
            dimType = []
            if opType & 0x04:
                dimType.append('Global')
            elif opType & 0x08:
                dimType.append('Public')
            elif opType & 0x10:
                dimType.append('Private')
            elif opType & 0x20:
                dimType.append('Static')
            if (opType & 0x01) and (mnemonic != 'Type'):
                dimType.append('Const')
            if dimType:
                print('({}) '.format(' '.join(dimType)), end='', file=output_file)
        elif mnemonic == 'LitVarSpecial':
            # opType comes straight from the document; fall back to the raw
            # number instead of raising IndexError on unknown values
            if opType < len(specials):
                print('({})'.format(specials[opType]), end='', file=output_file)
            else:
                print('({:d})'.format(opType), end='', file=output_file)
        elif mnemonic in ('ArgsCall', 'ArgsMemCall', 'ArgsMemCallWith'):
            if opType < 16:
                print('(Call) ', end='', file=output_file)
            else:
                # Strip the bit so that the name suffix lookup below sees
                # the plain type code
                opType -= 16
        elif mnemonic == 'Option':
            # Same fallback as for LitVarSpecial above
            if opType < len(options):
                print(' ({})'.format(options[opType]), end='', file=output_file)
            else:
                print(' ({:d})'.format(opType), end='', file=output_file)
        elif mnemonic in ('Redim', 'RedimAs'):
            if opType & 16:
                print('(Preserve) ', end='', file=output_file)

        # Fixed arguments
        for arg in instruction['args']:
            if arg == 'name':
                offset, word = getVar(moduleData, offset, endian, False)
                theName = disasmName(word, identifiers, mnemonic, opType, vbaVer, is64bit)
                print('{}'.format(theName), end='', file=output_file)
            elif arg in ('0x', 'imp_'):
                offset, word = getVar(moduleData, offset, endian, False)
                theImp = disasmImp(objectTable, identifiers, arg, word, mnemonic, endian, vbaVer, is64bit)
                print('{}'.format(theImp), end='', file=output_file)
            elif arg in ('func_', 'var_', 'rec_', 'type_', 'context_'):
                offset, dword = getVar(moduleData, offset, endian, True)
                # Each kind is only resolved when the indirect table is long
                # enough; otherwise the raw offset is printed instead
                if (arg == 'rec_') and (len(indirectTable) >= dword + 20):
                    theRec = disasmRec(indirectTable, identifiers, dword, endian, vbaVer, is64bit)
                    print('{}'.format(theRec), end='', file=output_file)
                elif (arg == 'type_') and (len(indirectTable) >= dword + 7):
                    theType = disasmType(indirectTable, dword)
                    print('(As {})'.format(theType), end='', file=output_file)
                elif (arg == 'var_') and (len(indirectTable) >= dword + 16):
                    if opType & 0x20:
                        print('(WithEvents) ', end='', file=output_file)
                    theVar = disasmVar(indirectTable, objectTable, identifiers, dword, endian, vbaVer, is64bit)
                    print('{}'.format(theVar), end='', file=output_file)
                    if opType & 0x10:
                        # An extra word follows the variable reference
                        word = getWord(moduleData, offset, endian)
                        offset += 2
                        print(' 0x{:04X}'.format(word), end='', file=output_file)
                elif (arg == 'func_') and (len(indirectTable) >= dword + 61):
                    theFunc = disasmFunc(indirectTable, declarationTable, identifiers, dword, opType, endian, vbaVer, is64bit)
                    print('{}'.format(theFunc), end='', file=output_file)
                else:
                    print('{}{:08X} '.format(arg, dword), end='', file=output_file)
                if is64bit and (arg == 'context_'):
                    # 64-bit contexts carry a second dword
                    offset, dword = getVar(moduleData, offset, endian, True)
                    print('{:08X} '.format(dword), end='', file=output_file)

        # Variable-length argument: a length word followed by the data,
        # padded to an even number of bytes
        if instruction['varg']:
            offset, wLength = getVar(moduleData, offset, endian, False)
            theVarArg = disasmVarArg(moduleData, identifiers, offset, wLength, mnemonic, endian, vbaVer, is64bit)
            print('{}'.format(theVarArg), end='', file=output_file)
            offset += wLength
            if wLength & 1:
                offset += 1
        print('', file=output_file)
def pcodeDump(moduleData, vbaProjectData, dirData, identifiers, is64bit, args, output_file = sys.stdout):
    """Locate and disassemble the p-code of one module stream.

    *moduleData* is the raw module stream and *vbaProjectData* the
    contents of the _VBA_PROJECT stream, from which the internal Office
    version (and thus the VBA version) is read. The declaration, indirect
    and object tables are sliced out of the module stream at
    version-dependent positions, then every line listed in the line table
    is passed to dumpLine().

    Any exception raised while parsing is reported on stderr and ends the
    dump of this module; nothing is raised to the caller. The function
    returns silently if the line table's 0xCAFE marker is not found.
    *dirData* is accepted but not used here.
    """
    if args.verbose and not args.disasmOnly:
        print(hexdump(moduleData), file=output_file)
    # TODO - Handle VBA3 modules
    vbaVer = 3
    try:
        # Determine endianness: PC (little-endian) or Mac (big-endian).
        # Done inside the try block so that a truncated stream is reported
        # for this module only instead of propagating to the caller.
        if getWord(moduleData, 2, '<') > 0xFF:
            endian = '>'
        else:
            endian = '<'
        version = getWord(vbaProjectData, 2, endian)
        if args.verbose:
            print('Internal Office version: 0x{:04X}.'.format(version), file=output_file)
        # Office 2010 is 0x0097; Office 2013 is 0x00A3;
        # Office 2016 PC 32-bit is 0x00B2, 64-bit is 0x00D7, Mac is 0x00D9
        if version >= 0x6B:
            if version >= 0x97:
                vbaVer = 7
            else:
                vbaVer = 6
            if is64bit:
                dwLength = getDWord(moduleData, 0x0043, endian)
                declarationTable = moduleData[0x0047:0x0047 + dwLength]
                dwLength = getDWord(moduleData, 0x0011, endian)
                tableStart = dwLength + 12
            else:
                dwLength = getDWord(moduleData, 0x003F, endian)
                declarationTable = moduleData[0x0043:0x0043 + dwLength]
                dwLength = getDWord(moduleData, 0x0011, endian)
                tableStart = dwLength + 10
            # The indirect table is prefixed by its length as a dword
            dwLength = getDWord(moduleData, tableStart, endian)
            tableStart += 4
            indirectTable = moduleData[tableStart:tableStart + dwLength]
            # Same layout for the object table
            dwLength = getDWord(moduleData, 0x0005, endian)
            dwLength2 = dwLength + 0x8A
            dwLength = getDWord(moduleData, dwLength2, endian)
            dwLength2 += 4
            objectTable = moduleData[dwLength2:dwLength2 + dwLength]
            offset = 0x0019
        else:
            # VBA5
            vbaVer = 5
            offset = 11
            dwLength = getDWord(moduleData, offset, endian)
            offs = offset + 4
            declarationTable = moduleData[offs:offs + dwLength]
            # Step over a series of length-prefixed structures
            offset = skipStructure(moduleData, offset, endian, True, 1, False)
            offset += 64
            offset = skipStructure(moduleData, offset, endian, False, 16, False)
            offset = skipStructure(moduleData, offset, endian, True, 1, False)
            offset += 6
            offset = skipStructure(moduleData, offset, endian, True, 1, False)
            offs = offset + 8
            dwLength = getDWord(moduleData, offs, endian)
            tableStart = dwLength + 14
            offs = dwLength + 10
            dwLength = getDWord(moduleData, offs, endian)
            indirectTable = moduleData[tableStart:tableStart + dwLength]
            dwLength = getDWord(moduleData, offset, endian)
            offs = dwLength + 0x008A
            dwLength = getDWord(moduleData, offs, endian)
            offs += 4
            objectTable = moduleData[offs:offs + dwLength]
            offset += 77
        if args.verbose:
            if len(declarationTable):
                print('Declaration table:', file=output_file)
                print(hexdump(declarationTable), file=output_file)
            if len(indirectTable):
                print('Indirect table:', file=output_file)
                print(hexdump(indirectTable), file=output_file)
            if len(objectTable):
                print('Object table:', file=output_file)
                print(hexdump(objectTable), file=output_file)
        # Follow the pointer to the line table, which starts 0x3C bytes
        # after the pointed-to position with the 0xCAFE marker
        dwLength = getDWord(moduleData, offset, endian)
        offset = dwLength + 0x003C
        offset, magic = getVar(moduleData, offset, endian, False)
        if magic != 0xCAFE:
            return
        offset += 2
        offset, numLines = getVar(moduleData, offset, endian, False)
        # Line entries are 12 bytes each; the p-code follows 10 bytes after them
        pcodeStart = offset + numLines * 12 + 10
        for line in range(numLines):
            offset += 4
            offset, lineLength = getVar(moduleData, offset, endian, False)
            offset += 2
            offset, lineOffset = getVar(moduleData, offset, endian, True)
            dumpLine(moduleData, pcodeStart + lineOffset, lineLength, endian, vbaVer, is64bit, identifiers,
                     objectTable, indirectTable, declarationTable, args.verbose, line, output_file=output_file)
    except Exception as e:
        print('Error: {}.'.format(e), file=sys.stderr)
def processProject(vbaParser, args, output_file = sys.stdout):
    """Dump every VBA project found by *vbaParser*.

    For each project the 'dir' and '_VBA_PROJECT' streams are processed,
    the identifier table is listed (unless args.disasmOnly is set) and
    every code module is read and handed to pcodeDump().

    When *output_file* is a terminal and win_unicode_console is available,
    it is enabled for the duration of the dump and always disabled again
    afterwards. Any exception is reported on stderr and not re-raised.
    """
    try:
        vbaProjects = vbaParser.find_vba_projects()
        if vbaProjects is None:
            return
        useUnicodeConsole = output_file.isatty() and WIN_UNICODE_CONSOLE
        if useUnicodeConsole:
            win_unicode_console.enable()
        try:
            for vbaRoot, _, dirPath in vbaProjects:
                print('=' * 79, file=output_file)
                if not args.disasmOnly:
                    print('dir stream: {}'.format(dirPath), file=output_file)
                dirData, codeModules, is64bit = processDir(vbaParser, dirPath, args, output_file=output_file)
                vbaProjectPath = vbaRoot + 'VBA/_VBA_PROJECT'
                vbaProjectData = process_VBA_PROJECT(vbaParser, vbaProjectPath, args, output_file=output_file)
                identifiers = getTheIdentifiers(vbaProjectData)
                if not args.disasmOnly:
                    print('Identifiers:', file=output_file)
                    print('', file=output_file)
                    for i, identifier in enumerate(identifiers):
                        print('{:04X}: {}'.format(i, identifier), file=output_file)
                    print('', file=output_file)
                    print('_VBA_PROJECT parsing done.', file=output_file)
                    print('-' * 79, file=output_file)
                print('Module streams:', file=output_file)
                for module in codeModules:
                    modulePath = vbaRoot + 'VBA/' + module
                    # make sure it is unicode, because that is what vbaParser expects:
                    if PYTHON2:
                        # modulePath is UTF8 bytes (see processDir)
                        modulePath_unicode = modulePath.decode('utf8', errors='replace')
                    else:
                        # modulePath is already unicode
                        modulePath_unicode = modulePath
                    moduleData = vbaParser.ole_file.openstream(modulePath_unicode).read()
                    print ('{} - {:d} bytes'.format(modulePath, len(moduleData)), file=output_file)
                    pcodeDump(moduleData, vbaProjectData, dirData, identifiers, is64bit, args, output_file=output_file)
        finally:
            # Restore the console even if one of the streams failed to
            # parse; otherwise it would stay enabled after an error
            if useUnicodeConsole:
                win_unicode_console.disable()
    except Exception as e:
        print('Error: {}.'.format(e), file=sys.stderr)
def processFile(fileName, args, output_file=sys.stdout):
    """Open *fileName* with VBA_Parser and dump its VBA project(s).

    When the parser has no OLE file of its own, each of its OLE subfiles
    is processed instead. Errors are reported on stderr; the parser, if
    one was created, is closed afterwards.
    """
    # TODO - Handle VBA3 documents
    print('Processing file: {}'.format(fileName), file=output_file)
    vbaParser = None
    try:
        vbaParser = VBA_Parser(fileName)
        if vbaParser.ole_file is not None:
            targets = [vbaParser]
        else:
            targets = vbaParser.ole_subfiles
        for target in targets:
            processProject(target, args, output_file=output_file)
    except Exception as error:
        print('Error: {}.'.format(error), file=sys.stderr)
    if vbaParser:
        vbaParser.close()
def main():
    """Command-line entry point.

    Parses the arguments, processes every file (and, for directories,
    every file found by os.walk) with processFile() and exits with status
    0, or -1 if an exception reached this level. Output goes to stdout
    unless an output file name was given.
    """
    parser = argparse.ArgumentParser(description='Dumps the p-code of VBA-containing documents.')
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s version {}'.format(__VERSION__))
    parser.add_argument('-n', '--norecurse', action='store_true',
                        help="Don't recurse into directories")
    parser.add_argument('-d', '--disasmonly', dest='disasmOnly', action='store_true',
                        help='Only disassemble, no stream dumps')
    parser.add_argument('-b', '--verbose', action='store_true',
                        help='Dump the stream contents')
    parser.add_argument('-o', '--output', dest='outputfile', default=None,
                        help='Output file name')
    parser.add_argument('fileOrDir', nargs='+', help='File or dir')
    args = parser.parse_args()
    errorLevel = 0
    output_file = sys.stdout
    # Set only once the output file has really been opened, so that a
    # failing open() can never lead to sys.stdout being closed below
    openedFile = None
    try:
        if args.outputfile is not None:
            openedFile = open(args.outputfile, 'w')
            output_file = openedFile
        for name in args.fileOrDir:
            if os.path.isdir(name):
                for dirName, subdirList, fileList in os.walk(name):
                    for fname in fileList:
                        fullName = os.path.join(dirName, fname)
                        processFile(fullName, args, output_file=output_file)
                    if args.norecurse:
                        # Emptying the list in place stops os.walk from
                        # descending into the subdirectories
                        del subdirList[:]
            elif os.path.isfile(name):
                processFile(name, args, output_file=output_file)
            else:
                print('{} does not exist.'.format(name), file=sys.stderr)
    except Exception as e:
        print('Error: {}.'.format(e), file=sys.stderr)
        errorLevel = -1
    finally:
        if openedFile is not None:
            openedFile.close()
    sys.exit(errorLevel)
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()