Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Appease Ruff - Formatting
  • Loading branch information
James Macdonell committed Jul 2, 2024
commit 59573eb88f8390eac059dc37e77456d913c451da
37 changes: 21 additions & 16 deletions src/outlookmsg/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,27 @@

# COMMAND-LINE ENTRY POINT


def main():
    """Command-line entry point: convert Outlook .msg input to .eml output.

    With no command-line arguments, the message is loaded from STDIN and
    printed to STDOUT. Otherwise every argument is treated as a file name;
    each file is loaded and its converted form is written next to it with
    ".eml" appended to the original name.
    """
    logging.basicConfig(
        format="%(asctime)s - %(name)s:%(funcName)s:%(lineno)s - %(levelname)s - %(message)s",
        level=logging.DEBUG,
    )

    paths = sys.argv[1:]

    # No file names given: act as a STDIN -> STDOUT filter.
    if not paths:
        print(load(sys.stdin), file=sys.stdout)
        return

    # One output file per input file, named "<input>.eml".
    for path in paths:
        print(path + "...")
        converted = load(path)
        with open(path + ".eml", "wb") as out:
            out.write(converted.as_bytes())


# Run the converter only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
65 changes: 34 additions & 31 deletions src/outlookmsg/attachments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,38 @@

from .properties import parse_properties


def process_attachment(msg, entry, doc):
    """Add the attachment stored under *entry* to *msg*.

    The attachment's property stream (``entry["__properties_version1.0"]``)
    is decoded with ``parse_properties``. The payload comes from the
    ``ATTACH_DATA_BIN`` property and is handed to ``msg.add_attachment``.

    Args:
        msg: The message being built; must provide ``add_attachment``.
        entry: The attachment's storage node; indexed by stream name and
            also passed to ``parse_properties`` as the container.
        doc: The open document, passed through to ``parse_properties``.

    Raises:
        KeyError: If the decoded properties contain no ``ATTACH_DATA_BIN``.
    """
    # Load attachment stream.
    props = parse_properties(entry["__properties_version1.0"], False, entry, doc)

    # The attachment content...
    blob = props["ATTACH_DATA_BIN"]

    # Get the filename of the attachment, preferring the long name.
    filename = (
        props.get("ATTACH_LONG_FILENAME")
        or props.get("ATTACH_FILENAME")
        or props.get("DISPLAY_NAME")
    )
    if isinstance(filename, bytes):
        filename = filename.decode("utf8")

    # Keep only the final path component. An attachment may carry no name
    # at all; os.path.basename(None) would raise TypeError, so a missing
    # (or empty) name is passed on as None and the attachment is added
    # without a filename parameter.
    filename = (os.path.basename(filename) or None) if filename else None

    # Get the MIME type, falling back to a generic binary type when the
    # property is absent or empty.
    mime_type = props.get("ATTACH_MIME_TAG") or "application/octet-stream"
    if isinstance(mime_type, bytes):
        mime_type = mime_type.decode("utf8")

    if isinstance(blob, bytes):
        # A tag that is not of the form "maintype/subtype" cannot be split
        # into a valid Content-Type; use the generic binary type instead.
        maintype, slash, subtype = mime_type.partition("/")
        if not (maintype and slash and subtype):
            maintype, subtype = "application", "octet-stream"
        msg.add_attachment(
            blob,
            maintype=maintype,
            subtype=subtype,
            filename=filename,
        )
    else:
        # Text (str) or an embedded Message instance: the content type is
        # chosen by add_attachment itself.
        msg.add_attachment(blob, filename=filename)
9 changes: 5 additions & 4 deletions src/outlookmsg/embedded_msg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class EMBEDDED_MESSAGE(object):
    """Loader for property values that hold an embedded message."""

    @staticmethod
    def load(entry, doc, **kwargs):
        """Return the message loaded from *entry* within *doc*.

        Any extra keyword arguments are accepted and ignored.
        """
        # Imported at call time rather than at module level -- presumably
        # to avoid a circular import with the stream module; TODO confirm.
        from .stream import load_message_stream as _load_message_stream

        embedded = _load_message_stream(entry, False, doc)
        return embedded
224 changes: 119 additions & 105 deletions src/outlookmsg/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,109 +14,123 @@

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def parse_properties(properties, is_top_level, container, doc):
    """Read a properties stream and return a dict of decoded values.

    The result maps human-readable field names (looked up in the module's
    ``property_tags`` table) to values decoded by the loader registered for
    each entry's type in ``property_types``.

    Args:
        properties: The properties stream entry, opened via ``doc.open``.
        is_top_level: True to skip a 32-byte header, False for a 24-byte one.
        container: Mapping of stream names to entries, used to find the
            ``__substg1.0_*`` streams of variable-length and embedded values.
        doc: The open document; must provide ``open(entry)``.

    Returns:
        dict: field name -> decoded value. Entries whose tag is unknown,
        whose type has no loader, whose value stream is missing, or whose
        decoding fails are left out (failures are logged, not raised).
    """
    # Load stream content.
    with doc.open(properties) as stream:
        data = stream.read()

    # Skip header.
    # NOTE(review): MS-OXMSG describes an 8-byte header for attachment and
    # recipient property streams; skipping 24 bytes there would pass over
    # the first 16-byte entry -- confirm against the callers.
    offset = 32 if is_top_level else 24

    # Read 16-byte entries.
    raw_properties = {}
    while offset < len(data):
        record = data[offset : offset + 16]
        offset += 16

        # A trailing fragment too short to hold the type and tag fields
        # cannot be decoded; stop instead of raising IndexError.
        if len(record) < 4:
            logger.error("truncated property entry (%d bytes)", len(record))
            break

        # Little-endian 16-bit type and tag, then the 8-byte value field.
        property_type = int.from_bytes(record[0:2], "little")
        property_tag = int.from_bytes(record[2:4], "little")
        value = record[8:16]

        if property_tag not in property_tags:
            continue  # should not happen
        tag_name, _ = property_tags[property_tag]
        tag_type = property_types.get(property_type)

        # Name of the separate stream that holds out-of-line values.
        streamname = "__substg1.0_{:04X}{:04X}".format(property_tag, property_type)

        # Fixed Length Properties.
        if isinstance(tag_type, FixedLengthValueLoader):
            # The value comes from the entry itself.
            pass

        # Variable Length Properties.
        elif isinstance(tag_type, VariableLengthValueLoader):
            # Look up the stream in the document that holds the value.
            try:
                with doc.open(container[streamname]) as innerstream:
                    value = innerstream.read()
            except Exception:
                # Stream isn't present!
                logger.error("stream missing %s", streamname)
                continue

        elif isinstance(tag_type, EMBEDDED_MESSAGE):
            # Look up the entry in the document that holds the attachment;
            # it is loaded later by the EMBEDDED_MESSAGE loader.
            try:
                value = container[streamname]
            except Exception:
                # Stream isn't present!
                logger.error("stream missing %s", streamname)
                continue

        else:
            # unrecognized type
            logger.error("unhandled property type %s", hex(property_type))
            continue

        raw_properties[tag_name] = (tag_type, value)

    # Decode all FixedLengthValueLoader properties first so the codepage
    # properties are available for the string properties below.
    decoded = {}
    for tag_name, (tag_type, value) in raw_properties.items():
        if not isinstance(tag_type, FixedLengthValueLoader):
            continue
        try:
            decoded[tag_name] = tag_type.load(value)
        except Exception as e:
            logger.error("Error while reading stream: %s", e)

    # String8 strings use code page information stored in other
    # properties, which may not be present. Find the Python
    # encoding to use (None when absent or not in code_pages).

    # The encoding of the "BODY" (and HTML body) properties.
    body_encoding = code_pages.get(decoded["PR_INTERNET_CPID"]) \
        if "PR_INTERNET_CPID" in decoded and decoded["PR_INTERNET_CPID"] in code_pages \
        else None

    # The encoding of "string properties of the message object".
    properties_encoding = code_pages.get(decoded["PR_MESSAGE_CODEPAGE"]) \
        if "PR_MESSAGE_CODEPAGE" in decoded and decoded["PR_MESSAGE_CODEPAGE"] in code_pages \
        else None

    # Decode all of the remaining properties.
    for tag_name, (tag_type, value) in raw_properties.items():
        if isinstance(tag_type, FixedLengthValueLoader):
            continue  # already done, above

        # The codepage properties may be wrong. Fall back to
        # the other property if present.
        # NOTE(review): only the "BODY" tag prefers body_encoding here,
        # although the comment above also mentions the HTML body -- confirm.
        if tag_name == "BODY":
            encodings = [body_encoding, properties_encoding]
        else:
            encodings = [properties_encoding, body_encoding]

        try:
            decoded[tag_name] = tag_type.load(value, encodings=encodings, doc=doc)
        except KeyError as e:
            logger.error("Error while reading stream: %s not found", e)
        except Exception as e:
            logger.error("Error while reading stream: %s", e)

    return decoded
Loading