Fix charset resolution in Content-Type parser

This commit is contained in:
Piotr F. Mieszkowski 2022-06-01 23:44:41 +02:00
parent 55b58d25bc
commit 46be24670c
2 changed files with 20 additions and 7 deletions

View File

@ -4,14 +4,20 @@ PGP_INLINE_BEGIN = b"-----BEGIN PGP MESSAGE-----"
PGP_INLINE_END = b"-----END PGP MESSAGE-----"
def parse_content_type(content_type):
    """Split a Content-Type header value into its media type and charset.

    The value is cut at every ';'.  The first piece is the media type; each
    remaining piece is treated as a ``name=value`` parameter and searched
    for one named ``charset``.

    Parameter names are compared case-insensitively (RFC 2045, section 5.1)
    and whitespace around both the name and the value is ignored.  Apart
    from that the value is returned verbatim: surrounding double quotes, if
    present, are NOT removed, so ``charset="UTF-8"`` yields ``'"UTF-8"'``.

    NOTE: the split on ';' is purely textual, so a quoted parameter value
    that itself contains ';' is cut in two as well.

    Args:
        content_type: header value, e.g. ``'text/plain; charset=utf-8'``.

    Returns:
        A ``(media_type, charset)`` tuple.  ``charset`` falls back to
        ``sys.getdefaultencoding()`` when no charset parameter with a
        non-empty value is found.
    """
    media_type, *params = [part.strip() for part in content_type.split(';')]

    for param in params:
        name, separator, value = param.partition('=')
        if not separator:
            # Not a name=value pair (e.g. a stray trailing ';'); ignore it.
            continue
        value = value.strip()
        # An empty value ("charset=") cannot name a codec, so keep looking
        # and eventually fall back to the default encoding.
        if value and name.strip().lower() == 'charset':
            return (media_type, value)

    return (media_type, sys.getdefaultencoding())
def is_pgp_inline(payload):
"""Finds out if the payload (bytes) contains PGP/INLINE markers."""

View File

@ -4,15 +4,22 @@ import sys
import unittest
class LacreTextTest(unittest.TestCase):
    """Unit tests for lacre.text.parse_content_type()."""

    def _check_parsed(self, header, expected_type, expected_charset):
        """Parse *header* and compare both elements of the returned pair."""
        parsed_type, parsed_charset = lacre.text.parse_content_type(header)
        self.assertEqual(parsed_type, expected_type)
        self.assertEqual(parsed_charset, expected_charset)

    def test_parse_content_type_without_charset(self):
        # A bare media type carries no charset: expect the default encoding.
        self._check_parsed('text/plain',
                           'text/plain', sys.getdefaultencoding())

    def test_parse_content_type_with_charset(self):
        # The charset value is reported as written, quotes included.
        self._check_parsed('text/plain; charset="UTF-8"',
                           'text/plain', '"UTF-8"')

    def test_parse_content_type_with_other_attributes(self):
        # Parameters other than charset must not be mistaken for one.
        self._check_parsed('text/plain; some-param="Some Value"',
                           'text/plain', sys.getdefaultencoding())

    def test_parse_content_type_with_several_attributes(self):
        # The charset is found even when further parameters follow it.
        self._check_parsed('text/plain; charset="UTF-8"; some-param="Some Value"',
                           'text/plain', '"UTF-8"')