Skip to content

Commit f24fecd

Browse files
committed
Issue #18076: Introduce importlib.util.decode_source().
The helper function makes it easier to implement importlib.abc.InspectLoader.get_source() by making that function require just the raw bytes for source code and handling all other details.
1 parent f4375ef commit f24fecd

6 files changed

Lines changed: 3611 additions & 3554 deletions

File tree

Doc/library/importlib.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,14 @@ an :term:`importer`.
916916

917917
.. versionadded:: 3.4
918918

919+
.. function:: decode_source(source_bytes)
920+
921+
Decode the given bytes representing source code and return it as a string
922+
with universal newlines (as required by
923+
:meth:`importlib.abc.InspectLoader.get_source`).
924+
925+
.. versionadded:: 3.4
926+
919927
.. function:: resolve_name(name, package)
920928

921929
Resolve a relative module name to an absolute one.

Lib/importlib/_bootstrap.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,18 @@ def _code_to_bytecode(code, mtime=0, source_size=0):
723723
return data
724724

725725

726+
def decode_source(source_bytes):
727+
"""Decode bytes representing source code and return the string.
728+
729+
Universal newline support is used in the decoding.
730+
"""
731+
import tokenize # To avoid bootstrap issues.
732+
source_bytes_readline = _io.BytesIO(source_bytes).readline
733+
encoding = tokenize.detect_encoding(source_bytes_readline)
734+
newline_decoder = _io.IncrementalNewlineDecoder(None, True)
735+
return newline_decoder.decode(source_bytes.decode(encoding[0]))
736+
737+
726738
# Loaders #####################################################################
727739

728740
class BuiltinImporter:
@@ -965,11 +977,7 @@ def get_source(self, fullname):
965977
except OSError as exc:
966978
raise ImportError("source not available through get_data()",
967979
name=fullname) from exc
968-
import tokenize
969-
readsource = _io.BytesIO(source_bytes).readline
970-
encoding = tokenize.detect_encoding(readsource)
971-
newline_decoder = _io.IncrementalNewlineDecoder(None, True)
972-
return newline_decoder.decode(source_bytes.decode(encoding[0]))
980+
return decode_source(source_bytes)
973981

974982
def source_to_code(self, data, path, *, _optimize=-1):
975983
"""Return the code object compiled from source.

Lib/importlib/util.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from ._bootstrap import MAGIC_NUMBER
44
from ._bootstrap import cache_from_source
5+
from ._bootstrap import decode_source
56
from ._bootstrap import module_to_load
67
from ._bootstrap import set_loader
78
from ._bootstrap import set_package

Lib/test/test_importlib/test_util.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,27 @@
99
import warnings
1010

1111

12+
class DecodeSourceBytesTests(unittest.TestCase):
13+
14+
source = "string ='ü'"
15+
16+
def test_ut8_default(self):
17+
source_bytes = self.source.encode('utf-8')
18+
self.assertEqual(util.decode_source(source_bytes), self.source)
19+
20+
def test_specified_encoding(self):
21+
source = '# coding=latin-1\n' + self.source
22+
source_bytes = source.encode('latin-1')
23+
assert source_bytes != source.encode('utf-8')
24+
self.assertEqual(util.decode_source(source_bytes), source)
25+
26+
def test_universal_newlines(self):
27+
source = '\r\n'.join([self.source, self.source])
28+
source_bytes = source.encode('utf-8')
29+
self.assertEqual(util.decode_source(source_bytes),
30+
'\n'.join([self.source, self.source]))
31+
32+
1233
class ModuleToLoadTests(unittest.TestCase):
1334

1435
module_name = 'ModuleManagerTest_module'

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ Core and Builtins
123123
Library
124124
-------
125125

126+
- Issue #18076: Introduce importlib.util.decode_source().
127+
126128
- importlib.abc.SourceLoader.get_source() no longer changes SyntaxError or
127129
UnicodeDecodeError into ImportError.
128130

0 commit comments

Comments
 (0)