Skip to content

Commit f9abc6d

Browse files
committed
PR feedback
1 parent 95c4581 commit f9abc6d

File tree

4 files changed

+35
-30
lines changed

4 files changed

+35
-30
lines changed

compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ trait BCodeBodyBuilder(val primitives: DottyPrimitives)(using ctx: Context) exte
585585

586586
case StringTag =>
587587
assert(const.value != null, const) // TODO this invariant isn't documented in `case class Constant`
588-
if BCodeUtils.checkConstantStringLength(null, const.stringValue) then
588+
if BCodeUtils.checkConstantStringLength(const.stringValue) then
589589
mnode.visitLdcInsn(const.stringValue) // `stringValue` special-cases null, but not for a const with StringTag
590590
else
591591
report.error("String constant is too long for the JVM", pos)
@@ -604,7 +604,7 @@ trait BCodeBodyBuilder(val primitives: DottyPrimitives)(using ctx: Context) exte
604604
)
605605
else
606606
val toASM = tp.toASMType
607-
if BCodeUtils.checkConstantStringLength(null, toASM.getInternalName) then
607+
if BCodeUtils.checkConstantStringLength(toASM.getInternalName) then
608608
mnode.visitLdcInsn(toASM)
609609
else
610610
report.error("Type name is too long for the JVM", pos)

compiler/src/dotty/tools/backend/jvm/BCodeHelpers.scala

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ trait BCodeHelpers(val backendUtils: BackendUtils)(using ctx: Context) extends B
394394
* Machine Specification, §4.3.4, or `null` if `sym` doesn't need a generic signature.
395395
* @see https://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.3.4
396396
*/
397-
def getGenericSignature(sym: Symbol, owner: Symbol): String = {
397+
def getGenericSignature(sym: Symbol, owner: Symbol): String | Null = {
398398
atPhase(erasurePhase) {
399399
def computeMemberTpe(): Type =
400400
if (sym.is(Method)) sym.denot.info
@@ -447,7 +447,9 @@ trait BCodeHelpers(val backendUtils: BackendUtils)(using ctx: Context) extends B
447447
val jReturnType = toTypeKind(methodInfo.resultType)
448448
val mdesc = MethodBType(paramJavaTypes, jReturnType).descriptor
449449
val mirrorMethodName = m.javaSimpleName
450-
if !BCodeUtils.checkConstantStringLength(jgensig, mirrorMethodName, mdesc) then
450+
val lengthOk = if jgensig ne null then BCodeUtils.checkConstantStringLength(jgensig)
451+
else BCodeUtils.checkConstantStringLength(mirrorMethodName, mdesc)
452+
if !lengthOk then
451453
report.error("Mirror method signature is too long for the JVM", m.srcPos)
452454
return
453455
val mirrorMethod: asm.MethodVisitor = jclass.visitMethod(
@@ -607,7 +609,7 @@ trait BCodeHelpers(val backendUtils: BackendUtils)(using ctx: Context) extends B
607609
val moduleName = internalName(moduleClass) // + "$"
608610
val mirrorName = bType.internalName
609611
val mirrorClass = new asm.tree.ClassNode
610-
if !BCodeUtils.checkConstantStringLength(null, mirrorName) then
612+
if !BCodeUtils.checkConstantStringLength(mirrorName) then
611613
report.error("Mirror class name is too long for the JVM", moduleClass.srcPos)
612614
return mirrorClass // not filled, but we cannot create it, and we just reported an error
613615
mirrorClass.visit(

compiler/src/dotty/tools/backend/jvm/BCodeSkelBuilder.scala

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,9 @@ trait BCodeSkelBuilder(using ctx: Context) extends BCodeHelpers {
339339
val flags = BCodeUtils.javaFlags(claszSymbol)
340340

341341
val thisSignature = getGenericSignature(claszSymbol, claszSymbol.owner)
342-
if !BCodeUtils.checkConstantStringLength(thisSignature, thisName) then
342+
val lengthOk = if thisSignature ne null then BCodeUtils.checkConstantStringLength(thisSignature)
343+
else BCodeUtils.checkConstantStringLength(thisName)
344+
if !lengthOk then
343345
report.error("Class name is too long for the JVM", claszSymbol.srcPos)
344346
return
345347
cnode.visit(backendUtils.classfileVersion, flags,
@@ -745,7 +747,9 @@ trait BCodeSkelBuilder(using ctx: Context) extends BCodeHelpers {
745747
else jMethodName
746748

747749
val mdesc = asmMethodType(methSymbol).descriptor
748-
if !BCodeUtils.checkConstantStringLength(jgensig, bytecodeName, mdesc) then
750+
val lengthOk = if jgensig ne null then BCodeUtils.checkConstantStringLength(jgensig)
751+
else BCodeUtils.checkConstantStringLength(bytecodeName, mdesc)
752+
if !lengthOk then
749753
report.error("Method signature is too long for the JVM", methSymbol.srcPos)
750754
return
751755
mnode = cnode.visitMethod(

compiler/src/dotty/tools/backend/jvm/BCodeUtils.scala

Lines changed: 22 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,36 +22,35 @@ import scala.tools.asm
2222
// (one can argue we should reuse one of the other existing helper objects, but that's for a later cleanup)
2323

2424
object BCodeUtils {
25-
val MAX_BYTES_PER_UTF8_CONSTANT = 65535
26-
/** Checks that the given signature if present, or the concatenation of the given name and descriptor, do not exceed the JVM's UTF-8 text size limits. */
27-
def checkConstantStringLength(sig: String | Null, name: String, desc: String = ""): Boolean = {
28-
// The JVM enforces a max length of 65535 bytes per UTF-8 constant.
29-
// In practice the set of UTF-8 that Java uses can't be more than 4 bytes per char.
30-
def count(str: String, startAt: Int): Int =
31-
var byteCount = startAt
32-
for
33-
i <- 0 until str.length
34-
do
25+
// The JVM enforces a max length of 65535 bytes per UTF-8 constant.
26+
// Java uses "Modified UTF-8", in which the null character specifically is two bytes,
27+
// and the rest of the BMP is as usual, see https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8
28+
// Outside the BMP, characters are represented as surrogate pairs, i.e., 2+2 bytes, but `charAt` sees them as separate "characters".
29+
// This means if we see a surrogate pair in a string, we should count each half as 2 bytes, since the encoded UTF-8 character will be 4 bytes.
30+
// One consequence of this is that the maximum number of UTF-8 bytes for a single Java `char` (not codepoint!) is 3.
31+
private val MAX_BYTES_PER_UTF8_CONSTANT = 65535
32+
private val MAX_BYTES_PER_CHAR = 3
33+
34+
/** Checks that the given name, or the concatenation of the given two names and descriptor if present, do not exceed the JVM's UTF-8 text size limits. */
35+
def checkConstantStringLength(name: String, other: String = ""): Boolean = {
36+
var byteCount = 0
37+
def check(str: String): Boolean =
38+
var i = 0
39+
while i < str.length do
3540
val c = str.charAt(i)
3641
byteCount += (
37-
if c <= 0x7F then 1
42+
if c == 0x00 then 2
43+
else if c <= 0x7F then 1
3844
else if c <= 0x7FF then 2
3945
else if Character.isHighSurrogate(c) || Character.isLowSurrogate(c) then 2
4046
else 3
4147
)
42-
byteCount
43-
48+
if byteCount > MAX_BYTES_PER_UTF8_CONSTANT then
49+
return false
50+
i += 1
51+
true
4452
// For performance, since we expect few large strings, check if the string is obviously fine first.
45-
val totalCount =
46-
if sig eq null then
47-
if name.length + desc.length < MAX_BYTES_PER_UTF8_CONSTANT / 4 then 0
48-
else count(desc, count(name, 0))
49-
else if sig.length >= MAX_BYTES_PER_UTF8_CONSTANT then
50-
count(sig, 0)
51-
else
52-
0
53-
54-
totalCount <= MAX_BYTES_PER_UTF8_CONSTANT
53+
name.length + other.length <= MAX_BYTES_PER_UTF8_CONSTANT / MAX_BYTES_PER_CHAR || (check(name) && check(other))
5554
}
5655

5756
/**

0 commit comments

Comments
 (0)