Skip to content
Next commit
Change SString::GetUTF8NoConvert to GetUTF8 that converts the SString
This enables SString to get out of the UTF16 state and helps move us away from "SString's natural encoding is UTF16"
  • Loading branch information
jkoritzinsky committed Jun 21, 2022
commit 00913ac19c46039e921552c1953537f55b48e2ad
2 changes: 1 addition & 1 deletion src/coreclr/debug/daccess/daccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2501,7 +2501,7 @@ namespace serialization { namespace bin {
return ErrOverflow;
}

memcpy_s(dest, destSize, s.GetUTF8NoConvert(), cnt);
memcpy_s(dest, destSize, s.GetUTF8(), cnt);

return cnt;
}
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/inc/sstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,9 @@ class EMPTY_BASES_DECL SString : private SBuffer
const UTF8 *GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) const;
const ANSI *GetANSI(AbstractScratchBuffer &scratch) const;

// Used when the representation is known, throws if the representation doesn't match
const UTF8 *GetUTF8NoConvert() const;
// You can always get a UTF8 string. This will force a conversion
// if necessary.
const UTF8 *GetUTF8() const;

// Converts/copies into the given output string
void ConvertToUnicode(SString &dest) const;
Expand Down Expand Up @@ -779,6 +780,7 @@ class EMPTY_BASES_DECL SString : private SBuffer
void ConvertASCIIToUnicode(SString &dest) const;
void ConvertToUnicode() const;
void ConvertToUnicode(const CIterator &i) const;
void ConvertToUTF8() const;

const SString &GetCompatibleString(const SString &s, SString &scratch) const;
const SString &GetCompatibleString(const SString &s, SString &scratch, const CIterator &i) const;
Expand Down
19 changes: 19 additions & 0 deletions src/coreclr/inc/sstring.inl
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,25 @@ inline const WCHAR *SString::GetUnicode() const
SS_RETURN GetRawUnicode();
}

// Get a const pointer to the internal buffer as a UTF8 string.
//
// If the string is not already in the UTF8 representation, its internal
// representation is converted first (see ConvertToUTF8), so the call can
// throw in that case; per the contract it does not throw when the string
// is already UTF8.
//
// Returns: the string's raw UTF8 buffer (checked non-null by the
// postcondition).
inline const UTF8 *SString::GetUTF8() const
{
    SS_CONTRACT(const UTF8 *)
    {
        GC_NOTRIGGER;
        PRECONDITION(CheckPointer(this));
        SS_POSTCONDITION(CheckPointer(RETVAL));
        // Only the conversion path can throw.
        if (IsRepresentation(REPRESENTATION_UTF8)) NOTHROW; else THROWS;
        SUPPORTS_DAC;
    }
    SS_CONTRACT_END;

    // Force the internal representation to UTF8 before exposing the buffer.
    ConvertToUTF8();

    SS_RETURN GetRawUTF8();
}

// Normalize the string to unicode. This will make many operations nonfailing.
inline void SString::Normalize() const
{
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/utilcode/clrconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,10 @@ namespace
SString valueAsUTF8;
temp.ConvertToUTF8(valueAsUTF8);

CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameAsUTF8.GetUTF8NoConvert(), noPrefix);
CLRConfigNoCache nonCache = CLRConfigNoCache::Get(nameAsUTF8.GetUTF8(), noPrefix);
LPCSTR valueNoCache = nonCache.AsString();

_ASSERTE(SString::_stricmp(valueNoCache, valueAsUTF8.GetUTF8NoConvert()) == 0);
_ASSERTE(SString::_stricmp(valueNoCache, valueAsUTF8.GetUTF8()) == 0);
#endif // defined(DEBUG) && !defined(SELF_NO_HOST)
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/utilcode/debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ bool _DbgBreakCheck(
" Image: %s\n\n",
GetCurrentProcessId(), GetCurrentProcessId(),
GetCurrentThreadId(), GetCurrentThreadId(),
szExpr, szFile, iLine, modulePath.GetUTF8NoConvert());
szExpr, szFile, iLine, modulePath.GetUTF8());

formattedMessages = TRUE;
}
Expand Down Expand Up @@ -680,11 +680,11 @@ void DECLSPEC_NORETURN __FreeBuildAssertFail(const char *szFile, int iLine, cons
" File: %s, Line: %d Image:\n%s\n",
GetCurrentProcessId(), GetCurrentProcessId(),
GetCurrentThreadId(), GetCurrentThreadId(),
szExpr, szFile, iLine, modulePath.GetUTF8NoConvert());
OutputDebugStringUtf8(buffer.GetUTF8NoConvert());
szExpr, szFile, iLine, modulePath.GetUTF8());
OutputDebugStringUtf8(buffer.GetUTF8());

// Write out the error to the console
printf(buffer.GetUTF8NoConvert());
printf(buffer.GetUTF8());

// Log to the stress log. Note that we can't include the szExpr b/c that
// may not be a string literal (particularly for formattable asserts).
Expand Down
53 changes: 33 additions & 20 deletions src/coreclr/utilcode/sstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,39 @@ void SString::ConvertToUnicode(const CIterator &i) const
RETURN;
}

//-----------------------------------------------------------------------------
// Convert the internal representation for this String to UTF8.
// The conversion happens in place: although the method is const, it mutates
// this object through const_cast. Nothing is done if the string is already
// in the UTF8 representation.
//-----------------------------------------------------------------------------
void SString::ConvertToUTF8() const
{
CONTRACT_VOID
{
POSTCONDITION(IsRepresentation(REPRESENTATION_UTF8));
// Only the conversion path is declared as throwing.
if (IsRepresentation(REPRESENTATION_UTF8)) NOTHROW; else THROWS;
GC_NOTRIGGER;
SUPPORTS_DAC_HOST_ONLY;
}
CONTRACT_END;

if (!IsRepresentation(REPRESENTATION_UTF8))
{
if (IsRepresentation(REPRESENTATION_ASCII))
{
// ASCII is a subset of UTF8, so we can just set the representation.
(const_cast<SString*>(this))->SetRepresentation(REPRESENTATION_UTF8);
}
else
{
// Convert into a temporary string, then replace this string's
// contents with the converted result.
StackSString s;
ConvertToUTF8(s);
PREFIX_ASSUME(!s.IsImmutable());
(const_cast<SString*>(this))->Set(s);
}
}

RETURN;
}

//-----------------------------------------------------------------------------
// Set s to be a copy of this string's contents, but in the unicode format.
//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -1827,26 +1860,6 @@ const UTF8 *SString::GetUTF8(AbstractScratchBuffer &scratch, COUNT_T *pcbUtf8) c
RETURN ((SString&)scratch).GetRawUTF8();
}

//-----------------------------------------------------------------------------
// Return a const pointer to this string's internal buffer without performing
// any conversion. The string must already be in the UTF8 representation;
// otherwise E_INVALIDARG is thrown. Because nothing is converted, no scratch
// buffer is needed.
//-----------------------------------------------------------------------------
const UTF8 *SString::GetUTF8NoConvert() const
{
    CONTRACT(const UTF8 *)
    {
        INSTANCE_CHECK_NULL;
        THROWS;
        GC_NOTRIGGER;
    }
    CONTRACT_END;

    // Reject any representation other than UTF8 up front.
    if (!IsRepresentation(REPRESENTATION_UTF8))
    {
        ThrowHR(E_INVALIDARG);
    }

    RETURN GetRawUTF8();
}

//-----------------------------------------------------------------------------
// Safe version of sprintf.
// Prints formatted ansi text w/ var args to this buffer.
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/vm/assemblynative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ extern "C" void QCALLTYPE AssemblyNative_InternalLoad(NativeAssemblyNameParts* p
if (pAssemblyNameParts->_pName == NULL)
COMPlusThrow(kArgumentException, W("Format_StringZeroLength"));

StackSString ssName;
SString(SString::Literal, pAssemblyNameParts->_pName).ConvertToUTF8(ssName);
StackSString ssName(SString::Literal, pAssemblyNameParts->_pName);

AssemblyMetaDataInternal asmInfo;

Expand All @@ -87,11 +86,11 @@ extern "C" void QCALLTYPE AssemblyNative_InternalLoad(NativeAssemblyNameParts* p

SmallStackSString ssLocale;
if (pAssemblyNameParts->_pCultureName != NULL)
SString(SString::Literal, pAssemblyNameParts->_pCultureName).ConvertToUTF8(ssLocale);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8NoConvert() : NULL;
ssLocale.SetLiteral(pAssemblyNameParts->_pCultureName);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8() : NULL;

// Initialize spec
spec.Init(ssName.GetUTF8NoConvert(), &asmInfo,
spec.Init(ssName.GetUTF8(), &asmInfo,
pAssemblyNameParts->_pPublicKeyOrToken, pAssemblyNameParts->_cbPublicKeyOrToken, pAssemblyNameParts->_flags);

if (pParentAssembly != NULL)
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/vm/class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2236,7 +2236,7 @@ void MethodTable::DebugRecursivelyDumpInstanceFields(LPCUTF8 pszClassName, BOOL
// Display them
if(debug) {
ssBuff.Printf("%s:\n", pszClassName);
OutputDebugStringUtf8(ssBuff.GetUTF8NoConvert());
OutputDebugStringUtf8(ssBuff.GetUTF8());
}
else {
LOG((LF_CLASSLOADER, LL_ALWAYS, "%s:\n", pszClassName));
Expand All @@ -2250,7 +2250,7 @@ void MethodTable::DebugRecursivelyDumpInstanceFields(LPCUTF8 pszClassName, BOOL
#endif
if(debug) {
ssBuff.Printf("offset %3d %s\n", pFD->GetOffset_NoLogging(), pFD->GetName());
OutputDebugStringUtf8(ssBuff.GetUTF8NoConvert());
OutputDebugStringUtf8(ssBuff.GetUTF8());
}
else {
LOG((LF_CLASSLOADER, LL_ALWAYS, "offset %3d %s\n", pFD->GetOffset_NoLogging(), pFD->GetName()));
Expand Down Expand Up @@ -2299,7 +2299,7 @@ void MethodTable::DebugDumpFieldLayout(LPCUTF8 pszClassName, BOOL debug)
if (debug)
{
ssBuff.Printf("Field layout for '%s':\n\n", pszClassName);
OutputDebugStringUtf8(ssBuff.GetUTF8NoConvert());
OutputDebugStringUtf8(ssBuff.GetUTF8());
}
else
{
Expand All @@ -2326,7 +2326,7 @@ void MethodTable::DebugDumpFieldLayout(LPCUTF8 pszClassName, BOOL debug)
FieldDesc *pFD = GetClass()->GetFieldDescList() + ((GetNumInstanceFields()-cParentInstanceFields) + i);
if(debug) {
ssBuff.Printf("offset %3d %s\n", pFD->GetOffset_NoLogging(), pFD->GetName());
OutputDebugStringUtf8(ssBuff.GetUTF8NoConvert());
OutputDebugStringUtf8(ssBuff.GetUTF8());
}
else
{
Expand Down Expand Up @@ -2404,7 +2404,7 @@ MethodTable::DebugDumpGCDesc(
if (fDebug)
{
ssBuff.Printf("GC description for '%s':\n\n", pszClassName);
OutputDebugStringUtf8(ssBuff.GetUTF8NoConvert());
OutputDebugStringUtf8(ssBuff.GetUTF8());
}
else
{
Expand Down Expand Up @@ -2438,7 +2438,7 @@ MethodTable::DebugDumpGCDesc(
pSeries->GetSeriesOffset() - OBJECT_SIZE,
pSeries->GetSeriesSize(),
pSeries->GetSeriesSize() + GetBaseSize() );
OutputDebugStringUtf8(ssBuff.GetUTF8NoConvert());
OutputDebugStringUtf8(ssBuff.GetUTF8());
}
else
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/comcallablewrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ void SimpleComCallWrapper::LogRefCount(ComCallWrapper *pWrap, StackSString &ssMe
EX_TRY
{
ssMessage.AppendPrintf(", RefCount=%u\n", dwRefCountToLog);
OutputDebugStringUtf8(ssMessage.GetUTF8NoConvert());
OutputDebugStringUtf8(ssMessage.GetUTF8());
}
EX_CATCH
{ }
Expand Down
9 changes: 4 additions & 5 deletions src/coreclr/vm/coreassemblyspec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,7 @@ extern "C" void QCALLTYPE AssemblyName_InitializeAssemblySpec(NativeAssemblyName

BEGIN_QCALL;

StackSString ssName;
SString(SString::Literal, pAssemblyNameParts->_pName).ConvertToUTF8(ssName);
StackSString ssName(SString::Literal, pAssemblyNameParts->_pName);

AssemblyMetaDataInternal asmInfo;

Expand All @@ -197,11 +196,11 @@ extern "C" void QCALLTYPE AssemblyName_InitializeAssemblySpec(NativeAssemblyName

SmallStackSString ssLocale;
if (pAssemblyNameParts->_pCultureName != NULL)
SString(SString::Literal, pAssemblyNameParts->_pCultureName).ConvertToUTF8(ssLocale);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8NoConvert() : NULL;
ssLocale.SetLiteral(pAssemblyNameParts->_pCultureName);
asmInfo.szLocale = (pAssemblyNameParts->_pCultureName != NULL) ? ssLocale.GetUTF8() : NULL;

// Initialize spec
pAssemblySpec->Init(ssName.GetUTF8NoConvert(), &asmInfo,
pAssemblySpec->Init(ssName.GetUTF8(), &asmInfo,
pAssemblyNameParts->_pPublicKeyOrToken, pAssemblyNameParts->_cbPublicKeyOrToken, pAssemblyNameParts->_flags);

// Copy and own any fields we do not own
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/dllimport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,7 @@ class ILStubState : public StubState

strILStubCode.AppendPrintf("// Code size\t%d (0x%04x)\n", cbCode, cbCode);
strILStubCode.AppendPrintf(".maxstack %d \n", maxStack);
strILStubCode.AppendPrintf(".locals %s\n", strLocalSig.GetUTF8NoConvert());
strILStubCode.AppendPrintf(".locals %s\n", strLocalSig.GetUTF8());

m_slIL.LogILStub(jitFlags, &strILStubCode);

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/vm/gdbjit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1301,8 +1301,8 @@ void FunctionMember::DumpLinkageName(char* ptr, int& offset)
namespaceOrClassName.ConvertToUTF8(utf8namespaceOrClassName);
methodName.ConvertToUTF8(utf8methodName);

const char *nspace = utf8namespaceOrClassName.GetUTF8NoConvert();
const char *mname = utf8methodName.GetUTF8NoConvert();
const char *nspace = utf8namespaceOrClassName.GetUTF8();
const char *mname = utf8methodName.GetUTF8();

if (!nspace || !mname)
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/jitinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12937,7 +12937,7 @@ PCODE UnsafeJitFunction(PrepareCodeConfig* config,
(unsigned)(methodJitTimeStop.QuadPart - methodJitTimeStart.QuadPart), //cycle count
methodInfo.ILCodeSize //il size
);
OutputDebugStringUtf8(codeBase.GetUTF8NoConvert());
OutputDebugStringUtf8(codeBase.GetUTF8());
}
#endif // PERF_TRACK_METHOD_JITTIMES

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/mlinfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ CustomMarshalerHelper *EEMarshalingData::GetCustomMarshalerHelper(Assembly *pAss

// Load the custom marshaler class.
BOOL fNameIsAsmQualified = FALSE;
hndCustomMarshalerType = TypeName::GetTypeUsingCASearchRules(strCMMarshalerTypeName.GetUTF8NoConvert(), pAssembly, &fNameIsAsmQualified);
hndCustomMarshalerType = TypeName::GetTypeUsingCASearchRules(strCMMarshalerTypeName.GetUTF8(), pAssembly, &fNameIsAsmQualified);

if (hndCustomMarshalerType.IsGenericTypeDefinition())
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/multicorejitplayer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ void PlayerModuleInfo::Dump(const CHAR * prefix, int index)
i ++;
}

MulticoreJitTrace(("%s", ssBuff.GetUTF8NoConvert()));
MulticoreJitTrace(("%s", ssBuff.GetUTF8()));
}

#endif
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/pgo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ HRESULT PgoManager::getPgoInstrumentationResults(MethodDesc* pMD, BYTE** pAlloca
TypeHandle th = TypeName::GetTypeManaged(typeString.GetUnicode(), NULL, FALSE, FALSE, FALSE, NULL, NULL);
if (!th.IsNull())
{
MethodDesc* pMD = MemberLoader::FindMethodByName(th.GetMethodTable(), methodString.GetUTF8NoConvert());
MethodDesc* pMD = MemberLoader::FindMethodByName(th.GetMethodTable(), methodString.GetUTF8());
newPtr = (INT_PTR)pMD;
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/vm/profilinghelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ void ProfilingAPIUtility::AppendSupplementaryInformation(int iStringResource, SS

pString->AppendUTF8(" ");
pString->AppendPrintf(
supplementaryInformationUtf8.GetUTF8NoConvert(),
supplementaryInformationUtf8.GetUTF8(),
GetCurrentProcessId(),
iStringResource);
}
Expand Down Expand Up @@ -311,7 +311,7 @@ void ProfilingAPIUtility::LogProfEventVA(
messageFromResource.ConvertToUTF8(messageFromResourceUtf8);

StackSString messageToLog;
messageToLog.VPrintf(messageFromResourceUtf8.GetUTF8NoConvert(), insertionArgs);
messageToLog.VPrintf(messageFromResourceUtf8.GetUTF8(), insertionArgs);

AppendSupplementaryInformation(iStringResourceID, &messageToLog);

Expand All @@ -325,7 +325,7 @@ void ProfilingAPIUtility::LogProfEventVA(
}

// Output debug strings for diagnostic messages.
OutputDebugStringUtf8(messageToLog.GetUTF8NoConvert());
OutputDebugStringUtf8(messageToLog.GetUTF8());
}

// See code:ProfilingAPIUtility.LogProfEventVA for description of arguments.
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/vm/stubgen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ void ILStubLinker::DumpIL_FormatToken(mdToken token, SString &strTokenFormatting
SString typeNameUtf8;
typeName.ConvertToUTF8(typeNameUtf8);
SString strFieldName(SString::Utf8, pFD->GetName());
strTokenFormatting.Printf("%s::%s", typeNameUtf8.GetUTF8NoConvert(), strFieldName.GetUTF8NoConvert());
strTokenFormatting.Printf("%s::%s", typeNameUtf8.GetUTF8(), strFieldName.GetUTF8());
}
else if (TypeFromToken(token) == mdtModule)
{
Expand Down Expand Up @@ -550,13 +550,13 @@ ILStubLinker::LogILInstruction(
//
if (pDumpILStubCode)
{
pDumpILStubCode->AppendPrintf("%s /*(%2d)*/ %s %s %s\n", strLabel.GetUTF8NoConvert(), iCurStack, strOpcode.GetUTF8NoConvert(),
strArgument.GetUTF8NoConvert(), strTokenName.GetUTF8NoConvert());
pDumpILStubCode->AppendPrintf("%s /*(%2d)*/ %s %s %s\n", strLabel.GetUTF8(), iCurStack, strOpcode.GetUTF8(),
strArgument.GetUTF8(), strTokenName.GetUTF8());
}
else
{
LOG((LF_STUBS, LL_INFO1000, "%s (%2d) %s %s %s\n", strLabel.GetUTF8NoConvert(), iCurStack, \
strOpcode.GetUTF8NoConvert(), strArgument.GetUTF8NoConvert(), strTokenName.GetUTF8NoConvert()));
LOG((LF_STUBS, LL_INFO1000, "%s (%2d) %s %s %s\n", strLabel.GetUTF8(), iCurStack, \
strOpcode.GetUTF8(), strArgument.GetUTF8(), strTokenName.GetUTF8()));
}
} // ILStubLinker::LogILInstruction

Expand Down