Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Fix comments
  • Loading branch information
uros-db committed Jun 25, 2024
commit b56c032eb020a5b40f8488e5f3bb1b6ad8f5ae56
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ public static UTF8String toUpperCase(final UTF8String target) {
private static UTF8String toUpperCaseSlow(final UTF8String target) {
  // The ICU UCharacter class is used for the case conversion because it gives the desired
  // behaviour; it only accepts Java strings, so the input is decoded to a String first and
  // the uppercased result is wrapped back into a UTF8String.
  // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
  final String decoded = target.toString();
  final String upperCased = UCharacter.toUpperCase(decoded);
  return UTF8String.fromString(upperCased);
}

Expand All @@ -366,7 +366,7 @@ private static UTF8String toUpperCaseSlow(final UTF8String target, final int col
// convert the string to uppercase, which only accepts Java strings as input.
ULocale locale = CollationFactory.fetchCollation(collationId)
.collator.getLocale(ULocale.ACTUAL_LOCALE);
// TODO: All UTF8String -> String conversions should use `makeValid` (SPARK-48715)
// TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
return UTF8String.fromString(UCharacter.toUpperCase(locale, target.toString()));
}

Expand All @@ -384,7 +384,7 @@ public static UTF8String toLowerCase(final UTF8String target) {
private static UTF8String toLowerCaseSlow(final UTF8String target) {
  // The ICU UCharacter class is used for the case conversion because it gives the desired
  // behaviour; it only accepts Java strings, so the input is decoded to a String first and
  // the lowercased result is wrapped back into a UTF8String.
  // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
  final String decoded = target.toString();
  final String lowerCased = UCharacter.toLowerCase(decoded);
  return UTF8String.fromString(lowerCased);
}

Expand All @@ -404,7 +404,7 @@ private static UTF8String toLowerCaseSlow(final UTF8String target, final int col
// convert the string to lowercase, which only accepts Java strings as input.
ULocale locale = CollationFactory.fetchCollation(collationId)
.collator.getLocale(ULocale.ACTUAL_LOCALE);
// TODO: All UTF8String -> String conversions should use `makeValid` (SPARK-48715)
// TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
return UTF8String.fromString(UCharacter.toLowerCase(locale, target.toString()));
}

Expand Down Expand Up @@ -448,6 +448,7 @@ public static UTF8String lowerCaseCodePoints(final UTF8String target) {
}

private static UTF8String lowerCaseCodePointsSlow(final UTF8String target) {
// TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
String targetString = target.toString();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < targetString.length(); ++i) {
Expand All @@ -462,7 +463,7 @@ private static UTF8String lowerCaseCodePointsSlow(final UTF8String target) {
public static UTF8String toTitleCase(final UTF8String target) {
  // The ICU UCharacter class is used for the case conversion because it gives the desired
  // behaviour; it only accepts Java strings, so the input is decoded to a String first and
  // the titlecased result is wrapped back into a UTF8String.
  // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
  final String decoded = target.toString();
  // A word-level break iterator is handed to ICU alongside the decoded text.
  final BreakIterator wordBreaks = BreakIterator.getWordInstance();
  final String titleCased = UCharacter.toTitleCase(decoded, wordBreaks);
  return UTF8String.fromString(titleCased);
}
Expand All @@ -473,6 +474,7 @@ public static UTF8String toTitleCase(final UTF8String target) {
public static UTF8String toTitleCase(final UTF8String target, final int collationId) {
  // Look up the locale of the collator registered for the given collation id; it is passed
  // both to the titlecase conversion and to the word break iterator.
  final ULocale locale = CollationFactory.fetchCollation(collationId)
    .collator.getLocale(ULocale.ACTUAL_LOCALE);
  // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
  final String decoded = target.toString();
  final BreakIterator wordBreaks = BreakIterator.getWordInstance(locale);
  final String titleCased = UCharacter.toTitleCase(locale, decoded, wordBreaks);
  return UTF8String.fromString(titleCased);
}
Expand All @@ -482,6 +484,7 @@ public static int findInSet(final UTF8String match, final UTF8String set, int co
return 0;
}

// TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
String setString = set.toString();
StringSearch stringSearch = CollationFactory.getStringSearch(setString, match.toString(),
collationId);
Expand Down Expand Up @@ -644,6 +647,7 @@ public static UTF8String lowercaseSubStringIndex(final UTF8String string,

public static Map<String, String> getCollationAwareDict(UTF8String string,
Map<String, String> dict, int collationId) {
// TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid`
String srcStr = string.toString();

Map<String, String> collationAwareDict = new HashMap<>();
Expand Down