Skip to content

Commit a2d3b59

Browse files
author
John J. Aylward
committed
Implements unicode escaping similar to JSONObject.
* Removes deprecation on XML.stringToValue(). It now provides unescaping for strings to convert XML entities back into values.
* New unescape function to handle XML entities -> value conversion.
1 parent c24be0e commit a2d3b59

File tree

2 files changed

+73
-11
lines changed

2 files changed

+73
-11
lines changed

JSONML.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ private static Object parse(
175175
if (!(token instanceof String)) {
176176
throw x.syntaxError("Missing value");
177177
}
178-
newjo.accumulate(attribute, keepStrings ? token :JSONObject.stringToValue((String)token));
178+
newjo.accumulate(attribute, keepStrings ? token :XML.stringToValue((String)token));
179179
token = null;
180180
} else {
181181
newjo.accumulate(attribute, "");
@@ -226,7 +226,7 @@ private static Object parse(
226226
} else {
227227
if (ja != null) {
228228
ja.put(token instanceof String
229-
? keepStrings ? token :JSONObject.stringToValue((String)token)
229+
? keepStrings ? token :XML.stringToValue((String)token)
230230
: token);
231231
}
232232
}

XML.java

Lines changed: 71 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ of this software and associated documentation files (the "Software"), to deal
3535
*/
3636
@SuppressWarnings("boxing")
3737
public class XML {
38-
3938
/** The Character '&'. */
4039
public static final Character AMP = '&';
4140

@@ -71,6 +70,7 @@ public class XML {
7170
* &lt; <small>(less than)</small> is replaced by &amp;lt;
7271
* &gt; <small>(greater than)</small> is replaced by &amp;gt;
7372
* &quot; <small>(double quote)</small> is replaced by &amp;quot;
73+
* &apos; <small>(single quote / apostrophe)</small> is replaced by &amp;apos;
7474
* </pre>
7575
*
7676
* @param string
@@ -98,6 +98,67 @@ public static String escape(String string) {
9898
sb.append("&apos;");
9999
break;
100100
default:
101+
if (c < ' ' || (c >= '\u0080' && c < '\u00a0') || (c >= '\u2000' && c < '\u2100')) {
102+
sb.append("&#x");
103+
sb.append(Integer.toHexString(c));
104+
sb.append(";");
105+
} else {
106+
sb.append(c);
107+
}
108+
}
109+
}
110+
return sb.toString();
111+
}
112+
113+
/**
114+
* Removes XML escapes from the string.
115+
*
116+
* @param string
117+
* string to remove escapes from
118+
* @return string with converted entities
119+
*/
120+
public static String unescape(String string) {
121+
StringBuilder sb = new StringBuilder(string.length());
122+
for (int i = 0, length = string.length(); i < length; i++) {
123+
char c = string.charAt(i);
124+
if (c == AMP) {
125+
final int semic = string.indexOf(';', i);
126+
if (semic > i) {
127+
final String entity = string.substring(i + 1, semic);
128+
if (entity.charAt(0) == '#') {
129+
char cc;
130+
if (entity.charAt(1) == 'x') {
131+
// hex encoded unicode
132+
cc = (char) Integer.parseInt(entity.substring(2), 16);
133+
} else {
134+
// decimal encoded unicode
135+
cc = (char) Integer.parseInt(entity.substring(1));
136+
}
137+
sb.append(cc);
138+
} else {
139+
if ("quot".equalsIgnoreCase(entity)) {
140+
sb.append('"');
141+
} else if ("amp".equalsIgnoreCase(entity)) {
142+
sb.append(AMP);
143+
} else if ("apos".equalsIgnoreCase(entity)) {
144+
sb.append('\'');
145+
} else if ("lt".equalsIgnoreCase(entity)) {
146+
sb.append('<');
147+
} else if ("gt".equalsIgnoreCase(entity)) {
148+
sb.append('>');
149+
} else {
150+
sb.append(AMP).append(entity).append(';');
151+
}
152+
}
153+
// skip past the entity we just parsed.
154+
i += entity.length() + 1;
155+
} else {
156+
// this shouldn't happen in most cases since the parser
157+
// errors on unclosed entities.
158+
sb.append(c);
159+
}
160+
} else {
161+
// not part of an entity
101162
sb.append(c);
102163
}
103164
}
@@ -227,7 +288,6 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
227288
if (token == null) {
228289
token = x.nextToken();
229290
}
230-
231291
// attribute = value
232292
if (token instanceof String) {
233293
string = (String) token;
@@ -238,7 +298,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
238298
throw x.syntaxError("Missing value");
239299
}
240300
jsonobject.accumulate(string,
241-
keepStrings ? token : JSONObject.stringToValue((String) token));
301+
keepStrings ? unescape((String)token) : stringToValue((String) token));
242302
token = null;
243303
} else {
244304
jsonobject.accumulate(string, "");
@@ -270,7 +330,7 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
270330
string = (String) token;
271331
if (string.length() > 0) {
272332
jsonobject.accumulate("content",
273-
keepStrings ? token : JSONObject.stringToValue(string));
333+
keepStrings ? unescape(string) : stringToValue(string));
274334
}
275335

276336
} else if (token == LT) {
@@ -297,16 +357,18 @@ private static boolean parse(XMLTokener x, JSONObject context, String name, bool
297357
}
298358

299359
/**
300-
* This method has been deprecated in favor of the
301-
* {@link JSONObject.stringToValue(String)} method. Use it instead.
360+
* This method is the same as {@link JSONObject#stringToValue(String)}
361+
* except that this also tries to unescape String values.
302362
*
303-
* @deprecated Use JSONObject#stringToValue(String) instead.
304363
* @param string String to convert
305364
* @return JSON value of this string or the string
306365
*/
307-
@Deprecated
308366
public static Object stringToValue(String string) {
309-
return JSONObject.stringToValue(string);
367+
Object ret = JSONObject.stringToValue(string);
368+
if(ret instanceof String){
369+
return unescape((String)ret);
370+
}
371+
return ret;
310372
}
311373

312374
/**

0 commit comments

Comments
 (0)