Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Better error handling, test coverage.
  • Loading branch information
petermaxlee committed Jul 8, 2016
commit 4908840f33e60e177b656d37c72b27b733aa9a53
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public class UDFXPathUtil {
private XPathExpression expression = null;
private String oldPath = null;

public Object eval(String xml, String path, QName qname) {
public Object eval(String xml, String path, QName qname) throws XPathExpressionException {
if (xml == null || path == null || qname == null) {
return null;
}
Expand All @@ -56,7 +56,7 @@ public Object eval(String xml, String path, QName qname) {
try {
expression = xpath.compile(path);
} catch (XPathExpressionException e) {
expression = null;
throw new RuntimeException("Invalid XPath '" + path + "'" + e.getMessage(), e);
}
oldPath = path;
}
Expand All @@ -66,31 +66,30 @@ public Object eval(String xml, String path, QName qname) {
}

reader.set(xml);

try {
return expression.evaluate(inputSource, qname);
} catch (XPathExpressionException e) {
throw new RuntimeException("Invalid expression '" + oldPath + "'", e);
throw new RuntimeException("Invalid XML document: " + e.getMessage() + "\n" + xml, e);
}
}

public Boolean evalBoolean(String xml, String path) {
public Boolean evalBoolean(String xml, String path) throws XPathExpressionException {
return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
}

public String evalString(String xml, String path) {
public String evalString(String xml, String path) throws XPathExpressionException {
return (String) eval(xml, path, XPathConstants.STRING);
}

public Double evalNumber(String xml, String path) {
public Double evalNumber(String xml, String path) throws XPathExpressionException {
return (Double) eval(xml, path, XPathConstants.NUMBER);
}

public Node evalNode(String xml, String path) {
public Node evalNode(String xml, String path) throws XPathExpressionException {
return (Node) eval(xml, path, XPathConstants.NODE);
}

public NodeList evalNodeList(String xml, String path) {
public NodeList evalNodeList(String xml, String path) throws XPathExpressionException {
return (NodeList) eval(xml, path, XPathConstants.NODESET);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.spark.sql.catalyst.expressions.xml

import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types._
Expand All @@ -26,57 +28,52 @@ import org.apache.spark.unsafe.types.UTF8String
* Base class for xpath_boolean, xpath_double, xpath_int, etc.
*/
abstract class XPathExtract extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
  // The two children of this binary expression are the XML input and the XPath string.
  override def left: Expression = xml
  override def right: Expression = path

  /** XPath expressions are always nullable, e.g. if the xml string is empty. */
  override def nullable: Boolean = true

  override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType)

  /**
   * Rejects a non-foldable `path` during analysis; otherwise defers to the inherited
   * input type check.
   */
  override def checkInputDataTypes(): TypeCheckResult = {
    if (!path.foldable) {
      TypeCheckFailure("path should be a string literal")
    } else {
      super.checkInputDataTypes()
    }
  }

  /** Shared XPath evaluator, created lazily and excluded from serialization. */
  @transient protected lazy val xpathUtil = new UDFXPathUtil

  /**
   * The XPath string, computed once by evaluating `path` without an input row, so the
   * UTF8String-to-String conversion is not repeated for every row.
   */
  @transient protected lazy val pathString: String = path.eval().asInstanceOf[UTF8String].toString

  /** Concrete implementations need to override the following two members. */
  def xml: Expression
  def path: Expression
}

@ExpressionDescription(
  usage = "_FUNC_(xml, xpath) - Evaluates a boolean xpath expression.",
  extended = "> SELECT _FUNC_('<a><b>1</b></a>','a/b');\ntrue")
case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract {

  override def prettyName: String = "xpath_boolean"
  override def dataType: DataType = BooleanType

  /**
   * Evaluates the cached XPath string against the given XML value.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the boolean result from `UDFXPathUtil.evalBoolean`
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    xpathUtil.evalBoolean(xml.asInstanceOf[UTF8String].toString, pathString)
  }
}

@ExpressionDescription(
  usage = "_FUNC_(xml, xpath) - Returns a short value that matches the xpath expression",
  extended = "> SELECT _FUNC_('<a><b>1</b><b>2</b></a>','sum(a/b)');\n3")
case class XPathShort(xml: Expression, path: Expression) extends XPathExtract {
  // Must be "xpath_short": this class produces ShortType, and "xpath_int" is already the
  // pretty name of XPathInt.
  override def prettyName: String = "xpath_short"
  override def dataType: DataType = ShortType

  /**
   * Evaluates the cached XPath string as a number and narrows it to a short.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the result as a short, or null when the evaluator returns null
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString)
    if (ret eq null) null else ret.shortValue()
  }
}
case class XPathInt(xml: Expression, path: Expression) extends XPathExtract {
  override def prettyName: String = "xpath_int"
  override def dataType: DataType = IntegerType

  /**
   * Evaluates the cached XPath string as a number and converts it to an int.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the result as an int, or null when the evaluator returns null
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString)
    if (ret eq null) null else ret.intValue()
  }
}
case class XPathLong(xml: Expression, path: Expression) extends XPathExtract {
  override def prettyName: String = "xpath_long"
  override def dataType: DataType = LongType

  /**
   * Evaluates the cached XPath string as a number and converts it to a long.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the result as a long, or null when the evaluator returns null
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString)
    if (ret eq null) null else ret.longValue()
  }
}
case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract {
  override def prettyName: String = "xpath_float"
  override def dataType: DataType = FloatType

  /**
   * Evaluates the cached XPath string as a number and converts it to a float.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the result as a float, or null when the evaluator returns null
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString)
    if (ret eq null) null else ret.floatValue()
  }
}
case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract {
  // Must be "xpath_double": this class produces DoubleType, and "xpath_float" is already
  // the pretty name of XPathFloat.
  override def prettyName: String = "xpath_double"
  override def dataType: DataType = DoubleType

  /**
   * Evaluates the cached XPath string as a number and returns it as a double.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the result as a double, or null when the evaluator returns null
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    val ret = xpathUtil.evalNumber(xml.asInstanceOf[UTF8String].toString, pathString)
    if (ret eq null) null else ret.doubleValue()
  }
}
case class XPathString(xml: Expression, path: Expression) extends XPathExtract {
  override def prettyName: String = "xpath_string"
  override def dataType: DataType = StringType

  /**
   * Evaluates the cached XPath string and wraps the textual result as a UTF8String.
   *
   * @param xml  the XML input, expected to be a UTF8String
   * @param path unused; the XPath text is read from the inherited `pathString`
   * @return the result of `UTF8String.fromString` applied to the evaluator's string
   */
  override def nullSafeEval(xml: Any, path: Any): Any = {
    val ret = xpathUtil.evalString(xml.asInstanceOf[UTF8String].toString, pathString)
    UTF8String.fromString(ret)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,12 @@ trait ExpressionEvalHelper extends GeneratorDrivenPropertyChecks {
}

protected def checkEvaluation(
expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = {
originalExpr: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = {
val expression = originalExpr match {
case replaceable: RuntimeReplaceable => replaceable.replaced
case _ => originalExpr
}

val catalystValue = CatalystTypeConverters.convertToCatalyst(expected)
checkEvaluationWithoutCodegen(expression, catalystValue, inputRow)
checkEvaluationWithGeneratedMutableProjection(expression, catalystValue, inputRow)
Expand Down
Loading