
Commit 77fc11d

Mikhail Gorbov authored and ekrivokonmapr committed
MapR [SPARK-139] Remove "update" related APIs from connector (apache#203)
1 parent 788e4eb commit 77fc11d

19 files changed: 37 additions, 9,402 deletions

external/maprdb/src/main/scala/com/mapr/db/spark/RDD/DocumentRDDFunctions.scala

Lines changed: 13 additions & 122 deletions
@@ -3,27 +3,25 @@ package com.mapr.db.spark.RDD
 
 import com.mapr.db.exceptions.TableNotFoundException
 import com.mapr.db.spark.RDD.partitioner.MapRDBPartitioner
-import com.mapr.db.spark.condition.{DBQueryCondition, Predicate}
 import com.mapr.db.spark.configuration.SerializableConfiguration
-import com.mapr.db.spark.dbclient.DBClient
 import com.mapr.db.spark.utils.{LoggingTrait, MapRDBUtils}
 import com.mapr.db.spark.writers._
 import org.apache.hadoop.conf.Configuration
 import org.ojai.{Document, DocumentConstants, Value}
-import org.ojai.store.DocumentMutation
 
+import org.apache.spark.Partitioner
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.rdd.RDD
-import org.apache.spark.Partitioner
 
 private[spark] class DocumentRDDFunctions extends LoggingTrait {
   protected def saveToMapRDBInternal[T](
-      rdd: RDD[T],
-      tablename: String,
-      createTable: Boolean = false,
-      bulkInsert: Boolean = false,
-      function1: (Broadcast[SerializableConfiguration],
-                  Boolean) => ((Iterator[T]) => Unit)): Unit = {
+      rdd: RDD[T],
+      tableName: String,
+      createTable: Boolean = false,
+      bulkInsert: Boolean = false,
+      function1: (Broadcast[SerializableConfiguration], Boolean) =>
+        ((Iterator[T]) => Unit)): Unit = {
+
     var isNewAndBulkLoad = (false, false)
 
     val partitioner: Option[Partitioner] = rdd.partitioner
@@ -40,11 +38,10 @@ private[spark] class DocumentRDDFunctions extends LoggingTrait {
 
     try {
       isNewAndBulkLoad =
-        MapRDBUtils.checkOrCreateTable(tablename, bulkInsert, createTable, keys)
+        MapRDBUtils.checkOrCreateTable(tableName, bulkInsert, createTable, keys)
     } catch {
      case e: TableNotFoundException =>
-        logError(
-          "Table: " + tablename + " not found and createTable set to: " + createTable)
+        logError("Table: " + tableName + " not found and createTable set to: " + createTable)
        throw e
      case any: Exception => throw any
    }
@@ -55,7 +52,7 @@ private[spark] class DocumentRDDFunctions extends LoggingTrait {
       rdd.context.broadcast(serializableConf)
     rdd.foreachPartition(function1(cnf, isNewAndBulkLoad._2))
     if (isNewAndBulkLoad._1 && isNewAndBulkLoad._2) {
-      MapRDBUtils.setBulkLoad(tablename, false)
+      MapRDBUtils.setBulkLoad(tableName, false)
     }
   }
 }
@@ -133,59 +130,6 @@ private[spark] case class OJAIDocumentRDDFunctions[T](rdd: RDD[T])(
         }
     )
   }
-
-  def updateToMapRDB(tableName: String,
-                     mutation: (T) => DocumentMutation,
-                     getID: (T) => Value): Unit = {
-    logDebug(
-      "updateToMapRDB in OJAIDocumentRDDFunctions is called for table: " + tableName)
-    this.saveToMapRDBInternal(
-      rdd,
-      tableName,
-      false,
-      false,
-      (cnf: Broadcast[SerializableConfiguration], isnewAndBulkLoad: Boolean) =>
-        (iter: Iterator[T]) =>
-          if (iter.nonEmpty) {
-            val writer = TableUpdateWriter(DBClient().getTable(tableName))
-            while (iter.hasNext) {
-              val element = iter.next
-              f.update(mutation(element), getID(element), writer)
-            }
-            writer.close()
-          }
-    )
-  }
-
-  def updateToMapRDB(tableName: String,
-                     mutation: (T) => DocumentMutation,
-                     getID: (T) => Value,
-                     condition: Predicate): Unit = {
-    logDebug(
-      "updateToMapRDB in OJAIDocumentRDDFunctions is called for table: " + tableName)
-    val queryCondition = DBQueryCondition(condition.build.build())
-
-    this.saveToMapRDBInternal(
-      rdd,
-      tableName,
-      false,
-      false,
-      (cnf: Broadcast[SerializableConfiguration], isnewAndBulkLoad: Boolean) =>
-        (iter: Iterator[T]) =>
-          if (iter.nonEmpty) {
-            val writer =
-              TableCheckAndMutateWriter(DBClient().getTable(tableName))
-            while (iter.hasNext) {
-              val element = iter.next
-              f.checkAndUpdate(mutation(element),
-                               queryCondition,
-                               getID(element),
-                               writer)
-            }
-            writer.close()
-          }
-    )
-  }
 }
 
 private[spark] case class PairedDocumentRDDFunctions[K, V](rdd: RDD[(K, V)])(
@@ -223,8 +167,8 @@ private[spark] case class PairedDocumentRDDFunctions[K, V](rdd: RDD[(K, V)])(
   def insertToMapRDB(tablename: String,
                      createTable: Boolean = false,
                      bulkInsert: Boolean = false): Unit = {
-    logDebug(
-      "insertToMapRDB in PairedDocumentRDDFunctions is called for table: " +
+
+    logDebug("insertToMapRDB in PairedDocumentRDDFunctions is called for table: " +
       tablename + " with bulkinsert flag set: " + bulkInsert + " and createTable:" + createTable)
 
     this.saveToMapRDBInternal[(K, V)](
@@ -245,57 +189,4 @@ private[spark] case class PairedDocumentRDDFunctions[K, V](rdd: RDD[(K, V)])(
         }
     )
   }
-
-  def updateToMapRDB(tablename: String,
-                     mutation: (V) => DocumentMutation): Unit = {
-    logDebug(
-      "updateToMapRDB in PairedDocumentRDDFunctions is called for table: " + tablename)
-
-    this.saveToMapRDBInternal[(K, V)](
-      rdd,
-      tablename,
-      false,
-      false,
-      (cnf: Broadcast[SerializableConfiguration], isnewAndBulkLoad: Boolean) =>
-        (iter: Iterator[(K, V)]) =>
-          if (iter.nonEmpty) {
-            val writer = TableUpdateWriter(DBClient().getTable(tablename))
-            while (iter.hasNext) {
-              val element = iter.next
-              f.update(mutation(element._2), f.getValue(element._1), writer)
-            }
-            writer.close()
-          }
-    )
-  }
-
-  def updateToMapRDB(tablename: String,
-                     mutation: (V) => DocumentMutation,
-                     condition: Predicate): Unit = {
-    logDebug(
-      "updateToMapRDB in PairedDocumentRDDFunctions is called for table: " + tablename)
-
-    val queryCondition = DBQueryCondition(condition.build.build())
-
-    this.saveToMapRDBInternal[(K, V)](
-      rdd,
-      tablename,
-      false,
-      false,
-      (cnf: Broadcast[SerializableConfiguration], isnewAndBulkLoad: Boolean) =>
-        (iter: Iterator[(K, V)]) =>
-          if (iter.nonEmpty) {
-            val writer =
-              TableCheckAndMutateWriter(DBClient().getTable(tablename))
-            while (iter.hasNext) {
-              val element = iter.next
-              f.checkAndMutate(mutation(element._2),
-                               queryCondition,
-                               f.getValue(element._1),
-                               writer)
-            }
-            writer.close()
-          }
-    )
-  }
 }
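For context, below is a minimal sketch of the RDD-level call pattern that this file's deleted updateToMapRDB overloads supported; it matches the removed signature (tableName, mutation, getID) but no longer compiles against the connector after this commit. The table path, the com.mapr.db.spark._ import, and the helper bodies are assumptions for illustration, not part of the diff.

    import org.ojai.Value
    import org.ojai.store.DocumentMutation
    import com.mapr.db.spark._  // assumed package object exposing OJAIDocument and the RDD implicits

    def buildMutation(doc: OJAIDocument): DocumentMutation = ???  // hypothetical per-document mutation
    def extractId(doc: OJAIDocument): Value = ???                 // hypothetical _id extractor

    // rdd: RDD[OJAIDocument] loaded elsewhere; this overload existed before this commit
    rdd.updateToMapRDB("/apps/user_profiles", buildMutation, extractId)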

external/maprdb/src/main/scala/com/mapr/db/spark/sql/DefaultSource.scala

Lines changed: 0 additions & 6 deletions
@@ -118,12 +118,6 @@ class DefaultSource
                          idFieldPath,
                          createTable = true,
                          bulkInsert = bulkMode)
-      case "Update" =>
-        MapRSpark.update(data,
-                         tableName,
-                         idFieldPath,
-                         createTable = false,
-                         bulkInsert = bulkMode)
      case _ =>
        throw new UnsupportedOperationException("Not supported operation")
    }
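With the "Update" branch removed from this match, an update request through the DataFrame writer now falls through to the default case shown above and raises UnsupportedOperationException. A hedged sketch of that user-facing effect follows; the data source name, option keys, and table path are assumptions for illustration, and only the exception behaviour comes from the code above.

    // Assumed option keys and data source name; only the resulting
    // UnsupportedOperationException("Not supported operation") is taken from the match above.
    df.write
      .format("com.mapr.db.spark.sql")              // assumed DefaultSource package name
      .option("tablePath", "/apps/user_profiles")   // hypothetical option key and table path
      .option("Operation", "Update")                // previously dispatched to MapRSpark.update
      .save()                                       // now throws UnsupportedOperationException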

external/maprdb/src/main/scala/com/mapr/db/spark/sql/MapRDBDataFrameFunctions.scala

Lines changed: 0 additions & 11 deletions
@@ -22,15 +22,4 @@ private[spark] case class MapRDBDataFrameFunctions(@transient df: DataFrame)
                      createTable: Boolean = false,
                      bulkInsert: Boolean = false): Unit =
     MapRSpark.insert(df, tableName, idFieldPath, createTable, bulkInsert)
-
-  def updateToMapRDB(tableName: String,
-                     mutation: (Row) => DocumentMutation,
-                     getID: (Row) => org.ojai.Value): Unit =
-    MapRSpark.update(df, tableName, mutation, getID)
-
-  def updateToMapRDB(tableName: String,
-                     mutation: (Row) => DocumentMutation,
-                     getID: (Row) => org.ojai.Value,
-                     condition: Predicate): Unit =
-    MapRSpark.update(df, tableName, mutation, getID, condition)
 }
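Likewise, a sketch of the DataFrame-level helpers deleted from MapRDBDataFrameFunctions above; the mutation and id extractors are hypothetical placeholders matching the removed signatures, and the call no longer compiles after this commit.

    import org.apache.spark.sql.{DataFrame, Row}
    import org.ojai.Value
    import org.ojai.store.DocumentMutation
    import com.mapr.db.spark.sql._  // assumed import that exposed these implicits on DataFrame

    def rowMutation(row: Row): DocumentMutation = ???  // hypothetical per-row mutation
    def rowId(row: Row): Value = ???                   // hypothetical _id extractor

    // df: DataFrame obtained elsewhere; removed overload: updateToMapRDB(tableName, mutation, getID)
    df.updateToMapRDB("/apps/user_profiles", rowMutation, rowId)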

external/maprdb/src/main/scala/com/mapr/db/spark/utils/MapRSpark.scala

Lines changed: 0 additions & 29 deletions
@@ -51,35 +51,6 @@ object MapRSpark
                              idFieldPath = idFieldPath)
   }
 
-  def update[D](dataset: Dataset[D],
-                tableName: String,
-                idFieldPath: String,
-                createTable: Boolean,
-                bulkInsert: Boolean): Unit = {
-    val documentRdd = dataset.toDF.rdd.map(MapRSqlUtils.rowToDocument)
-    documentRdd.saveToMapRDB(tableName,
-                             createTable = createTable,
-                             bulkInsert = bulkInsert,
-                             idFieldPath = idFieldPath)
-  }
-
-  def update(df: DataFrame,
-             tableName: String,
-             mutation: (Row) => DocumentMutation,
-             getID: (Row) => org.ojai.Value): Unit = {
-    val documentRdd = df.rdd
-    documentRdd.updateToMapRDB(tableName, mutation, getID)
-  }
-
-  def update(df: DataFrame,
-             tableName: String,
-             mutation: (Row) => DocumentMutation,
-             getID: (Row) => org.ojai.Value,
-             condition: Predicate): Unit = {
-    val documentRdd = df.rdd
-    documentRdd.updateToMapRDB(tableName, mutation, getID, condition)
-  }
-
   def save(
       dfw: DataFrameWriter[_],
       tableName: String,

external/maprdb/src/main/scala/com/mapr/db/spark/writers/OJAIKeyWriterHelper.scala

Lines changed: 23 additions & 28 deletions
@@ -14,52 +14,47 @@ private[spark] sealed trait OJAIValue[T] extends Serializable {
   type Self
   def getValue(elem: T): Document
   def write(doc: Document, getID: (Document) => Value, writer: Writer)
-  def update(mutation: DocumentMutation, getID: Value, writer: TableUpdateWriter)
-  def checkAndUpdate(mutation: DocumentMutation, queryCondition: DBQueryCondition, getID: Value, writer: TableCheckAndMutateWriter)
 }
 
 private[spark] object OJAIValue extends BaseOJAIValue {
 
   implicit def rowOJAIDocument[T]: OJAIValue[Row] = new OJAIValue[Row] {
     override type Self = Row
 
-    override def getValue(elem: Row): Document = MapRSqlUtils.rowToDocument(elem).getDoc
-
-    override def write(doc: Document, getID: (Document) => Value, writer: Writer) = writer.write(doc, getID(doc))
-
-    override def update(mutation: DocumentMutation, getID: Value, writer: TableUpdateWriter) = writer.write(mutation, getID)
-
-    override def checkAndUpdate(mutation: DocumentMutation, queryCondition: DBQueryCondition, getID: Value, writer: TableCheckAndMutateWriter): Unit =
-      writer.write(mutation, queryCondition, getID)
+    override def getValue(elem: Row): Document =
+      MapRSqlUtils.rowToDocument(elem).getDoc
 
+    override def write(doc: Document,
+                       getID: (Document) => Value,
+                       writer: Writer) = writer.write(doc, getID(doc))
   }
 
-  implicit def defaultOJAIDocument[T]: OJAIValue[OJAIDocument] = new OJAIValue[OJAIDocument] {
-    type Self = OJAIDocument
-    override def getValue(elem: OJAIDocument): Document = elem.getDoc
-    override def write(doc: Document, getID: (Document)=> Value, writer: Writer) = writer.write(doc, getID(doc))
-    override def update(mutation: DocumentMutation, getID: Value, writer: TableUpdateWriter) = writer.write(mutation, getID)
-    override def checkAndUpdate(mutation: DocumentMutation, queryCondition: DBQueryCondition, getID: Value, writer: TableCheckAndMutateWriter): Unit =
-      writer.write(mutation, queryCondition, getID)
-  }
+  implicit def defaultOJAIDocument[T]: OJAIValue[OJAIDocument] =
+    new OJAIValue[OJAIDocument] {
+      type Self = OJAIDocument
+      override def getValue(elem: OJAIDocument): Document = elem.getDoc
+      override def write(doc: Document,
+                         getID: (Document) => Value,
+                         writer: Writer) = writer.write(doc, getID(doc))
+    }
 }
 
 private[spark] trait BaseOJAIValue {
   implicit def overrideDefault[T <: AnyRef]: OJAIValue[T] = new OJAIValue[T] {
     type Self = AnyRef
-    override def getValue(elem: T): Document = BeanCodec.decode(DBClient().newDocumentBuilder(), elem)
-    override def write(doc: Document, getID: (Document) => Value, writer: Writer) = writer.write(doc, getID(doc))
-    override def update(mutation: DocumentMutation, getID: Value, writer: TableUpdateWriter) = writer.write(mutation, getID)
-    override def checkAndUpdate(mutation: DocumentMutation, queryCondition: DBQueryCondition, getID: Value, writer: TableCheckAndMutateWriter): Unit =
-      writer.write(mutation, queryCondition, getID)
+    override def getValue(elem: T): Document =
+      BeanCodec.decode(DBClient().newDocumentBuilder(), elem)
+    override def write(doc: Document,
+                       getID: (Document) => Value,
+                       writer: Writer) = writer.write(doc, getID(doc))
   }
 
   def overrideJavaDefault[T <: AnyRef]: OJAIValue[T] = new OJAIValue[T] {
     type Self = AnyRef
-    override def getValue(elem: T): Document = org.ojai.beans.BeanCodec.decode(DBClient().newDocumentBuilder(), elem)
-    override def write(doc: Document, getID: (Document) => Value, writer: Writer) = writer.write(doc, getID(doc))
-    override def update(mutation: DocumentMutation, getID: Value, writer: TableUpdateWriter) = writer.write(mutation, getID)
-    override def checkAndUpdate(mutation: DocumentMutation, queryCondition: DBQueryCondition, getID: Value, writer: TableCheckAndMutateWriter): Unit =
-      writer.write(mutation, queryCondition, getID)
+    override def getValue(elem: T): Document =
+      org.ojai.beans.BeanCodec.decode(DBClient().newDocumentBuilder(), elem)
+    override def write(doc: Document,
+                       getID: (Document) => Value,
+                       writer: Writer) = writer.write(doc, getID(doc))
   }
 }

external/maprdb/src/main/scala/com/mapr/db/spark/writers/OJAIValueWriterHelper.scala

Lines changed: 1 addition & 10 deletions
@@ -12,7 +12,6 @@ private[spark] sealed trait OJAIKey[T] extends Serializable {
   type Self
   def getValue(elem: T): Self
   def write(doc: Document, key: Self, table: Writer)
-  def update(mutation: DocumentMutation, key: Self, table: TableUpdateWriter)
   def checkAndMutate(mutation: DocumentMutation,
                      queryCondition: DBQueryCondition,
                      key: Self,
@@ -25,9 +24,6 @@ private[spark] object OJAIKey {
     override def getValue(elem: String) = elem
     override def write(doc: Document, key: String, table: Writer) =
       table.write(doc, key)
-    override def update(mutation: DocumentMutation,
-                        key: String,
-                        table: TableUpdateWriter) = table.write(mutation, key)
     override def checkAndMutate(mutation: DocumentMutation,
                                 queryCondition: DBQueryCondition,
                                 key: String,
@@ -40,9 +36,7 @@ private[spark] object OJAIKey {
     override def getValue(elem: ByteBuffer) = elem
     override def write(doc: Document, key: ByteBuffer, table: Writer) =
       table.write(doc, key)
-    override def update(mutation: DocumentMutation,
-                        key: ByteBuffer,
-                        table: TableUpdateWriter) = table.write(mutation, key)
+
     override def checkAndMutate(mutation: DocumentMutation,
                                 queryCondition: DBQueryCondition,
                                 key: ByteBuffer,
@@ -55,9 +49,6 @@ private[spark] object OJAIKey {
     override def getValue(elem: DBBinaryValue) = elem.getByteBuffer()
     override def write(doc: Document, key: ByteBuffer, table: Writer) =
       table.write(doc, key)
-    override def update(mutation: DocumentMutation,
-                        key: ByteBuffer,
-                        table: TableUpdateWriter) = table.write(mutation, key)
     override def checkAndMutate(mutation: DocumentMutation,
                                 queryCondition: DBQueryCondition,
                                 key: ByteBuffer,

external/maprdb/src/main/scala/com/mapr/db/spark/writers/TableWriter.scala

Lines changed: 0 additions & 20 deletions
@@ -51,26 +51,6 @@ private[spark] case class TableInsertWriter(@transient table: DocumentStore)
   }
 }
 
-private[spark] case class TableUpdateWriter(@transient table: DocumentStore) {
-
-  def write(mutation: DocumentMutation, key: ByteBuffer): Unit = {
-    write(mutation, DBValueBuilderImpl.KeyValueBuilder.initFrom(key))
-  }
-
-  def write(mutation: DocumentMutation, key: String): Unit = {
-    write(mutation, DBValueBuilderImpl.KeyValueBuilder.initFrom(key))
-  }
-
-  def write(mutation: DocumentMutation, key: org.ojai.Value): Unit = {
-    table.update(key, mutation)
-  }
-
-  def close(): Unit = {
-    table.flush()
-    table.close()
-  }
-}
-
 private[spark] case class TableCheckAndMutateWriter(
     @transient table: DocumentStore) {

0 commit comments
