-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-10342] [SPARK-10309] [SPARK-10474] [SPARK-10929] [SQL] Cooperative memory management #9241
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
d0ada7b
ee6b9a4
49b8135
7087f2f
ce24f03
8470fc9
827d4f0
a3e01d0
7bf76e5
51278f8
27ff4fc
4491013
c044afe
afc8c7c
cda4b2a
4ee1f42
e943e74
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,16 +28,28 @@ | |
| */ | ||
| public abstract class MemoryConsumer { | ||
|
|
||
| private TaskMemoryManager memoryManager; | ||
| private long pageSize; | ||
| private final TaskMemoryManager taskMemoryManager; | ||
| private final long pageSize; | ||
| private long used; | ||
|
|
||
| protected MemoryConsumer(TaskMemoryManager memoryManager, long pageSize) { | ||
| this.memoryManager = memoryManager; | ||
| protected MemoryConsumer(TaskMemoryManager taskMemoryManager, long pageSize) { | ||
| this.taskMemoryManager = taskMemoryManager; | ||
| if (pageSize == 0) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this necessary since we have another constructor? |
||
| pageSize = taskMemoryManager.pageSizeBytes(); | ||
| } | ||
| this.pageSize = pageSize; | ||
| this.used = 0; | ||
| } | ||
|
|
||
| protected MemoryConsumer(TaskMemoryManager taskMemoryManager) { | ||
| this(taskMemoryManager, taskMemoryManager.pageSizeBytes()); | ||
| } | ||
|
|
||
| protected MemoryConsumer(TaskMemoryManager memoryManager) { | ||
| this(memoryManager, memoryManager.pageSizeBytes()); | ||
| /** | ||
| * Returns the size of used memory in bytes. | ||
| */ | ||
| long getUsed() { | ||
| return used; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -70,19 +82,21 @@ public void spill() throws IOException { | |
| * If there is not enough memory, throws OutOfMemoryError. | ||
| */ | ||
| protected void acquireMemory(long size) { | ||
| long got = memoryManager.acquireExecutionMemory(size, this); | ||
| long got = taskMemoryManager.acquireExecutionMemory(size, this); | ||
| if (got < size) { | ||
| memoryManager.releaseExecutionMemory(got, this); | ||
| memoryManager.showMemoryUsage(); | ||
| taskMemoryManager.releaseExecutionMemory(got, this); | ||
| taskMemoryManager.showMemoryUsage(); | ||
| throw new OutOfMemoryError("Could not acquire " + size + " bytes of memory, got " + got); | ||
| } | ||
| used += got; | ||
| } | ||
|
|
||
| /** | ||
| * Release `size` bytes memory. | ||
| */ | ||
| protected void releaseMemory(long size) { | ||
| memoryManager.releaseExecutionMemory(size, this); | ||
| taskMemoryManager.releaseExecutionMemory(size, this); | ||
| used -= size; | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -93,23 +107,25 @@ protected void releaseMemory(long size) { | |
| * @throws OutOfMemoryError | ||
| */ | ||
| protected MemoryBlock allocatePage(long required) { | ||
| MemoryBlock page = memoryManager.allocatePage(Math.max(pageSize, required), this); | ||
| MemoryBlock page = taskMemoryManager.allocatePage(Math.max(pageSize, required), this); | ||
| if (page == null || page.size() < required) { | ||
| long got = 0; | ||
| if (page != null) { | ||
| got = page.size(); | ||
| freePage(page); | ||
| } | ||
| memoryManager.showMemoryUsage(); | ||
| taskMemoryManager.showMemoryUsage(); | ||
| throw new OutOfMemoryError("Unable to acquire " + required + " bytes of memory, got " + got); | ||
| } | ||
| used += page.size(); | ||
| return page; | ||
| } | ||
|
|
||
| /** | ||
| * Free a memory block. | ||
| */ | ||
| protected void freePage(MemoryBlock page) { | ||
| memoryManager.freePage(page, this); | ||
| taskMemoryManager.freePage(page, this); | ||
| used -= page.size(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Again, maybe an invalid concern, but is it safe to call `page.size()` after the page has been freed by `freePage()`? |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,14 +20,12 @@ | |
| import java.io.IOException; | ||
| import java.util.Arrays; | ||
| import java.util.BitSet; | ||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
|
|
||
| import com.google.common.annotations.VisibleForTesting; | ||
| import org.slf4j.Logger; | ||
| import org.slf4j.LoggerFactory; | ||
|
|
||
| import org.apache.spark.SparkException; | ||
| import org.apache.spark.unsafe.Platform; | ||
| import org.apache.spark.unsafe.memory.MemoryBlock; | ||
| import org.apache.spark.util.Utils; | ||
|
|
||
|
|
@@ -109,7 +107,7 @@ public class TaskMemoryManager { | |
| /** | ||
| * The size of memory granted to each consumer. | ||
| */ | ||
| private final HashMap<MemoryConsumer, Long> consumers; | ||
| private final HashSet<MemoryConsumer> consumers; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add a comment to explain that this field is guarded by synchronizing on `this`. |
||
|
|
||
| /** | ||
| * Construct a new TaskMemoryManager. | ||
|
|
@@ -118,7 +116,7 @@ public TaskMemoryManager(MemoryManager memoryManager, long taskAttemptId) { | |
| this.inHeap = memoryManager.tungstenMemoryIsAllocatedInHeap(); | ||
| this.memoryManager = memoryManager; | ||
| this.taskAttemptId = taskAttemptId; | ||
| this.consumers = new HashMap<>(); | ||
| this.consumers = new HashSet<>(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -135,12 +133,9 @@ public long acquireExecutionMemory(long required, MemoryConsumer consumer) { | |
| // try to release memory from other consumers first, then we can reduce the frequency of | ||
| // spilling, avoid to have too many spilled files. | ||
| if (got < required) { | ||
| // consumers could be modified by spill(), so we should have a copy here. | ||
| MemoryConsumer[] cs = new MemoryConsumer[consumers.size()]; | ||
| consumers.keySet().toArray(cs); | ||
| // Call spill() on other consumers to release memory | ||
| for (MemoryConsumer c: cs) { | ||
| if (c != null && c != consumer) { | ||
| for (MemoryConsumer c: consumers) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this approach still have the same concern about concurrent modification of `consumers`?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, we never remove anything from it, and nothing more is added to it while this lock is held. |
||
| if (c != null && c != consumer && c.getUsed() > 0) { | ||
| try { | ||
| long released = c.spill(required - got, consumer); | ||
| if (released > 0) { | ||
|
|
@@ -176,15 +171,7 @@ public long acquireExecutionMemory(long required, MemoryConsumer consumer) { | |
| } | ||
| } | ||
|
|
||
| // Update the accounting, even consumer is null | ||
| if (got > 0) { | ||
| long old = 0L; | ||
| if (consumers.containsKey(consumer)) { | ||
| old = consumers.get(consumer); | ||
| } | ||
| consumers.put(consumer, got + old); | ||
| } | ||
|
|
||
| consumers.add(consumer); | ||
| logger.debug("Task {} acquire {} for {}", taskAttemptId, Utils.bytesToString(got), consumer); | ||
| return got; | ||
| } | ||
|
|
@@ -194,67 +181,20 @@ public long acquireExecutionMemory(long required, MemoryConsumer consumer) { | |
| * Release N bytes of execution memory for a MemoryConsumer. | ||
| */ | ||
| public void releaseExecutionMemory(long size, MemoryConsumer consumer) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Add an assert to make sure `size >= 0`.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
| assert(size >= 0); | ||
| if (size == 0) { | ||
| return; | ||
| } | ||
| synchronized (this) { | ||
| if (consumers.containsKey(consumer)) { | ||
| long old = consumers.get(consumer); | ||
| if (old > size) { | ||
| consumers.put(consumer, old - size); | ||
| } else { | ||
| if (old < size) { | ||
| String msg = "Release " + size + " bytes memory (more than acquired " + old + ") for " | ||
| + consumer; | ||
| logger.warn(msg); | ||
| if (Utils.isTesting()) { | ||
| Platform.throwException(new SparkException(msg)); | ||
| } | ||
| } | ||
| consumers.remove(consumer); | ||
| } | ||
| } else { | ||
| String msg = "Release " + size + " bytes memory for non-existent " + consumer; | ||
| logger.warn(msg); | ||
| if (Utils.isTesting()) { | ||
| Platform.throwException(new SparkException(msg)); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| logger.debug("Task {} release {} from {}", taskAttemptId, Utils.bytesToString(size), consumer); | ||
| memoryManager.releaseExecutionMemory(size, taskAttemptId); | ||
| } | ||
|
|
||
| public void transferOwnership(long size, MemoryConsumer from, MemoryConsumer to) { | ||
| assert(size >= 0); | ||
| synchronized (this) { | ||
| if (consumers.containsKey(from)) { | ||
| long old = consumers.get(from); | ||
| if (old > size) { | ||
| consumers.put(from, old - size); | ||
| } else { | ||
| consumers.remove(from); | ||
| } | ||
| if (consumers.containsKey(to)) { | ||
| old = consumers.get(to); | ||
| } else { | ||
| old = 0L; | ||
| } | ||
| consumers.put(to, old + size); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Dump the memory usage of all consumers. | ||
| */ | ||
| public void showMemoryUsage() { | ||
| logger.info("Memory used in task " + taskAttemptId); | ||
| synchronized (this) { | ||
| for (MemoryConsumer c: consumers.keySet()) { | ||
| logger.info("Acquired by " + c + ": " + Utils.bytesToString(consumers.get(c))); | ||
| for (MemoryConsumer c: consumers) { | ||
| if (c.getUsed() > 0) { | ||
| logger.info("Acquired by " + c + ": " + Utils.bytesToString(c.getUsed())); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
@@ -399,8 +339,11 @@ public long getOffsetInPage(long pagePlusOffsetAddress) { | |
| public long cleanUpAllAllocatedMemory() { | ||
| synchronized (this) { | ||
| Arrays.fill(pageTable, null); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good idea. |
||
| for (MemoryConsumer c: consumers.keySet()) { | ||
| logger.warn("leak " + Utils.bytesToString(consumers.get(c)) + " memory from " + c); | ||
| for (MemoryConsumer c: consumers) { | ||
| if (c != null && c.getUsed() > 0) { | ||
| // In case of failed task, it's normal to see leaked memory | ||
| logger.warn("leak " + Utils.bytesToString(c.getUsed()) + " memory from " + c); | ||
| } | ||
| } | ||
| consumers.clear(); | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -103,11 +103,11 @@ public ShuffleExternalSorter( | |
| int numPartitions, | ||
| SparkConf conf, | ||
| ShuffleWriteMetrics writeMetrics) { | ||
| super(memoryManager); | ||
| super(memoryManager, (int) Math.min(PackedRecordPointer.MAXIMUM_PAGE_SIZE_BYTES, | ||
| memoryManager.pageSizeBytes())); | ||
| this.taskMemoryManager = memoryManager; | ||
| this.blockManager = blockManager; | ||
| this.taskContext = taskContext; | ||
| this.peakMemoryUsedBytes = initialSize; | ||
| this.numPartitions = numPartitions; | ||
| // Use getSizeAsKb (not bytes) to maintain backwards compatibility if no units are provided | ||
| this.fileBufferSizeBytes = (int) conf.getSizeAsKb("spark.shuffle.file.buffer", "32k") * 1024; | ||
|
|
@@ -116,6 +116,7 @@ public ShuffleExternalSorter( | |
| this.writeMetrics = writeMetrics; | ||
| acquireMemory(initialSize * 8L); | ||
| this.inMemSorter = new ShuffleInMemorySorter(initialSize); | ||
| this.peakMemoryUsedBytes = getMemoryUsage(); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -372,6 +373,7 @@ public void insertRecord(Object recordBase, long recordOffset, int length, int p | |
| } | ||
|
|
||
| growPointerArrayIfNecessary(); | ||
| // Need 4 bytes to store the record length. | ||
| final int required = length + 4; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd add back the now-missing comment that says "Need 4 bytes to store the record length."
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added. |
||
| acquireNewPageIfNecessary(required); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about naming this class SpillableMemoryConsumer ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it too long?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The length is about the same as TaskMemoryManager - so not too long.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm neutral on the name change. At first I thought that the name
`MemoryConsumer` might not make sense if it was used by places that can't spill, but I suppose that those places could just have `spill()` return 0. So I'm fine sticking with the current name.