Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Add outer merge and columns sharing name in merge renamed with _x and _y
  • Loading branch information
mjclawar committed Dec 17, 2016
commit 4cd7f0d2d7547a9e8d9c19e4982032496bf7a479
54 changes: 43 additions & 11 deletions dist/__tests__/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,50 @@ describe('frame', function () {
});

describe('mergeDataFrame', function () {
it('merges two DataFrames on a given key', function () {
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
var df1 = new _frame2.default(vals1);
var vals2 = [{ x: 2, z: 6 }, { x: 1, z: 1 }, { x: 3, z: 100 }];
var df2 = new _frame2.default(vals2);
describe('innerMerge', function () {
it('merges two DataFrames on a given key', function () {
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
var df1 = new _frame2.default(vals1);
var vals2 = [{ x: 2, z: 6 }, { x: 1, z: 1 }, { x: 3, z: 100 }];
var df2 = new _frame2.default(vals2);

var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x'], 'inner');
expect(df3).toBeInstanceOf(_frame2.default);
expect(df3.length).toEqual(3);
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
});

var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x']);
expect(df3).toBeInstanceOf(_frame2.default);
expect(df3.length).toEqual(3);
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
it('replaces a common column with _x and _y', function () {
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
var df1 = new _frame2.default(vals1);
var vals2 = [{ x: 2, y: 6 }, { x: 1, y: 1 }, { x: 3, y: 100 }];
var df2 = new _frame2.default(vals2);

var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x'], 'inner');
expect(df3).toBeInstanceOf(_frame2.default);
expect(df3.length).toEqual(3);
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
expect(df3.y_x.values.toArray()).toEqual([2, 3, 4]);
expect(df3.y_y.values.toArray()).toEqual([1, 6, 100]);
});
});

describe('outerMerge', function () {
it('merges two DataFrames on a given key', function () {
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
var df1 = new _frame2.default(vals1);
var vals2 = [{ x: 2, z: 6 }, { x: 1, z: 1 }, { x: 3, z: 100 }, { x: 5, z: 200 }];
var df2 = new _frame2.default(vals2);

var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x'], 'outer');
expect(df3).toBeInstanceOf(_frame2.default);
expect(df3.length).toEqual(5);
expect(df3.x.values.toArray()).toEqual([1, 2, 3, 4, 5]);
expect(df3.y.values.toArray()).toEqual([2, 3, 4, 10, null]);
expect(df3.z.values.toArray()).toEqual([1, 6, 100, null, 200]);
});
});
});

Expand Down
199 changes: 193 additions & 6 deletions dist/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -226,9 +226,10 @@ exports.default = DataFrame;
var innerMerge = function innerMerge(df1, df2, on) {
var data = [];

var nonMergeCols1 = df1.columns.filter(function (k) {
return on.indexOf(k) < 0;
});
var cols1 = (0, _utils.nonMergeColumns)(df1.columns, on);
var cols2 = (0, _utils.nonMergeColumns)(df2.columns, on);

var intersectCols = (0, _utils.intersectingColumns)(cols1, cols2);

var _iteratorNormalCompletion = true;
var _didIteratorError = false;
Expand Down Expand Up @@ -282,12 +283,21 @@ var innerMerge = function innerMerge(df1, df2, on) {
if (match) {
(function () {
var rowData = {};
nonMergeCols1.forEach(function (k) {

on.forEach(function (k) {
rowData[k] = row1[k].iloc(0);
});
df2.columns.forEach(function (k) {
rowData[k] = row2[k].iloc(0);

cols1.forEach(function (k) {
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_x' : k;
rowData[nextColName] = row1[k].iloc(0);
});

cols2.forEach(function (k) {
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_y' : k;
rowData[nextColName] = row2[k].iloc(0);
});

data.push(rowData);
})();
}
Expand Down Expand Up @@ -333,6 +343,181 @@ var innerMerge = function innerMerge(df1, df2, on) {
return new DataFrame(data);
};

/**
 * Perform an outer merge of two DataFrames.
 *
 * Every row of `df1` is compared with every row of `df2`; two rows match when
 * all of the `on` columns are strictly equal (`===`). The result contains, in
 * this order:
 *   1. one row per matching (df1, df2) pair, in df1 order then df2 order;
 *   2. each df1 row that matched nothing, with df2's non-key columns set to null;
 *   3. each df2 row that matched nothing, with df1's non-key columns set to null.
 *
 * A non-key column that exists in both frames is emitted twice, as `<name>_x`
 * (value from df1) and `<name>_y` (value from df2).
 *
 * NOTE(review): the second element yielded by `iterrows()` is used both as an
 * index into the `matched` arrays and as the argument to `iloc`, so it is
 * assumed to be the 0-based row position - confirm against `iterrows`.
 *
 * @param {DataFrame} df1 left frame
 * @param {DataFrame} df2 right frame
 * @param {Array} on names of the columns to join on
 *
 * @returns {DataFrame} new frame built from the merged rows
 */
var outerMerge = function outerMerge(df1, df2, on) {
  var data = [];

  var cols1 = (0, _utils.nonMergeColumns)(df1.columns, on);
  var cols2 = (0, _utils.nonMergeColumns)(df2.columns, on);

  var intersectCols = (0, _utils.intersectingColumns)(cols1, cols2);

  // Tracks which rows of each frame took part in at least one match.
  var matched1 = new Array(df1.length).fill(false);
  var matched2 = new Array(df2.length).fill(false);

  // Output name of a non-key column: suffixed only when both frames have it.
  var outputName = function outputName(k, suffix) {
    return intersectCols.indexOf(k) >= 0 ? k + suffix : k;
  };

  // Assemble one output row and append it. Each argument maps a column name
  // to the cell value for that part of the row. Keys are inserted in the
  // order: merge keys, df1 columns, df2 columns.
  var pushRow = function pushRow(keyValue, leftValue, rightValue) {
    var rowData = {};

    on.forEach(function (k) {
      rowData[k] = keyValue(k);
    });

    cols1.forEach(function (k) {
      rowData[outputName(k, '_x')] = leftValue(k);
    });

    cols2.forEach(function (k) {
      rowData[outputName(k, '_y')] = rightValue(k);
    });

    data.push(rowData);
  };

  var nullValue = function nullValue() {
    return null;
  };

  // Walk df.iterrows() by hand (this file is ES5 output, so no for...of).
  // As with for...of, the iterator is closed via return() when `visit` throws,
  // and the original error wins over any error raised while closing.
  var eachRow = function eachRow(df, visit) {
    var iterator = df.iterrows()[Symbol.iterator]();
    var inBody = false;
    var didError = false;
    var caught = undefined;

    try {
      for (var step = iterator.next(); !step.done; step = iterator.next()) {
        inBody = true;
        var pair = (0, _slicedToArray3.default)(step.value, 2);
        visit(pair[0], pair[1]);
        inBody = false;
      }
    } catch (err) {
      didError = true;
      caught = err;
    } finally {
      try {
        if (inBody && iterator.return) {
          iterator.return();
        }
      } finally {
        if (didError) {
          throw caught;
        }
      }
    }
  };

  eachRow(df1, function (row1, idx_1) {
    eachRow(df2, function (row2, idx_2) {
      // Stops at the first differing key, like the original `break`.
      var match = on.every(function (c) {
        return row1[c].iloc(0) === row2[c].iloc(0);
      });

      if (!match) {
        return;
      }

      // The row is only built once the keys are known to match, so
      // non-matching pairs cost nothing beyond the key comparison.
      var leftCell = function leftCell(k) {
        return row1[k].iloc(0);
      };

      pushRow(leftCell, leftCell, function (k) {
        return row2[k].iloc(0);
      });
      matched1[idx_1] = true;
      matched2[idx_2] = true;
    });
  });

  // Left rows without a partner: right-hand columns are null.
  matched1.forEach(function (m, idx) {
    if (m) {
      return;
    }

    var leftCell = function leftCell(k) {
      return df1[k].iloc(idx);
    };

    pushRow(leftCell, leftCell, nullValue);
  });

  // Right rows without a partner: left-hand columns are null.
  matched2.forEach(function (m, idx) {
    if (m) {
      return;
    }

    var rightCell = function rightCell(k) {
      return df2[k].iloc(idx);
    };

    pushRow(rightCell, nullValue, rightCell);
  });

  return new DataFrame(data);
};

/**
* Perform a merge of two DataFrames
*
Expand Down Expand Up @@ -364,6 +549,8 @@ var mergeDataFrame = exports.mergeDataFrame = function mergeDataFrame(df1, df2,
switch (how) {
case 'inner':
return innerMerge(df1, df2, mergeOn);
case 'outer':
return outerMerge(df1, df2, mergeOn);
default:
throw new Error('MergeError: ' + how + ' not a supported merge type');
}
Expand Down
31 changes: 29 additions & 2 deletions dist/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.sum = undefined;
exports.intersectingColumns = exports.nonMergeColumns = exports.sum = undefined;

var _regenerator = require("babel-runtime/regenerator");

Expand Down Expand Up @@ -104,4 +104,31 @@ function enumerate(iterable) {
}
}
}, _marked[0], this, [[4, 16, 20, 28], [21,, 23, 27]]);
}
}

// Merge utils
/**
 * Select the columns of a DataFrame that are not merge keys.
 *
 * @param {Array<string>} columns every column name of the frame
 * @param {Array<string>} on column names used as merge keys
 * @returns {Array<string>} the entries of `columns` that do not occur in `on`,
 *   in their original order
 */
var nonMergeColumns = exports.nonMergeColumns = function nonMergeColumns(columns, on) {
  // A column is kept when the key list does not contain it.
  function isNotMergeKey(name) {
    return on.indexOf(name) === -1;
  }

  var remaining = columns.filter(isNotMergeKey);
  return remaining;
};

/**
 * Find the column names that two column lists have in common.
 *
 * @param {Array<string>} cols1 first list of column names
 * @param {Array<string>} cols2 second list of column names
 * @returns {Array<string>} the entries of `cols1` that also occur in `cols2`,
 *   in `cols1` order
 */
var intersectingColumns = exports.intersectingColumns = function intersectingColumns(cols1, cols2) {
  // A name is shared when the second list contains it as well.
  function appearsInSecond(name) {
    return cols2.indexOf(name) !== -1;
  }

  var shared = cols1.filter(appearsInSecond);
  return shared;
};
54 changes: 43 additions & 11 deletions src/es6/__tests__/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,50 @@ describe('frame', () => {
});

describe('mergeDataFrame', () => {
it('merges two DataFrames on a given key', () => {
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
const df1 = new DataFrame(vals1);
const vals2 = [{x: 2, z: 6}, {x: 1, z: 1}, {x: 3, z: 100}];
const df2 = new DataFrame(vals2);
describe('innerMerge', () => {
it('merges two DataFrames on a given key', () => {
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
const df1 = new DataFrame(vals1);
const vals2 = [{x: 2, z: 6}, {x: 1, z: 1}, {x: 3, z: 100}];
const df2 = new DataFrame(vals2);

const df3 = mergeDataFrame(df1, df2, ['x'], 'inner');
expect(df3).toBeInstanceOf(DataFrame);
expect(df3.length).toEqual(3);
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
});

const df3 = mergeDataFrame(df1, df2, ['x']);
expect(df3).toBeInstanceOf(DataFrame);
expect(df3.length).toEqual(3);
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
it('replaces a common column with _x and _y', () => {
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
const df1 = new DataFrame(vals1);
const vals2 = [{x: 2, y: 6}, {x: 1, y: 1}, {x: 3, y: 100}];
const df2 = new DataFrame(vals2);

const df3 = mergeDataFrame(df1, df2, ['x'], 'inner');
expect(df3).toBeInstanceOf(DataFrame);
expect(df3.length).toEqual(3);
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
expect(df3.y_x.values.toArray()).toEqual([2, 3, 4]);
expect(df3.y_y.values.toArray()).toEqual([1, 6, 100]);
});
});

describe('outerMerge', () => {
it('merges two DataFrames on a given key', () => {
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
const df1 = new DataFrame(vals1);
const vals2 = [{x: 2, z: 6}, {x: 1, z: 1}, {x: 3, z: 100}, {x: 5, z: 200}];
const df2 = new DataFrame(vals2);

const df3 = mergeDataFrame(df1, df2, ['x'], 'outer');
expect(df3).toBeInstanceOf(DataFrame);
expect(df3.length).toEqual(5);
expect(df3.x.values.toArray()).toEqual([1, 2, 3, 4, 5]);
expect(df3.y.values.toArray()).toEqual([2, 3, 4, 10, null]);
expect(df3.z.values.toArray()).toEqual([1, 6, 100, null, 200]);
});
});
});

Expand Down
Loading