Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.

Commit 4e675c3

Browse files
authored
Merge pull request #8 from StratoDem/6-merge-column-collision
Add outer merge and columns sharing name in merge renamed with _x and _y
2 parents dcf4c44 + 4cd7f0d commit 4e675c3

File tree

11 files changed: +757 -69 lines changed

11 files changed: +757 -69 lines changed

dist/__tests__/core/frame.js

Lines changed: 43 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -86,18 +86,50 @@ describe('frame', function () {
8686
});
8787

8888
describe('mergeDataFrame', function () {
89-
it('merges two DataFrames on a given key', function () {
90-
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
91-
var df1 = new _frame2.default(vals1);
92-
var vals2 = [{ x: 2, z: 6 }, { x: 1, z: 1 }, { x: 3, z: 100 }];
93-
var df2 = new _frame2.default(vals2);
89+
describe('innerMerge', function () {
90+
it('merges two DataFrames on a given key', function () {
91+
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
92+
var df1 = new _frame2.default(vals1);
93+
var vals2 = [{ x: 2, z: 6 }, { x: 1, z: 1 }, { x: 3, z: 100 }];
94+
var df2 = new _frame2.default(vals2);
95+
96+
var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x'], 'inner');
97+
expect(df3).toBeInstanceOf(_frame2.default);
98+
expect(df3.length).toEqual(3);
99+
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
100+
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
101+
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
102+
});
94103

95-
var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x']);
96-
expect(df3).toBeInstanceOf(_frame2.default);
97-
expect(df3.length).toEqual(3);
98-
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
99-
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
100-
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
104+
it('replaces a common column with _x and _y', function () {
105+
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
106+
var df1 = new _frame2.default(vals1);
107+
var vals2 = [{ x: 2, y: 6 }, { x: 1, y: 1 }, { x: 3, y: 100 }];
108+
var df2 = new _frame2.default(vals2);
109+
110+
var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x'], 'inner');
111+
expect(df3).toBeInstanceOf(_frame2.default);
112+
expect(df3.length).toEqual(3);
113+
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
114+
expect(df3.y_x.values.toArray()).toEqual([2, 3, 4]);
115+
expect(df3.y_y.values.toArray()).toEqual([1, 6, 100]);
116+
});
117+
});
118+
119+
describe('outerMerge', function () {
120+
it('merges two DataFrames on a given key', function () {
121+
var vals1 = [{ x: 1, y: 2 }, { x: 2, y: 3 }, { x: 3, y: 4 }, { x: 4, y: 10 }];
122+
var df1 = new _frame2.default(vals1);
123+
var vals2 = [{ x: 2, z: 6 }, { x: 1, z: 1 }, { x: 3, z: 100 }, { x: 5, z: 200 }];
124+
var df2 = new _frame2.default(vals2);
125+
126+
var df3 = (0, _frame.mergeDataFrame)(df1, df2, ['x'], 'outer');
127+
expect(df3).toBeInstanceOf(_frame2.default);
128+
expect(df3.length).toEqual(5);
129+
expect(df3.x.values.toArray()).toEqual([1, 2, 3, 4, 5]);
130+
expect(df3.y.values.toArray()).toEqual([2, 3, 4, 10, null]);
131+
expect(df3.z.values.toArray()).toEqual([1, 6, 100, null, 200]);
132+
});
101133
});
102134
});
103135

dist/core/frame.js

Lines changed: 193 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -226,9 +226,10 @@ exports.default = DataFrame;
226226
var innerMerge = function innerMerge(df1, df2, on) {
227227
var data = [];
228228

229-
var nonMergeCols1 = df1.columns.filter(function (k) {
230-
return on.indexOf(k) < 0;
231-
});
229+
var cols1 = (0, _utils.nonMergeColumns)(df1.columns, on);
230+
var cols2 = (0, _utils.nonMergeColumns)(df2.columns, on);
231+
232+
var intersectCols = (0, _utils.intersectingColumns)(cols1, cols2);
232233

233234
var _iteratorNormalCompletion = true;
234235
var _didIteratorError = false;
@@ -282,12 +283,21 @@ var innerMerge = function innerMerge(df1, df2, on) {
282283
if (match) {
283284
(function () {
284285
var rowData = {};
285-
nonMergeCols1.forEach(function (k) {
286+
287+
on.forEach(function (k) {
286288
rowData[k] = row1[k].iloc(0);
287289
});
288-
df2.columns.forEach(function (k) {
289-
rowData[k] = row2[k].iloc(0);
290+
291+
cols1.forEach(function (k) {
292+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_x' : k;
293+
rowData[nextColName] = row1[k].iloc(0);
290294
});
295+
296+
cols2.forEach(function (k) {
297+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_y' : k;
298+
rowData[nextColName] = row2[k].iloc(0);
299+
});
300+
291301
data.push(rowData);
292302
})();
293303
}
@@ -333,6 +343,181 @@ var innerMerge = function innerMerge(df1, df2, on) {
333343
return new DataFrame(data);
334344
};
335345

346+
/**
347+
* Perform an outer merge of two DataFrames
348+
*
349+
* @param {DataFrame} df1
350+
* @param {DataFrame} df2
351+
* @param {Array} on
352+
*
353+
* @returns {DataFrame}
354+
*/
355+
var outerMerge = function outerMerge(df1, df2, on) {
356+
var data = [];
357+
358+
var cols1 = (0, _utils.nonMergeColumns)(df1.columns, on);
359+
var cols2 = (0, _utils.nonMergeColumns)(df2.columns, on);
360+
361+
var intersectCols = (0, _utils.intersectingColumns)(cols1, cols2);
362+
363+
var matched1 = new Array(df1.length).fill(false);
364+
var matched2 = new Array(df2.length).fill(false);
365+
366+
var _iteratorNormalCompletion4 = true;
367+
var _didIteratorError4 = false;
368+
var _iteratorError4 = undefined;
369+
370+
try {
371+
var _loop4 = function _loop4() {
372+
var _step4$value = (0, _slicedToArray3.default)(_step4.value, 2),
373+
row1 = _step4$value[0],
374+
idx_1 = _step4$value[1];
375+
376+
var _iteratorNormalCompletion5 = true;
377+
var _didIteratorError5 = false;
378+
var _iteratorError5 = undefined;
379+
380+
try {
381+
var _loop5 = function _loop5() {
382+
var _step5$value = (0, _slicedToArray3.default)(_step5.value, 2),
383+
row2 = _step5$value[0],
384+
idx_2 = _step5$value[1];
385+
386+
var match = true;
387+
var _iteratorNormalCompletion6 = true;
388+
var _didIteratorError6 = false;
389+
var _iteratorError6 = undefined;
390+
391+
try {
392+
for (var _iterator6 = on[Symbol.iterator](), _step6; !(_iteratorNormalCompletion6 = (_step6 = _iterator6.next()).done); _iteratorNormalCompletion6 = true) {
393+
var c = _step6.value;
394+
395+
if (row1[c].iloc(0) !== row2[c].iloc(0)) {
396+
match = false;
397+
break;
398+
}
399+
}
400+
} catch (err) {
401+
_didIteratorError6 = true;
402+
_iteratorError6 = err;
403+
} finally {
404+
try {
405+
if (!_iteratorNormalCompletion6 && _iterator6.return) {
406+
_iterator6.return();
407+
}
408+
} finally {
409+
if (_didIteratorError6) {
410+
throw _iteratorError6;
411+
}
412+
}
413+
}
414+
415+
var rowData = {};
416+
417+
on.forEach(function (k) {
418+
rowData[k] = row1[k].iloc(0);
419+
});
420+
421+
cols1.forEach(function (k) {
422+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_x' : k;
423+
rowData[nextColName] = row1[k].iloc(0);
424+
});
425+
426+
if (match) {
427+
cols2.forEach(function (k) {
428+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_y' : k;
429+
rowData[nextColName] = row2[k].iloc(0);
430+
});
431+
data.push(rowData);
432+
matched1[idx_1] = true;
433+
matched2[idx_2] = true;
434+
}
435+
};
436+
437+
for (var _iterator5 = df2.iterrows()[Symbol.iterator](), _step5; !(_iteratorNormalCompletion5 = (_step5 = _iterator5.next()).done); _iteratorNormalCompletion5 = true) {
438+
_loop5();
439+
}
440+
} catch (err) {
441+
_didIteratorError5 = true;
442+
_iteratorError5 = err;
443+
} finally {
444+
try {
445+
if (!_iteratorNormalCompletion5 && _iterator5.return) {
446+
_iterator5.return();
447+
}
448+
} finally {
449+
if (_didIteratorError5) {
450+
throw _iteratorError5;
451+
}
452+
}
453+
}
454+
};
455+
456+
for (var _iterator4 = df1.iterrows()[Symbol.iterator](), _step4; !(_iteratorNormalCompletion4 = (_step4 = _iterator4.next()).done); _iteratorNormalCompletion4 = true) {
457+
_loop4();
458+
}
459+
} catch (err) {
460+
_didIteratorError4 = true;
461+
_iteratorError4 = err;
462+
} finally {
463+
try {
464+
if (!_iteratorNormalCompletion4 && _iterator4.return) {
465+
_iterator4.return();
466+
}
467+
} finally {
468+
if (_didIteratorError4) {
469+
throw _iteratorError4;
470+
}
471+
}
472+
}
473+
474+
matched1.forEach(function (m, idx) {
475+
if (!m) {
476+
(function () {
477+
var rowData = {};
478+
on.forEach(function (k) {
479+
rowData[k] = df1[k].iloc(idx);
480+
});
481+
482+
cols1.forEach(function (k) {
483+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_x' : k;
484+
rowData[nextColName] = df1[k].iloc(idx);
485+
});
486+
487+
cols2.forEach(function (k) {
488+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_y' : k;
489+
rowData[nextColName] = null;
490+
});
491+
data.push(rowData);
492+
})();
493+
}
494+
});
495+
496+
matched2.forEach(function (m, idx) {
497+
if (!m) {
498+
(function () {
499+
var rowData = {};
500+
on.forEach(function (k) {
501+
rowData[k] = df2[k].iloc(idx);
502+
});
503+
504+
cols1.forEach(function (k) {
505+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_x' : k;
506+
rowData[nextColName] = null;
507+
});
508+
509+
cols2.forEach(function (k) {
510+
var nextColName = intersectCols.length > 0 && intersectCols.indexOf(k) >= 0 ? k + '_y' : k;
511+
rowData[nextColName] = df2[k].iloc(idx);
512+
});
513+
data.push(rowData);
514+
})();
515+
}
516+
});
517+
518+
return new DataFrame(data);
519+
};
520+
336521
/**
337522
* Perform a merge of two DataFrames
338523
*
@@ -364,6 +549,8 @@ var mergeDataFrame = exports.mergeDataFrame = function mergeDataFrame(df1, df2,
364549
switch (how) {
365550
case 'inner':
366551
return innerMerge(df1, df2, mergeOn);
552+
case 'outer':
553+
return outerMerge(df1, df2, mergeOn);
367554
default:
368555
throw new Error('MergeError: ' + how + ' not a supported merge type');
369556
}

dist/core/utils.js

Lines changed: 29 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
Object.defineProperty(exports, "__esModule", {
44
value: true
55
});
6-
exports.sum = undefined;
6+
exports.intersectingColumns = exports.nonMergeColumns = exports.sum = undefined;
77

88
var _regenerator = require("babel-runtime/regenerator");
99

@@ -104,4 +104,31 @@ function enumerate(iterable) {
104104
}
105105
}
106106
}, _marked[0], this, [[4, 16, 20, 28], [21,, 23, 27]]);
107-
}
107+
}
108+
109+
// Merge utils
110+
/**
111+
* Columns in DataFrame that will not be used as merge keys
112+
*
113+
* @param {Array<string>} columns
114+
* @param {Array<string>} on
115+
* @returns {Array<string>}
116+
*/
117+
var nonMergeColumns = exports.nonMergeColumns = function nonMergeColumns(columns, on) {
118+
return columns.filter(function (k) {
119+
return on.indexOf(k) < 0;
120+
});
121+
};
122+
123+
/**
124+
* Columns appearing in both
125+
*
126+
* @param {Array<string>} cols1
127+
* @param {Array<string>} cols2
128+
* @returns {Array<string>}
129+
*/
130+
var intersectingColumns = exports.intersectingColumns = function intersectingColumns(cols1, cols2) {
131+
return cols1.filter(function (k) {
132+
return cols2.indexOf(k) >= 0;
133+
});
134+
};

src/es6/__tests__/core/frame.js

Lines changed: 43 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -50,18 +50,50 @@ describe('frame', () => {
5050
});
5151

5252
describe('mergeDataFrame', () => {
53-
it('merges two DataFrames on a given key', () => {
54-
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
55-
const df1 = new DataFrame(vals1);
56-
const vals2 = [{x: 2, z: 6}, {x: 1, z: 1}, {x: 3, z: 100}];
57-
const df2 = new DataFrame(vals2);
53+
describe('innerMerge', () => {
54+
it('merges two DataFrames on a given key', () => {
55+
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
56+
const df1 = new DataFrame(vals1);
57+
const vals2 = [{x: 2, z: 6}, {x: 1, z: 1}, {x: 3, z: 100}];
58+
const df2 = new DataFrame(vals2);
59+
60+
const df3 = mergeDataFrame(df1, df2, ['x'], 'inner');
61+
expect(df3).toBeInstanceOf(DataFrame);
62+
expect(df3.length).toEqual(3);
63+
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
64+
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
65+
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
66+
});
5867

59-
const df3 = mergeDataFrame(df1, df2, ['x']);
60-
expect(df3).toBeInstanceOf(DataFrame);
61-
expect(df3.length).toEqual(3);
62-
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
63-
expect(df3.y.values.toArray()).toEqual([2, 3, 4]);
64-
expect(df3.z.values.toArray()).toEqual([1, 6, 100]);
68+
it('replaces a common column with _x and _y', () => {
69+
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
70+
const df1 = new DataFrame(vals1);
71+
const vals2 = [{x: 2, y: 6}, {x: 1, y: 1}, {x: 3, y: 100}];
72+
const df2 = new DataFrame(vals2);
73+
74+
const df3 = mergeDataFrame(df1, df2, ['x'], 'inner');
75+
expect(df3).toBeInstanceOf(DataFrame);
76+
expect(df3.length).toEqual(3);
77+
expect(df3.x.values.toArray()).toEqual([1, 2, 3]);
78+
expect(df3.y_x.values.toArray()).toEqual([2, 3, 4]);
79+
expect(df3.y_y.values.toArray()).toEqual([1, 6, 100]);
80+
});
81+
});
82+
83+
describe('outerMerge', () => {
84+
it('merges two DataFrames on a given key', () => {
85+
const vals1 = [{x: 1, y: 2}, {x: 2, y: 3}, {x: 3, y: 4}, {x: 4, y: 10}];
86+
const df1 = new DataFrame(vals1);
87+
const vals2 = [{x: 2, z: 6}, {x: 1, z: 1}, {x: 3, z: 100}, {x: 5, z: 200}];
88+
const df2 = new DataFrame(vals2);
89+
90+
const df3 = mergeDataFrame(df1, df2, ['x'], 'outer');
91+
expect(df3).toBeInstanceOf(DataFrame);
92+
expect(df3.length).toEqual(5);
93+
expect(df3.x.values.toArray()).toEqual([1, 2, 3, 4, 5]);
94+
expect(df3.y.values.toArray()).toEqual([2, 3, 4, 10, null]);
95+
expect(df3.z.values.toArray()).toEqual([1, 6, 100, null, 200]);
96+
});
6597
});
6698
});
6799

0 commit comments

Comments
 (0)