Skip to content
This repository was archived by the owner on Sep 22, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
inefficient but functional DataFrame.pivot. Next up: pivot_table and …
…optimizations
  • Loading branch information
mjclawar committed Jan 12, 2017
commit 15b8a743c1189da37519b9e0cdab20039db284a4
31 changes: 31 additions & 0 deletions src/es6/__tests__/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -596,4 +596,35 @@ describe('frame', () => {
expect(df2.columns.toArray()).toEqual(['x', 'y']);
});
});

describe('pivot', () => {
  // Happy path: every (index, column) pair occurs once, so each output cell is
  // either the matching value or null when the pair never appeared.
  it('pivots a DataFrame with unique index, column pairs', () => {
    const records = [{x: 1, y: 2, z: 3}, {x: 2, y: 1, z: 1}];
    const frame = new DataFrame(records);

    // index = x, columns = y, values = z
    let pivoted = frame.pivot('x', 'y', 'z');
    expect(pivoted).toBeInstanceOf(DataFrame);

    const firstColumn = pivoted.get(1).values.toArray();
    expect(firstColumn).toEqual([null, 1]);
    const secondColumn = pivoted.get(2).values.toArray();
    expect(secondColumn).toEqual([3, null]);

    // index = z, columns = x, values = y
    pivoted = frame.pivot('z', 'x', 'y');
    expect(pivoted).toBeInstanceOf(DataFrame);

    expect(pivoted.get(1).values.toArray()).toEqual([null, 2]);
    expect(pivoted.get(2).values.toArray()).toEqual([1, null]);
  });

  // Asking for a values column ('z') that the frame does not contain must fail.
  it('throws an error if column not in df', () => {
    const frame = new DataFrame([{x: 1, y: 2}, {x: 2, y: 3}]);
    const pivotOnMissingColumn = () => frame.pivot('x', 'y', 'z');

    expect(pivotOnMissingColumn).toThrow();
  });

  // Two rows share the pair (x: 1, y: 2), which pivot cannot place in one cell.
  it('throws an error if index or column not unique', () => {
    const frame = new DataFrame([{x: 1, y: 2, z: 3}, {x: 1, y: 2, z: 4}]);
    const pivotOnDuplicatePair = () => frame.pivot('x', 'y', 'z');

    expect(pivotOnDuplicatePair).toThrow();
  });
});
});
66 changes: 59 additions & 7 deletions src/es6/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,30 @@ import { enumerate, nonMergeColumns, intersectingColumns, parseIndex } from './u


const parseArrayToSeriesMap = (array, index) => {
const dataMap = {};
let dataMap = Immutable.Map({});

array.forEach((el) => {
if (typeof el === 'object') {
if (el instanceof Immutable.Map) {
el.keySeq().forEach(k => {
if (dataMap.has(k)) {
dataMap = dataMap.set(k, dataMap.get(k).push(el.get(k)));
} else {
dataMap = dataMap.set(k, Immutable.List.of(el.get(k)));
}
});
} else if (typeof el === 'object') {
Object.keys(el).forEach((k) => {
if (k in dataMap) {
dataMap[k] = dataMap[k].push(el[k]);
if (dataMap.has(k)) {
dataMap = dataMap.set(k, dataMap.get(k).push(el[k]));
} else {
dataMap[k] = Immutable.List.of(el[k]);
dataMap = dataMap.set(k, Immutable.List.of(el[k]));
}
});
}
});

Object.keys(dataMap).forEach((k) => {
dataMap[k] = new Series(dataMap[k], {name: k, index});
dataMap.keySeq().forEach(k => {
dataMap = dataMap.set(k, new Series(dataMap.get(k), {name: k, index}));
});

return Immutable.Map(dataMap);
Expand Down Expand Up @@ -930,6 +938,50 @@ export default class DataFrame extends NDFrame {
return [k, v.filter(iterBool)];
})));
}

/**
* Reshape data (produce a “pivot” table) based on column values. Uses unique values from
* index / columns to form axes of the resulting DataFrame.
*
* pandas equivalent: [DataFrame.pivot](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pivot.html)
*
* @param {string|number} index
* Name of the column to use as index
* @param {string|number} columns
* Name of the column to use as column values
* @param {string|number} values
* Name of the column to use as the value
*
* @returns {DataFrame}
*/
pivot(index, columns, values) {
let uniqueVals = Immutable.Map({});
let uniqueCols = Immutable.List([]);

this.index.forEach((v, idx) => {
const idxVal = this.get(index).iloc(idx);
const colVal = this.get(columns).iloc(idx);

if (uniqueVals.hasIn([idxVal, colVal]))
throw new Error('pivot index and column must be unique');

const val = this.get(values).iloc(idx);

uniqueVals = uniqueVals.setIn([idxVal, colVal], val);
if (!uniqueCols.has(colVal))
uniqueCols = uniqueCols.push(colVal);
});
const sortedIndex = uniqueVals.keySeq().sort().toArray();
const sortedColumns = uniqueCols.sort();
return new DataFrame(sortedIndex.map(idx => {
let rowMap = Immutable.Map({});
sortedColumns.forEach(col => {
const val = uniqueVals.getIn([idx, col]);
rowMap = rowMap.set(col, typeof val === 'undefined' ? null : val);
});
return rowMap;
}), {index: sortedIndex});
}
}

const innerMerge = (df1, df2, on) => {
Expand Down
35 changes: 35 additions & 0 deletions src/js/__tests__/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -644,4 +644,39 @@ describe('frame', function () {
expect(df2.columns.toArray()).toEqual(['x', 'y']);
});
});

describe('pivot', function () {
  // Happy path: every (index, column) pair occurs once, so each output cell is
  // either the matching value or null when the pair never appeared.
  it('pivots a DataFrame with unique index, column pairs', function () {
    var records = [{ x: 1, y: 2, z: 3 }, { x: 2, y: 1, z: 1 }];
    var frame = new _frame2.default(records);

    // index = x, columns = y, values = z
    var pivoted = frame.pivot('x', 'y', 'z');
    expect(pivoted).toBeInstanceOf(_frame2.default);

    var firstColumn = pivoted.get(1).values.toArray();
    expect(firstColumn).toEqual([null, 1]);
    var secondColumn = pivoted.get(2).values.toArray();
    expect(secondColumn).toEqual([3, null]);

    // index = z, columns = x, values = y
    pivoted = frame.pivot('z', 'x', 'y');
    expect(pivoted).toBeInstanceOf(_frame2.default);

    expect(pivoted.get(1).values.toArray()).toEqual([null, 2]);
    expect(pivoted.get(2).values.toArray()).toEqual([1, null]);
  });

  // Asking for a values column ('z') that the frame does not contain must fail.
  it('throws an error if column not in df', function () {
    var frame = new _frame2.default([{ x: 1, y: 2 }, { x: 2, y: 3 }]);
    var pivotOnMissingColumn = function () {
      return frame.pivot('x', 'y', 'z');
    };

    expect(pivotOnMissingColumn).toThrow();
  });

  // Two rows share the pair (x: 1, y: 2), which pivot cannot place in one cell.
  it('throws an error if index or column not unique', function () {
    var frame = new _frame2.default([{ x: 1, y: 2, z: 3 }, { x: 1, y: 2, z: 4 }]);
    var pivotOnDuplicatePair = function () {
      return frame.pivot('x', 'y', 'z');
    };

    expect(pivotOnDuplicatePair).toThrow();
  });
});
});
83 changes: 69 additions & 14 deletions src/js/core/frame.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,30 @@ var _utils = require('./utils');
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }

var parseArrayToSeriesMap = function parseArrayToSeriesMap(array, index) {
var dataMap = {};
var dataMap = _immutable2.default.Map({});

array.forEach(function (el) {
if ((typeof el === 'undefined' ? 'undefined' : (0, _typeof3.default)(el)) === 'object') {
if (el instanceof _immutable2.default.Map) {
el.keySeq().forEach(function (k) {
if (dataMap.has(k)) {
dataMap = dataMap.set(k, dataMap.get(k).push(el.get(k)));
} else {
dataMap = dataMap.set(k, _immutable2.default.List.of(el.get(k)));
}
});
} else if ((typeof el === 'undefined' ? 'undefined' : (0, _typeof3.default)(el)) === 'object') {
Object.keys(el).forEach(function (k) {
if (k in dataMap) {
dataMap[k] = dataMap[k].push(el[k]);
if (dataMap.has(k)) {
dataMap = dataMap.set(k, dataMap.get(k).push(el[k]));
} else {
dataMap[k] = _immutable2.default.List.of(el[k]);
dataMap = dataMap.set(k, _immutable2.default.List.of(el[k]));
}
});
}
});

Object.keys(dataMap).forEach(function (k) {
dataMap[k] = new _series2.default(dataMap[k], { name: k, index: index });
dataMap.keySeq().forEach(function (k) {
dataMap = dataMap.set(k, new _series2.default(dataMap.get(k), { name: k, index: index }));
});

return _immutable2.default.Map(dataMap);
Expand Down Expand Up @@ -1003,6 +1011,53 @@ var DataFrame = function (_NDFrame) {
return [k, v.filter(iterBool)];
})));
}

/**
* Reshape data (produce a “pivot” table) based on column values. Uses unique values from
* index / columns to form axes of the resulting DataFrame.
*
* pandas equivalent: [DataFrame.pivot](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.pivot.html)
*
* @param {string|number} index
* Name of the column to use as index
* @param {string|number} columns
* Name of the column to use as column values
* @param {string|number} values
* Name of the column to use as the value
*
* @returns {DataFrame}
*/

}, {
key: 'pivot',
value: function pivot(index, columns, values) {
var _this11 = this;

var uniqueVals = _immutable2.default.Map({});
var uniqueCols = _immutable2.default.List([]);

this.index.forEach(function (v, idx) {
var idxVal = _this11.get(index).iloc(idx);
var colVal = _this11.get(columns).iloc(idx);

if (uniqueVals.hasIn([idxVal, colVal])) throw new Error('pivot index and column must be unique');

var val = _this11.get(values).iloc(idx);

uniqueVals = uniqueVals.setIn([idxVal, colVal], val);
if (!uniqueCols.has(colVal)) uniqueCols = uniqueCols.push(colVal);
});
var sortedIndex = uniqueVals.keySeq().sort().toArray();
var sortedColumns = uniqueCols.sort();
return new DataFrame(sortedIndex.map(function (idx) {
var rowMap = _immutable2.default.Map({});
sortedColumns.forEach(function (col) {
var val = uniqueVals.getIn([idx, col]);
rowMap = rowMap.set(col, typeof val === 'undefined' ? null : val);
});
return rowMap;
}), { index: sortedIndex });
}
}, {
key: 'values',
get: function get() {
Expand Down Expand Up @@ -1046,14 +1101,14 @@ var DataFrame = function (_NDFrame) {
*/
,
set: function set(columns) {
var _this11 = this;
var _this12 = this;

if (!Array.isArray(columns) || columns.length !== this.columns.size) throw new Error('Columns must be array of same dimension');

var nextData = {};
columns.forEach(function (k, idx) {
var prevColumn = _this11.columns.get(idx);
var prevSeries = _this11.get(prevColumn);
var prevColumn = _this12.columns.get(idx);
var prevSeries = _this12.get(prevColumn);

prevSeries.name = k;
nextData[k] = prevSeries;
Expand Down Expand Up @@ -1098,7 +1153,7 @@ var DataFrame = function (_NDFrame) {
*/
,
set: function set(index) {
var _this12 = this;
var _this13 = this;

this.set_axis(0, (0, _utils.parseIndex)(index, this._data.get(this.columns.get(0)).values));

Expand All @@ -1109,7 +1164,7 @@ var DataFrame = function (_NDFrame) {
v = _ref10[1];

// noinspection Eslint
v.index = _this12.index;
v.index = _this13.index;
});
}

Expand All @@ -1130,10 +1185,10 @@ var DataFrame = function (_NDFrame) {
}, {
key: 'length',
get: function get() {
var _this13 = this;
var _this14 = this;

return Math.max.apply(Math, (0, _toConsumableArray3.default)(this._data.keySeq().map(function (k) {
return _this13.get(k).length;
return _this14.get(k).length;
}).toArray()));
}
}]);
Expand Down