Links
Comment on page

Getting Started

Installation guides for Node and browser-based environments, including a quick 10-minute walkthrough of danfo.js.
A stable version of Danfojs (v1) has been released, and it comes with full TypeScript support, new features, and many bug fixes. See the release notes here.
There are a couple of breaking changes, so we have prepared a short migration guide for pre-v1 users.

Installation

There are three ways to install and use Danfo.js in your application:
For Nodejs applications, you can install the danfojs-node version via package managers like yarn and npm:
npm install danfojs-node
or
yarn add danfojs-node
For client-side applications built with frameworks like React, Vue, Next.js, etc, you can install the danfojs version:
npm install danfojs
or
yarn add danfojs
For use directly in HTML files, you can add the latest script tag from JsDelivr:
<script src="https://cdn.jsdelivr.net/npm/danfojs@1.1.2/lib/bundle.min.js"></script>
To play with Danfo.js in a Notebook-like environment, see Dnotebooks here or the VS-Code Nodejs notebook extension.

10 minutes to danfo.js

This is a short introduction to Danfo.js, and its flow is adapted from the official 10 minutes to Pandas
We will show you how to use danfo.js in the browser, with client-side libraries, and in Node.js environments. Most functions, except plotting (which requires a DOM), work the same way in all environments.
Node
Browser
React
const dfd = require("danfojs-node")
//or using ES6
import * as dfd from "danfojs-node"
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script>
</head>
<body>
<script>
//danfo is exposed on dfd namespace
s = new dfd.Series([1,2,3,4,5])
</script>
</body>
</html>
import * as dfd from "danfojs"
//import specific methods/classes
import { readCSV, DataFrame } from "danfojs"

Creating a DataFrame/Series

You can create a Series by passing a list of values, letting Danfo.js create a default integer index:
Node
Browser
import * as dfd from "danfojs-node"
s = new dfd.Series([1, 3, 5, undefined, 6, 8])
s.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
s = new dfd.Series([1, 3, 5, undefined, 6, 8])
s.print()
</script>
</body>
</html>
//output
╔═══╤══════════════════════╗
║ │ 0 ║
╟───┼──────────────────────╢
║ 0 │ 1 ║
╟───┼──────────────────────╢
║ 1 │ 3 ║
╟───┼──────────────────────╢
║ 2 │ 5 ║
╟───┼──────────────────────╢
║ 3 │ undefined ║
╟───┼──────────────────────╢
║ 4 │ 6 ║
╟───┼──────────────────────╢
║ 5 │ 8 ║
╚═══╧══════════════════════╝
Creating a Series from a tensor
Node
Browser
const dfd = require("danfojs-node")
const tf = dfd.tensorflow //Tensorflow.js is exported from Danfojs
let tensor_arr = tf.tensor([12,34,56,2])
let s = new dfd.Series(tensor_arr)
s.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script>
<title>Document</title>
</head>
<body>
<script>
const tf = dfd.tensorflow //get tensorflow lib from danfo
let tensor_arr = tf.tensor([12,34,56,2])
let s = new dfd.Series(tensor_arr)
s.print()
</script>
</body>
</html>
╔═══╤════╗
║ 0 │ 12 ║
╟───┼────╢
║ 1 │ 34 ║
╟───┼────╢
║ 2 │ 56 ║
╟───┼────╢
║ 3 │ 2 ║
╚═══╧════╝
Creating a DataFrame by passing a JSON object:
Node
Browser
const dfd = require("danfojs-node")
json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },
{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },
{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },
{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]
df = new dfd.DataFrame(json_data)
df.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script>
<title>Document</title>
</head>
<body>
<script>
json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },
{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },
{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },
{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]
df = new dfd.DataFrame(json_data)
df.print()
</script>
</body>
</html>
Creating a DataFrame from a 2D tensor
Node
Browser
const dfd = require("danfojs-node")
const tf = dfd.tensorflow //Tensorflow.js is exported from Danfojs
let tensor_arr = tf.tensor2d([[12, 34, 2.2, 2], [30, 30, 2.1, 7]])
let df = new dfd.DataFrame(tensor_arr, {columns: ["A", "B", "C", "D"]})
df.print()
df.ctypes.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
const tf = dfd.tensorflow //get tensorflow lib from danfo
let tensor_arr = tf.tensor2d([[12, 34, 2.2, 2], [30, 30, 2.1, 7]])
let df = new dfd.DataFrame(tensor_arr, {columns: ["A", "B", "C", "D"]})
df.print()
df.ctypes.print()
</script>
</body>
</html>
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ A │ B │ C │ D ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 0 │ 12 │ 34 │ 2.20000004768... │ 2 ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 1 │ 30 │ 30 │ 2.09999990463... │ 7 ║
╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
╔═══╤══════════════════════╗
║ │ 0 ║
╟───┼──────────────────────╢
║ A │ int32 ║
╟───┼──────────────────────╢
║ B │ int32 ║
╟───┼──────────────────────╢
║ C │ float32 ║
╟───┼──────────────────────╢
║ D │ int32 ║
╚═══╧══════════════════════╝
Creating a DataFrame by passing a dictionary of objects with the same length
Nodejs
Browser
const dfd = require("danfojs-node")
// Danfojs v1.0.0 and above
dates = new dfd.dateRange({ start: '2017-01-01', end: "2020-01-01", period: 4, freq: "Y" })
console.log(dates);
obj_data = {'A': dates,
'B': ["bval1", "bval2", "bval3", "bval4"],
'C': [10, 20, 30, 40],
'D': [1.2, 3.45, 60.1, 45],
'E': ["test", "train", "test", "train"]
}
df = new dfd.DataFrame(obj_data)
df.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
dates = new dfd.dateRange({ start: '2017-01-01', end: "2020-01-01", period: 4, freq: "Y" })
console.log(dates);
obj_data = {'A': dates,
'B': ["bval1", "bval2", "bval3", "bval4"],
'C': [10, 20, 30, 40],
'D': [1.2, 3.45, 60.1, 45],
'E': ["test", "train", "test", "train"]
}
df = new dfd.DataFrame(obj_data)
df.print()
</script>
</body>
</html>
//output in console
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ A │ B │ C │ D │ E ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 0 │ 1/1/2017, 1:0... │ bval1 │ 10 │ 1.2 │ test ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 1 │ 1/1/2018, 1:0... │ bval2 │ 20 │ 3.45 │ train ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 2 │ 1/1/2019, 1:0... │ bval3 │ 30 │ 60.1 │ test ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 3 │ 1/1/2020, 1:0... │ bval4 │ 40 │ 45 │ train ║
╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
The columns of the resulting DataFrame have different dtypes.
df.ctypes.print()
//output
╔═══╤═════════╗
║ A │ string ║
╟───┼─────────╢
║ B │ string ║
╟───┼─────────╢
║ C │ int32 ║
╟───┼─────────╢
║ D │ float32 ║
╟───┼─────────╢
║ E │ string ║
╚═══╧═════════╝
Creating a DataFrame by passing an array of arrays. Index and column labels are automatically generated for you.
Node
Browser
const dfd = require("danfojs-node")
arr_data = [["bval1", 10, 1.2, "test"],
["bval2", 20, 3.45, "train"],
["bval3", 30, 60.1, "train"],
["bval4", 35, 3.2, "test"]]
df = new dfd.DataFrame(arr_data)
df.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
arr_data = [["bval1", 10, 1.2, "test"],
["bval2", 20, 3.45, "train"],
["bval3", 30, 60.1, "train"],
["bval4", 35, 3.2, "test"]]
df = new dfd.DataFrame(arr_data)
df.print()
</script>
</body>
</html>
//output in console
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ 0 │ 1 │ 2 │ 3 ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 0 │ bval1 │ 10 │ 1.2 │ test ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 1 │ bval2 │ 20 │ 3.45 │ train ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 2 │ bval3 │ 30 │ 60.1 │ train ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 3 │ bval4 │ 35 │ 3.2 │ test ║
╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝

Viewing data

Here is how to view the top and bottom rows of the frame above:
df.head(2).print()
df.tail(2).print()
//output from head
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ 0 │ 1 │ 2 │ 3 ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 0 │ bval1 │ 10 │ 1.2 │ test ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 1 │ bval2 │ 20 │ 3.45 │ train ║
╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
//output from tail
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ 0 │ 1 │ 2 │ 3 ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 2 │ bval3 │ 30 │ 60.1 │ train ║
╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 3 │ bval4 │ 35 │ 3.2 │ test ║
╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
Display the index, columns:
JavaScript
Browser
const dfd = require('danfojs-node')
let dates = new dfd.dateRange({
start: "2017-01-01",
end: "2020-01-01",
period: 4,
freq: "Y",
});
let obj_data = {
A: dates,
B: ["bval1", "bval2", "bval3", "bval4"],
C: [10, 20, 30, 40],
D: [1.2, 3.45, 60.1, 45],
E: ["test", "train", "test", "train"],
};
let df = new dfd.DataFrame(obj_data);
df.print();
console.log(df.index);
console.log(df.columns);
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
let dates = new dfd.dateRange({
start: "2017-01-01",
end: "2020-01-01",
period: 4,
freq: "Y",
});
let obj_data = {
A: dates,
B: ["bval1", "bval2", "bval3", "bval4"],
C: [10, 20, 30, 40],
D: [1.2, 3.45, 60.1, 45],
E: ["test", "train", "test", "train"],
};
let df = new dfd.DataFrame(obj_data);
df.print();
console.log(df.index);
console.log(df.columns)
</script>
</body>
</html>
//output
╔════════════╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ A │ B │ C │ D │ E ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 0 │ 1/1/2017, 1:00:… │ bval1 │ 10 │ 1.2 │ test ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 1 │ 1/1/2018, 1:00:… │ bval2 │ 20 │ 3.45 │ train ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 2 │ 1/1/2019, 1:00:… │ bval3 │ 30 │ 60.1 │ test ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ 3 │ 1/1/2020, 1:00:… │ bval4 │ 40 │ 45 │ train ║
╚════════════╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
[ 0, 1, 2, 3 ]
[ 'A', 'B', 'C', 'D', 'E' ]
DataFrame.tensor returns a Tensorflow tensor representation of the underlying data. Note that Tensorflow tensors have one dtype for the entire array, while danfo DataFrames have one dtype per column.
For df, our DataFrame of all floating-point values, DataFrame.tensor is fast and doesn’t require copying data.
Node
Browser
const dfd = require("danfojs-node")
json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },
{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },
{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },
{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]
let df = new dfd.DataFrame(json_data)
console.log(df.tensor);
//or
df.tensor.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },
{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },
{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },
{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]
df = new dfd.DataFrame(json_data)
console.log(df.tensor);
//or
df.tensor.print()
</script>
</body>
</html>
//output
Tensor {
kept: false,
isDisposedInternal: false,
shape: [ 4, 4 ],
dtype: 'float32',
size: 16,
strides: [ 4 ],
dataId: {},
id: 0,
rankType: '2'
}
Tensor
[[0.4612, 4.2828302, -1.5089999, -1.1352 ],
[0.5112, -0.22863 , -3.39059 , 1.1632 ],
[0.6911, -0.82863 , -1.5059 , 2.1352 ],
[0.4692, -1.28863 , 4.5058999 , 4.1631999]]
Note
DataFrame.tensor does not include the index or column labels in the output.
describe() shows a quick statistic summary of your data:
Node
Browser
const dfd = require("danfojs-node")
let json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },
{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },
{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },
{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]
let df = new dfd.DataFrame(json_data)
df.describe().print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },
{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },
{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },
{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]
df = new dfd.DataFrame(json_data)
df.describe().print()
</script>
</body>
</html>
//output in console
╔════════════╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗
║ │ A │ B │ C │ D ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ count │ 4 │ 4 │ 4 │ 4 ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ mean │ 0.533175 │ 0.4842349999999… │ -0.474897500000… │ 1.5816 ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ std │ 0.1075428712963… │ 2.5693167249095… │ 3.4371471031498… │ 2.2005448052698… ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ min │ 0.4612 │ -1.28863 │ -3.39059 │ -1.1352 ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ median │ 0.4901999999999… │ -0.528629999999… │ -1.50745 │ 1.6492 ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ max │ 0.6911 │ 4.28283 │ 4.5059 │ 4.1632 ║
╟────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢
║ variance │ 0.0115654691666… │ 6.6013884328999… │ 11.813980208691… │ 4.84239744 ║
╚════════════╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
Sorting by values (Defaults to ascending):
Node
Browser
const dfd = require("danfojs-node")
let data = {"A": [-20, 30, 47.3, NaN],
"B": [34, -4, 5, 6] ,
"C": [20, 2, 3, 30] }
let df = new dfd.DataFrame(data)
df.sortValues("C", {inplace: true})
df.print()
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--danfojs CDN -->
<script src="https://cdn.jsdelivr.net/npm/[email protected]/lib/bundle.min.js"></script> <title>Document</title>
</head>
<body>
<script>
let data = {"A": [-20, 30, 47.3, NaN],
"B": [34, -4, 5, 6] ,
"C": [