There are two ways to get danfo.js. We built an optimized and fast version for Node.js, and it is available under the danfojs-node namespace. To install it via npm, you can do the following:
npm install danfojs-node
You can also install and use it in the browser by using the CDN below:
<script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script>
To play with Danfo.js in a Notebook-like environment without doing an installation, use Dnotebooks here
This is a short introduction to danfo.js, and its flow is adapted from the official 10 minutes to Pandas.
We will show you how to use danfo.js in both the browser environment and the Node.js environment. Most functions, except plotting (which requires a DOM), work the same way in both environments.
const dfd = require("danfojs-node")
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>//danfo is exposed on dfd namespaces = new dfd.Series([1,2,3,4,5])</script></body></html>
Creating a Series
by passing a list of values, letting danfo.js create a default integer index:
s = new dfd.Series([1, 3, 5, undefined, 6, 8])s.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>s = new dfd.Series([1, 3, 5, undefined, 6, 8])s.print()</script></body></html>
//output╔═══╤══════════════════════╗║ │ 0 ║╟───┼──────────────────────╢║ 0 │ 1 ║╟───┼──────────────────────╢║ 1 │ 3 ║╟───┼──────────────────────╢║ 2 │ 5 ║╟───┼──────────────────────╢║ 3 │ NaN ║╟───┼──────────────────────╢║ 4 │ 6 ║╟───┼──────────────────────╢║ 5 │ 8 ║╚═══╧══════════════════════╝
Creating a Series
from a tensor
const dfd = require("danfojs-node")const tf = require("@tensorflow/tfjs-node")let tensor_arr = tf.tensor([12,34,56,2])let s = new dfd.Series(tensor_arr)s.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script><title>Document</title></head><body><script>const tf = dfd.tf //get tensorflow lib from danfolet tensor_arr = tf.tensor([12,34,56,2])let s = new dfd.Series(tensor_arr)s.print()</script></body></html>
╔═══╤══════════════════════╗║ │ 0 ║╟───┼──────────────────────╢║ 0 │ 12 ║╟───┼──────────────────────╢║ 1 │ 34 ║╟───┼──────────────────────╢║ 2 │ 56 ║╟───┼──────────────────────╢║ 3 │ 2 ║╚═══╧══════════════════════╝
Creating a DataFrame
by passing a JSON object:
const dfd = require("danfojs-node")json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df.print()</script></body></html>
Creating a DataFrame
from a 2D tensor
const dfd = require("danfojs-node")const tf = require("@tensorflow/tfjs-node")let tensor_arr = tf.tensor2d([[12, 34, 2.2, 2], [30, 30, 2.1, 7]])let df = new dfd.DataFrame(tensor_arr, {columns: ["A", "B", "C", "D"]})df.print()df.ctypes.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df.print()</script></body></html>
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C │ D ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ 12 │ 34 │ 2.20000004768... │ 2 ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ 30 │ 30 │ 2.09999990463... │ 7 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝╔═══╤══════════════════════╗║ │ 0 ║╟───┼──────────────────────╢║ A │ int32 ║╟───┼──────────────────────╢║ B │ int32 ║╟───┼──────────────────────╢║ C │ float32 ║╟───┼──────────────────────╢║ D │ int32 ║╚═══╧══════════════════════╝
Creating a DataFrame
by passing a dictionary of objects with the same length
const dfd = require("danfojs-node")dates = new dfd.date_range({ start: '2017-01-01', end: "2020-01-01", period: 4, freq: "Y" })console.log(dates);obj_data = {'A': dates,'B': ["bval1", "bval2", "bval3", "bval4"],'C': [10, 20, 30, 40],'D': [1.2, 3.45, 60.1, 45],'E': ["test", "train", "test", "train"]}df = new dfd.DataFrame(obj_data)df.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>dates = new dfd.date_range({ start: '2017-01-01', end: "2020-01-01", period: 4, freq: "Y" })console.log(dates);obj_data = {'A': dates,'B': ["bval1", "bval2", "bval3", "bval4"],'C': [10, 20, 30, 40],'D': [1.2, 3.45, 60.1, 45],'E': ["test", "train", "test", "train"]}df = new dfd.DataFrame(obj_data)df.print()</script></body></html>
//output in console╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C │ D │ E ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ 1/1/2017, 1:0... │ bval1 │ 10 │ 1.2 │ test ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ 1/1/2018, 1:0... │ bval2 │ 20 │ 3.45 │ train ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ 1/1/2019, 1:0... │ bval3 │ 30 │ 60.1 │ test ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ 1/1/2020, 1:0... │ bval4 │ 40 │ 45 │ train ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
The columns of the resulting DataFrame
have different dtypes.
df.ctypes.print()
//output╔═══╤══════════════════════╗║ │ 0 ║╟───┼──────────────────────╢║ A │ string ║╟───┼──────────────────────╢║ B │ string ║╟───┼──────────────────────╢║ C │ int32 ║╟───┼──────────────────────╢║ D │ float32 ║╟───┼──────────────────────╢║ E │ string ║╚═══╧══════════════════════╝
Creating a DataFrame
by passing an array of arrays. Index and column labels are automatically generated for you.
const dfd = require("danfojs-node")arr_data = [["bval1", 10, 1.2, "test"],["bval2", 20, 3.45, "train"],["bval3", 30, 60.1, "train"],["bval4", 35, 3.2, "test"]]df = new dfd.DataFrame(arr_data)df.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>arr_data = [["bval1", 10, 1.2, "test"],["bval2", 20, 3.45, "train"],["bval3", 30, 60.1, "train"],["bval4", 35, 3.2, "test"]]df = new dfd.DataFrame(arr_data)df.print()</script></body></html>
//output in console╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ 0 │ 1 │ 2 │ 3 ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ bval1 │ 10 │ 1.2 │ test ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ bval2 │ 20 │ 3.45 │ train ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ bval3 │ 30 │ 60.1 │ train ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ bval4 │ 35 │ 3.2 │ test ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
Here is how to view the top and bottom rows of the frame above:
df.head(2).print()df.tail(2).print()
//output from head╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ 0 │ 1 │ 2 │ 3 ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ bval1 │ 10 │ 1.2 │ test ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ bval2 │ 20 │ 3.45 │ train ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝//output from tail╔═══╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ 0 │ 1 │ 2 │ 3 ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ bval3 │ 30 │ 60.1 │ train ║╟───┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ bval4 │ 35 │ 3.2 │ test ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
Display the index, columns:
const dfd = require("danfojs-node")dates = new dfd.date_range({ start: '2017-01-01', end: "2020-01-01", period: 4, freq: "Y" })obj_data = {'A': dates,'B': ["bval1", "bval2", "bval3", "bval4"],'C': [10, 20, 30, 40],'D': [1.2, 3.45, 60.1, 45],'E': ["test", "train", "test", "train"]}df = new dfd.DataFrame(obj_data)console.log(df.index);console.log(df.columns);
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>dates = new dfd.date_range({ start: '2017-01-01', end: "2020-01-01", period: 4, freq: "Y" })obj_data = {'A': dates,'B': ["bval1", "bval2", "bval3", "bval4"],'C': [10, 20, 30, 40],'D': [1.2, 3.45, 60.1, 45],'E': ["test", "train", "test", "train"]}df = new dfd.DataFrame(obj_data)console.log(df.index);console.log(df.columns);</script></body></html>
//output[ 0, 1, 2, 3 ][ 'A', 'B', 'C', 'D', 'E' ]
DataFrame.tensor
returns a TensorFlow tensor representation of the underlying data. Note that TensorFlow tensors have one dtype for the entire array, while danfo DataFrames have one dtype per column.
For df
, our DataFrame
of all floating-point values, DataFrame.tensor
is fast and doesn’t require copying data.
const dfd = require("danfojs-node")json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)console.log(df.tensor);//ordf.tensor.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)console.log(df.tensor);//ordf.tensor.print()</script></body></html>
//outputTensor {kept: false,isDisposedInternal: false,shape: [ 4, 4 ],dtype: 'float32',size: 16,strides: [ 4 ],dataId: {},id: 0,rankType: '2'}Tensor[[0.4612, 4.2828302, -1.5089999, -1.1352 ],[0.5112, -0.22863 , -3.39059 , 1.1632 ],[0.6911, -0.82863 , -1.5059 , 2.1352 ],[0.4692, -1.28863 , 4.5058999 , 4.1631999]]
Note
DataFrame.tensor
does not include the index or column labels in the output.
describe()
shows a quick statistical summary of your data:
const dfd = require("danfojs-node")json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df.describe().print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df.describe().print()</script></body></html>
//output in console╔════════╤═══════════════════╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C │ D ║╟────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ count │ 4 │ 4 │ 4 │ 4 ║╟────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ mean │ 0.533175 │ 0.484235 │ -0.474898 │ 1.5816 ║╟────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ std │ 0.107543 │ 2.569317 │ 3.437147 │ 2.200545 ║╟────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ min │ 0.4612 │ -1.28863 │ -3.39059 │ -1.1352 ║╟────────┼───────────────────┼───────────────────┼───────────────────┼───────────────────╢║ median │ 0.4902 │ -0.52863 │ -1.50745 │ 1.6492 ║╚════════╧═══════════════════╧═══════════════════╧═══════════════════╧═══════════════════╝
Sorting by values (Defaults to ascending):
const dfd = require("danfojs")let data = [{ "A": [-20, 30, 47.3] },{ "B": [34, -4, 5, 6] },{ "C": [20, 2, 3, 30] }]let df = new dfd.DataFrame(data)df.sort_values({by: "C", inplace: true})df.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>let data = [{ "A": [-20, 30, 47.3] },{ "B": [34, -4, 5, 6] },{ "C": [20, 2, 3, 30] }]let df = new dfd.DataFrame(data)df.sort_values({by: "C", inplace: true})df.print()</script></body></html>
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ 30 │ -4 │ 2 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ 47.3 │ 5 │ 3 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ -20 │ 34 │ 20 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
Selecting a single column, which yields a Series
, equivalent to df.A
:
const dfd = require("danfojs-node")json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df['A'].print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>json_data = [{ A: 0.4612, B: 4.28283, C: -1.509, D: -1.1352 },{ A: 0.5112, B: -0.22863, C: -3.39059, D: 1.1632 },{ A: 0.6911, B: -0.82863, C: -1.5059, D: 2.1352 },{ A: 0.4692, B: -1.28863, C: 4.5059, D: 4.1632 }]df = new dfd.DataFrame(json_data)df['A'].print()</script></body></html>
//output╔═══╤══════════════════════╗║ │ A ║╟───┼──────────────────────╢║ 0 │ 0.4612 ║╟───┼──────────────────────╢║ 1 │ 0.5112 ║╟───┼──────────────────────╢║ 2 │ 0.6911 ║╟───┼──────────────────────╢║ 3 │ 0.4692 ║╚═══╧══════════════════════╝
For getting a cross section using a label:
const dfd = require("danfojs")let data = [{ "Name": ["Apples", "Mango", "Banana", "Pear"] },{ "Count": [21, 5, 30, 10] },{ "Price": [200, 300, 40, 250] }]let df = new dfd.DataFrame(data, {index: ["a", "b", "c", "d"]})df.print()let sub_df = df.loc({rows: ["a", "c"]})sub_df.print()
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ a │ Apples │ 21 │ 200 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ b │ Mango │ 5 │ 300 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ c │ Banana │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ d │ Pear │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝Shape: (2,3)╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ a │ Apples │ 21 │ 200 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ c │ Banana │ 30 │ 40 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
Selecting on a multi-axis by label:
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10],"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)df.print()let sub_df = df.loc({ rows: [0,1], columns: ["Name", "Price"] })sub_df.print()
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ Apples │ 21 │ 200 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ Mango │ 5 │ 300 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ Banana │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Pear │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝Shape: (2,2)╔═══╤═══════════════════╤═══════════════════╗║ │ Name │ Price ║╟───┼───────────────────┼───────────────────╢║ 0 │ Apples │ 200 ║╟───┼───────────────────┼───────────────────╢║ 1 │ Mango │ 300 ║╚═══╧═══════════════════╧═══════════════════╝
Showing label slicing (both endpoints are included):
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10],"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)df.print()let sub_df = df.loc({ rows: ["0:2"], columns: ["Name", "Price"] })sub_df.print()
//before slicing╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ Apples │ 21 │ 200 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ Mango │ 5 │ 300 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ Banana │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Pear │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝//after slicingShape: (3,2)╔═══╤═══════════════════╤═══════════════════╗║ │ Name │ Price ║╟───┼───────────────────┼───────────────────╢║ 0 │ Apples │ 200 ║╟───┼───────────────────┼───────────────────╢║ 1 │ Mango │ 300 ║╟───┼───────────────────┼───────────────────╢║ 2 │ Banana │ 40 ║╚═══╧═══════════════════╧═══════════════════╝
Select via the position of the passed integers:
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10] ,"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)let sub_df = df.iloc({rows: [1,3]})sub_df.print()
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ Mango │ 5 │ 300 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Pear │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
By integer slices:
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10] ,"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)let sub_df = df.iloc({rows: ["1:3"]})sub_df.print()
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ Mango │ 5 │ 300 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ Banana │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Pear │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
By lists of integer position locations:
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10] ,"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)let sub_df = df.iloc({rows: [1,3], columns: [0,2]})sub_df.print()
╔═══╤═══════════════════╤═══════════════════╗║ │ Name │ Price ║╟───┼───────────────────┼───────────────────╢║ 1 │ Mango │ 300 ║╟───┼───────────────────┼───────────────────╢║ 3 │ Pear │ 250 ║╚═══╧═══════════════════╧═══════════════════╝
For slicing rows explicitly:
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10] ,"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)let sub_df = df.iloc({rows: ["2:3"], columns: [":"]})sub_df.print()
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ Name │ Count │ Price ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ Banana │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Pear │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
For slicing columns explicitly:
const dfd = require("danfojs-node")let data = { "Name": ["Apples", "Mango", "Banana", "Pear"] ,"Count": [21, 5, 30, 10] ,"Price": [200, 300, 40, 250] }let df = new dfd.DataFrame(data)let sub_df = df.iloc({rows: [":"], columns: ["1:2"]})sub_df.print()
╔═══╤═══════════════════╤═══════════════════╗║ │ Count │ Price ║╟───┼───────────────────┼───────────────────╢║ 0 │ 21 │ 200 ║╟───┼───────────────────┼───────────────────╢║ 1 │ 5 │ 300 ║╟───┼───────────────────┼───────────────────╢║ 2 │ 30 │ 40 ║╟───┼───────────────────┼───────────────────╢║ 3 │ 10 │ 250 ║╚═══╧═══════════════════╧═══════════════════╝
Using a single column’s values to select data.
const dfd = require("danfojs-node")let data = [[1, 2, 3], [4, 5, 6], [20, 30, 40], [39, 89, 78]]let cols = ["A", "B", "C"]let df = new dfd.DataFrame(data, { columns: cols })df.print() //before querylet query_df = df.query({ "column": "B", "is": ">", "to": 5 })query_df.print() //after query
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>let data = [[1, 2, 3], [4, 5, 6], [20, 30, 40], [39, 89, 78]]let cols = ["A", "B", "C"]let df = new dfd.DataFrame(data, { columns: cols })df.print() //before querylet query_df = df.query({ "column": "B", "is": ">", "to": 5 })query_df.print() //after query</script></body></html>
//before query╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ 1 │ 2 │ 3 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ 4 │ 5 │ 6 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ 20 │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ 39 │ 89 │ 78 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝//after query╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ 20 │ 30 │ 40 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ 39 │ 89 │ 78 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
Selecting values from a DataFrame works on string columns:
let data = [{"A": ["Ng", "Yu", "Mo", "Ng"]},{"B": [34, 4, 5, 6]},{"C": [20, 20, 30, 40]}]let df = new dfd.DataFrame(data)df.print()let query_df = df.query({ column: "A", is: "==", to: "Ng"})query_df.print() //after query
╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ Ng │ 34 │ 20 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 1 │ Yu │ 4 │ 20 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 2 │ Mo │ 5 │ 30 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Ng │ 6 │ 40 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝//after query╔═══╤═══════════════════╤═══════════════════╤═══════════════════╗║ │ A │ B │ C ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 0 │ Ng │ 34 │ 20 ║╟───┼───────────────────┼───────────────────┼───────────────────╢║ 3 │ Ng │ 6 │ 40 ║╚═══╧═══════════════════╧═══════════════════╧═══════════════════╝
Setting a new column automatically aligns the data by the indexes.
const dfd = require("danfojs-node")let data = { "A": [30, 1, 2, 3] ,"B": [34, 4, 5, 6] ,"C": [20, 20, 30, 40] }let df = new dfd.DataFrame(data)df.print()let new_col = [1, 2, 3, 4]df.addColumn({ "column": "D", "value": new_col }); //happens inplacedf.print()
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0"><!--danfojs CDN --><script src="https://cdn.jsdelivr.net/npm/danfojs@0.2.2/lib/bundle.min.js"></script> <title>Document</title></head><body><script>let data = { "A": [30, 1, 2, 3] ,"B": [34, 4, 5, 6] ,"C": [20, 20, 30