Show Sidebar Hide Sidebar

Distplots in Python

How to make interactive Distplots in Python with Plotly.

Combined statistical representations with px.histogramΒΆ

Several representations of statistical distributions are available in plotly, such as histograms, violin plots, box plots (see the complete list here). It is also possible to combine several representations in the same plot.

For example, the plotly.express function px.histogram can add a subplot with a different statistical representation than the histogram, given by the parameter marginal. Plotly Express functions take as a first argument a tidy pandas.DataFrame.

In [1]:
import plotly.express as px
tips = px.data.tips()
fig = px.histogram(tips, x="total_bill", y="tip", color="sex", marginal="rug",
                   hover_data=tips.columns)
fig.show()
In [2]:
import plotly.express as px
tips = px.data.tips()
fig = px.histogram(tips, x="total_bill", y="tip", color="sex",
                   marginal="box", # or violin, rug
                   hover_data=tips.columns)
fig.show()

Combined statistical representations with distplot figure factoryΒΆ

The distplot figure factory displays a combination of statistical representations of numerical data, such as histogram, kernel density estimation or normal curve, and rug plot.

Basic DistplotΒΆ

A histogram, a kde plot and a rug plot are displayed.

In [3]:
import plotly.figure_factory as ff
import numpy as np
np.random.seed(1)

x = np.random.randn(1000)
hist_data = [x]
group_labels = ['distplot'] # name of the dataset

fig = ff.create_distplot(hist_data, group_labels)
fig.show()

Plot Multiple DatasetsΒΆ

In [4]:
import plotly.figure_factory as ff
import numpy as np

# Add histogram data
x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2
x4 = np.random.randn(200) + 4

# Group data together
hist_data = [x1, x2, x3, x4]

group_labels = ['Group 1', 'Group 2', 'Group 3', 'Group 4']

# Create distplot with custom bin_size
fig = ff.create_distplot(hist_data, group_labels, bin_size=.2)
fig.show()

Use Multiple Bin SizesΒΆ

Different bin sizes are used for the different datasets with the bin_size argument.

In [5]:
import plotly.figure_factory as ff
import numpy as np

# Add histogram data
x1 = np.random.randn(200)-2
x2 = np.random.randn(200)
x3 = np.random.randn(200)+2
x4 = np.random.randn(200)+4

# Group data together
hist_data = [x1, x2, x3, x4]

group_labels = ['Group 1', 'Group 2', 'Group 3', 'Group 4']

# Create distplot with custom bin_size
fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5, 1])
fig.show()

Customize Rug Text, Colors & TitleΒΆ

In [6]:
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(26)
x2 = np.random.randn(26) + .5

group_labels = ['2014', '2015']

rug_text_one = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
                'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
                'u', 'v', 'w', 'x', 'y', 'z']

rug_text_two = ['aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg', 'hh', 'ii', 'jj',
                'kk', 'll', 'mm', 'nn', 'oo', 'pp', 'qq', 'rr', 'ss', 'tt',
                'uu', 'vv', 'ww', 'xx', 'yy', 'zz']

rug_text = [rug_text_one, rug_text_two] # for hover in rug plot
colors = ['rgb(0, 0, 100)', 'rgb(0, 200, 200)']

# Create distplot with custom bin_size
fig = ff.create_distplot(
    [x1, x2], group_labels, bin_size=.2,
    rug_text=rug_text, colors=colors)

fig.update_layout(title_text='Customized Distplot')
fig.show()

Plot Normal CurveΒΆ

In [7]:
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(200)
x2 = np.random.randn(200) + 2

group_labels = ['Group 1', 'Group 2']

colors = ['slategray', 'magenta']

# Create distplot with curve_type set to 'normal'
fig = ff.create_distplot([x1, x2], group_labels, bin_size=.5,
                         curve_type='normal', # override default 'kde'
                         colors=colors)

# Add title
fig.update_layout(title_text='Distplot with Normal Distribution')
fig.show()

Plot Only Curve and RugΒΆ

In [8]:
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(200) - 1
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 1

hist_data = [x1, x2, x3]

group_labels = ['Group 1', 'Group 2', 'Group 3']
colors = ['#333F44', '#37AA9C', '#94F3E4']

# Create distplot with curve_type set to 'normal'
fig = ff.create_distplot(hist_data, group_labels, show_hist=False, colors=colors)

# Add title
fig.update_layout(title_text='Curve and Rug Plot')
fig.show()

Plot Only Hist and RugΒΆ

In [9]:
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(200) - 1
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 1

hist_data = [x1, x2, x3]

group_labels = ['Group 1', 'Group 2', 'Group 3']
colors = ['#835AF1', '#7FA6EE', '#B8F7D4']

# Create distplot with curve_type set to 'normal'
fig = ff.create_distplot(hist_data, group_labels, colors=colors, bin_size=.25,
                         show_curve=False)

# Add title
fig.update_layout(title_text='Hist and Rug Plot')
fig.show()

Plot Hist and Rug with Different Bin SizesΒΆ

In [10]:
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2

hist_data = [x1, x2, x3]

group_labels = ['Group 1', 'Group 2', 'Group 3']
colors = ['#393E46', '#2BCDC1', '#F66095']

fig = ff.create_distplot(hist_data, group_labels, colors=colors,
                         bin_size=[0.3, 0.2, 0.1], show_curve=False)

# Add title
fig.update(layout_title_text='Hist and Rug Plot')
fig.show()

Plot Only Hist and CurveΒΆ

In [11]:
import plotly.figure_factory as ff
import numpy as np

x1 = np.random.randn(200) - 2
x2 = np.random.randn(200)
x3 = np.random.randn(200) + 2

hist_data = [x1, x2, x3]

group_labels = ['Group 1', 'Group 2', 'Group 3']
colors = ['#A56CC1', '#A6ACEC', '#63F5EF']

# Create distplot with curve_type set to 'normal'
fig = ff.create_distplot(hist_data, group_labels, colors=colors,
                         bin_size=.2, show_rug=False)

# Add title
fig.update_layout(title_text='Hist and Curve Plot')
fig.show()

Distplot with PandasΒΆ

In [12]:
import plotly.figure_factory as ff
import numpy as np
import pandas as pd

df = pd.DataFrame({'2012': np.random.randn(200),
                   '2013': np.random.randn(200)+1})
fig = ff.create_distplot([df[c] for c in df.columns], df.columns, bin_size=.25)
fig.show()

ReferenceΒΆ

In [13]:
help(ff.create_distplot)
Help on function create_distplot in module plotly.figure_factory._distplot:

create_distplot(hist_data, group_labels, bin_size=1.0, curve_type='kde', colors=None, rug_text=None, histnorm='probability density', show_hist=True, show_curve=True, show_rug=True)
    BETA function that creates a distplot similar to seaborn.distplot
    
    The distplot can be composed of all or any combination of the following
    3 components: (1) histogram, (2) curve: (a) kernel density estimation
    or (b) normal curve, and (3) rug plot. Additionally, multiple distplots
    (from multiple datasets) can be created in the same plot.
    
    :param (list[list]) hist_data: Use list of lists to plot multiple data
        sets on the same plot.
    :param (list[str]) group_labels: Names for each data set.
    :param (list[float]|float) bin_size: Size of histogram bins.
        Default = 1.
    :param (str) curve_type: 'kde' or 'normal'. Default = 'kde'
    :param (str) histnorm: 'probability density' or 'probability'
        Default = 'probability density'
    :param (bool) show_hist: Add histogram to distplot? Default = True
    :param (bool) show_curve: Add curve to distplot? Default = True
    :param (bool) show_rug: Add rug to distplot? Default = True
    :param (list[str]) colors: Colors for traces.
    :param (list[list]) rug_text: Hovertext values for rug_plot,
    :return (dict): Representation of a distplot figure.
    
    Example 1: Simple distplot of 1 data set
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    
    hist_data = [[1.1, 1.1, 2.5, 3.0, 3.5,
                  3.5, 4.1, 4.4, 4.5, 4.5,
                  5.0, 5.0, 5.2, 5.5, 5.5,
                  5.5, 5.5, 5.5, 6.1, 7.0]]
    
    group_labels = ['distplot example']
    
    fig = create_distplot(hist_data, group_labels)
    
    url = py.plot(fig, filename='Simple distplot', validate=False)
    ```
    
    Example 2: Two data sets and added rug text
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    
    # Add histogram data
    hist1_x = [0.8, 1.2, 0.2, 0.6, 1.6,
               -0.9, -0.07, 1.95, 0.9, -0.2,
               -0.5, 0.3, 0.4, -0.37, 0.6]
    hist2_x = [0.8, 1.5, 1.5, 0.6, 0.59,
               1.0, 0.8, 1.7, 0.5, 0.8,
               -0.3, 1.2, 0.56, 0.3, 2.2]
    
    # Group data together
    hist_data = [hist1_x, hist2_x]
    
    group_labels = ['2012', '2013']
    
    # Add text
    rug_text_1 = ['a1', 'b1', 'c1', 'd1', 'e1',
          'f1', 'g1', 'h1', 'i1', 'j1',
          'k1', 'l1', 'm1', 'n1', 'o1']
    
    rug_text_2 = ['a2', 'b2', 'c2', 'd2', 'e2',
          'f2', 'g2', 'h2', 'i2', 'j2',
          'k2', 'l2', 'm2', 'n2', 'o2']
    
    # Group text together
    rug_text_all = [rug_text_1, rug_text_2]
    
    # Create distplot
    fig = create_distplot(
        hist_data, group_labels, rug_text=rug_text_all, bin_size=.2)
    
    # Add title
    fig['layout'].update(title='Dist Plot')
    
    # Plot!
    url = py.plot(fig, filename='Distplot with rug text', validate=False)
    ```
    
    Example 3: Plot with normal curve and hide rug plot
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    import numpy as np
    
    x1 = np.random.randn(190)
    x2 = np.random.randn(200)+1
    x3 = np.random.randn(200)-1
    x4 = np.random.randn(210)+2
    
    hist_data = [x1, x2, x3, x4]
    group_labels = ['2012', '2013', '2014', '2015']
    
    fig = create_distplot(
        hist_data, group_labels, curve_type='normal',
        show_rug=False, bin_size=.4)
    
    url = py.plot(fig, filename='hist and normal curve', validate=False)
    
    Example 4: Distplot with Pandas
    ```
    import plotly.plotly as py
    from plotly.figure_factory import create_distplot
    import numpy as np
    import pandas as pd
    
    df = pd.DataFrame({'2012': np.random.randn(200),
                       '2013': np.random.randn(200)+1})
    py.iplot(create_distplot([df[c] for c in df.columns], df.columns),
                             filename='examples/distplot with pandas',
                             validate=False)
    ```