# Source code for snapedautility.plot_histograms

import pandas as pd
import numpy as np
import altair as alt


def plot_histograms(df, features, facet_columns=3, width=125, height=125):
    """
    Plot the distribution of selected dataframe features.

    Numeric features are drawn as binned histograms and categorical
    (``object`` dtype) features as bar charts of record counts. The
    categorical charts are stacked above the numeric charts. Features whose
    dtype is neither ``object`` nor numeric are not plotted.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        Input dataframe
    features : list
        List of feature names as string. Sub-plots follow the order given
        here; duplicate names are plotted once.
    facet_columns : int
        Number of sub-plots per row within each group of charts.
        Default set to 3
    width : int
        The width of sub-plot for each feature.
        Default set to 125
    height : int
        The height of sub-plot for each feature
        Default set to 125

    Returns
    -------
    `altair plot`
        The categorical charts concatenated vertically with the numeric
        charts, titled "Histograms for Specified Features".

    Raises
    ------
    ValueError
        If ``df`` is not a pandas DataFrame, if ``features`` is empty or
        names a column that is not in ``df``, or if ``width`` / ``height``
        is not a positive integer.

    Examples
    --------
    >>> from snapedautility.plot_histograms import plot_histograms
    >>> df = penguins_data
    >>> plot_histograms(
    ...     df, ["species", "bill_length_mm", "island"], width=100, height=100
    ... )
    """
    # Check the container type first so a non-DataFrame input is always
    # reported as such, whatever `features` happens to hold.
    if not isinstance(df, pd.DataFrame):
        raise ValueError("The data must be in type of Pandas DataFrame.")

    # De-duplicate while keeping the caller's ordering; a plain set would make
    # the sub-plot order depend on string hashing and vary between runs.
    ordered_features = list(dict.fromkeys(features))
    known_columns = set(df.columns)
    if not ordered_features or not all(
        name in known_columns for name in ordered_features
    ):
        raise ValueError(
            "All features must exist in the columns of the input Pandas DataFrame."
        )

    if (
        not isinstance(width, int)
        or not isinstance(height, int)
        or height <= 0
        or width <= 0
    ):
        raise ValueError("Width and height of the plot must be a positive integer")

    # Split the requested features by dtype, preserving the requested order.
    categorical_names = set(df.select_dtypes(include=["object"]).columns)
    numeric_names = set(df.select_dtypes(include=np.number).columns)
    cat_cols = [name for name in ordered_features if name in categorical_names]
    numeric_cols = [name for name in ordered_features if name in numeric_names]

    # Bar charts of record counts for the categorical features.
    categorical_barplot = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(alt.repeat(), type="nominal"),
            y=alt.Y("count()", title="Count of Records"),
        )
        .properties(width=width, height=height)
        .repeat(cat_cols, columns=facet_columns)
    )

    # Binned histograms for the numeric features. The y axis is a record
    # count, so it carries the same title as the categorical charts.
    numeric_barplot = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(alt.repeat(), type="quantitative", bin=alt.Bin(maxbins=30)),
            y=alt.Y("count()", title="Count of Records"),
        )
        .properties(width=width, height=height)
        .repeat(numeric_cols, columns=facet_columns)
    )

    # `&` stacks the two repeated charts vertically.
    histograms_plot = categorical_barplot & numeric_barplot
    histograms_plot.title = "Histograms for Specified Features"

    return histograms_plot