Source code for snapedautility.detect_outliers

import numpy as np
import pandas as pd
import altair as alt

def detect_outliers(s, width=250, height=250, *, factor=1.5):
    """
    Compute IQR-based outlier thresholds for a pandas Series and draw a box plot.

    The thresholds are ``q1 - factor * IQR`` and ``q3 + factor * IQR``, where
    ``q1`` and ``q3`` are the 25th and 75th percentiles of ``s`` and
    ``IQR = q3 - q1``. Observations outside these bounds are the ones usually
    treated as outliers.

    Parameters
    ----------
    s : pandas.core.series.Series
        Pandas Series for which the outlier thresholds are computed.
    width : int, default = 250
        The width of the plot.
    height : int, default = 250
        The height of the plot.
    factor : int or float, default = 1.5
        Multiplier applied to the interquartile range when computing the
        thresholds. Must be non-negative. Keyword-only.

    Returns
    -------
    list
        Two-element list ``[lower_bound, upper_bound]`` with the thresholds.
    altair.Chart
        Box plot of the observations, titled "Box Plot with Outliers".

    Raises
    ------
    TypeError
        If ``s`` is not a pandas Series, if ``width`` or ``height`` is not an
        integer (booleans are rejected), or if ``factor`` is not a number.
    ValueError
        If ``s`` has no elements or ``factor`` is negative.

    Examples
    --------
    >>> from snapedautility.detect_outliers import detect_outliers
    >>> s = pd.Series([1,1,2,3,4,5,6,9,10,13,40])
    >>> bounds, chart = detect_outliers(s)
    >>> [float(b) for b in bounds]
    [-8.0, 20.0]
    """
    if not isinstance(s, pd.Series):
        raise TypeError("s should be a pandas series")
    if s.empty:
        raise ValueError("s should have at least one element")
    # bool is a subclass of int, so it has to be rejected explicitly;
    # otherwise width=True would slip through this validation.
    for dimension in (height, width):
        if isinstance(dimension, bool) or not isinstance(dimension, int):
            raise TypeError("height and width should be integers")
    if isinstance(factor, bool) or not isinstance(factor, (int, float)):
        raise TypeError("factor should be a number")
    if factor < 0:
        raise ValueError("factor should be non-negative")

    q1 = s.quantile(0.25)
    q3 = s.quantile(0.75)
    iqr = q3 - q1

    # The chart encodes a column called "values", so the series is renamed
    # before being turned into a single-column frame.
    df = s.rename("values").to_frame()
    chart = (
        alt.Chart(df, title="Box Plot with Outliers")
        .mark_boxplot()
        .encode(x="values")
        .properties(width=width, height=height)
    )
    return [q1 - factor * iqr, q3 + factor * iqr], chart