relevanceai.recipes.model_observability.cluster.evaluator
#
Module Contents#
- class relevanceai.recipes.model_observability.cluster.evaluator.ClusterEvaluator(X: Union[list, numpy.ndarray], cluster_labels: Union[List[Union[str, float]], numpy.ndarray], centroids=None, cluster_names: Union[list, dict] = None, feature_names: Union[list, dict] = None, model=None, outlier_label: Union[str, int] = -1, metric: str = 'euclidean', verbose: bool = False)#
Evaluate your clusters
- Parameters
X (np.ndarray) – The original data
cluster_labels (List[str]) – A list of cluster labels
centroids (Union[list, np.ndarray, str]) – The centroid vectors. If supplied, they are used as given. Otherwise, they are inferred from the model or calculated from the labels. To calculate medoids, set centroids = “mediods”
model – The model used for clustering.
num_clusters (Optional[int]) – The number of clusters. This is required when the number of clusters cannot otherwise be determined
outlier_label (Union[str, int]) – The label that marks a point as an outlier
metric – The metric to use for calculating distance, “euclidean”, “cosine”, …
verbose – Whether to print additional output
- hierarchy_linkage(self, hierarchy_method: str = 'ward')#
- pyplot_dendrogram(self, hierarchy_method=None, color_threshold=1.25, orientation='left', ax=None)#
- plotly_dendrogram(self, hierarchy_method=None, color_threshold=1.25, orientation='left')#
- plot_dendrogram(self, plot_method=None, hierarchy_method=None, color_threshold: float = 1, orientation: str = 'left')#
- plot_distance_matrix(self, metric='euclidean', decimals=4)#
- plot_boxplot(self, summary_stats, name='')#
- static summary_statistics(array: numpy.ndarray, axis=0, simple=False)#
Basic summary statistics
- silhouette_samples(self)#
- silhouette_score(self)#
- calinski_harabasz_score(self)#
- davies_bouldin_score(self)#
- static dunn_index(min_distance_from_centroid, max_centroid_distance)#
- distance_from_centroid(self, cluster_data, centroid)#
- distance_from_centroid_to_another(self, other_cluster_data, centroid)#
Store the distances from a centroid to another.
- squared_error_samples(self)#
- squared_error_score(self)#
- mean_squared_error_score(self)#
- squared_error_features_from_samples(self, squared_error_samples)#
- static z_score(value, mean, std)#
- closest_clusters(self, cluster_index, n_clusters)#
- furthest_clusters(self, cluster_index, n_clusters)#
- internal_overview_report(self, store_centroids: bool = True, store_distance_matrix: bool = True, save=True)#
- internal_report(self, top_n_clusters: int = 5, top_n_features: int = 5, store_squared_errors: bool = True, store_distances: bool = True, store_centroids: bool = False, store_distance_matrix: bool = False, save=True)#