relevanceai.recipes.model_observability.cluster.evaluator#

Module Contents#

class relevanceai.recipes.model_observability.cluster.evaluator.ClusterEvaluator(X: Union[list, numpy.ndarray], cluster_labels: Union[List[Union[str, float]], numpy.ndarray], centroids=None, cluster_names: Union[list, dict] = None, feature_names: Union[list, dict] = None, model=None, outlier_label: Union[str, int] = -1, metric: str = 'euclidean', verbose: bool = False)#

Evaluate your clusters

Parameters
  • X (Union[list, np.ndarray]) – The original data

  • cluster_labels (Union[List[Union[str, float]], np.ndarray]) – A list or array of cluster labels

  • centroids (Union[list, np.ndarray, str]) – The centroid vectors. If supplied, they are used as given. Otherwise, the evaluator tries to infer them from the model or calculates them from the labels. To calculate medoids, let centroids = “mediods”

  • model – The model used for clustering.

  • num_clusters (Optional[int]) – The number of clusters. This is required when the number of clusters cannot be determined otherwise. Note: this parameter does not appear in the constructor signature shown above.

  • outlier_label (Union[str, int]) – The label that marks a point as an outlier. Defaults to -1.

  • metric (str) – The metric to use for calculating distance, e.g. “euclidean”, “cosine”, … Defaults to “euclidean”.

  • verbose (bool) – Whether to print additional output. Defaults to False.

hierarchy_linkage(self, hierarchy_method: str = 'ward')#
pyplot_dendrogram(self, hierarchy_method=None, color_threshold=1.25, orientation='left', ax=None)#
plotly_dendrogram(self, hierarchy_method=None, color_threshold=1.25, orientation='left')#
plot_dendrogram(self, plot_method=None, hierarchy_method=None, color_threshold: float = 1, orientation: str = 'left')#
plot_distance_matrix(self, metric='euclidean', decimals=4)#
plot_boxplot(self, summary_stats, name='')#
static summary_statistics(array: numpy.ndarray, axis=0, simple=False)#

Basic summary statistics

silhouette_samples(self)#
silhouette_score(self)#
calinski_harabasz_score(self)#
davies_bouldin_score(self)#
static dunn_index(min_distance_from_centroid, max_centroid_distance)#
distance_from_centroid(self, cluster_data, centroid)#
distance_from_centroid_to_another(self, other_cluster_data, centroid)#

Store the distances from one centroid to another.

squared_error_samples(self)#
squared_error_score(self)#
mean_squared_error_score(self)#
squared_error_features_from_samples(self, squared_error_samples)#
static z_score(value, mean, std)#
closest_clusters(self, cluster_index, n_clusters)#
furthest_clusters(self, cluster_index, n_clusters)#
internal_overview_report(self, store_centroids: bool = True, store_distance_matrix: bool = True, save=True)#
internal_report(self, top_n_clusters: int = 5, top_n_features: int = 5, store_squared_errors: bool = True, store_distances: bool = True, store_centroids: bool = False, store_distance_matrix: bool = False, save=True)#