diff --git a/CHANGELOG.md b/CHANGELOG.md
index 100d48d9..addc6860 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@
 - [API Change] A new figure and axes is created (via `plt.subplots()`) when calling a plotting method with `ax=None`. Previously, the current axes was used (via `plt.gca()`) ([#211](https://github.com/ploomber/sklearn-evaluation/pull/211))
 - [Fix] Validating input elbow curve model has "score" method [#146] 
 - [Fix] Adds class labels for multi class roc plot (#209)
+- [API Change] `silhouette_analysis_from_results` function now accepts a list of cluster labels [#213](https://github.com/ploomber/sklearn-evaluation/pull/213)
 
 ## 0.9.0 (2023-01-13)
 
diff --git a/docs/api/plot.rst b/docs/api/plot.rst
index 503cd3be..7907ed55 100644
--- a/docs/api/plot.rst
+++ b/docs/api/plot.rst
@@ -59,6 +59,8 @@ elbow_curve
 -----------
 .. autofunction:: sklearn_evaluation.plot.elbow_curve
 
+.. _elbow-curve-from-results-label:
+
 elbow_curve_from_results
 ------------------------
 .. autofunction:: sklearn_evaluation.plot.elbow_curve_from_results
@@ -115,6 +117,8 @@ silhouette_analysis
 -------------------
 .. autofunction:: sklearn_evaluation.plot.silhouette_analysis
 
+.. _silhouette-analysis-from-results-label:
+
 silhouette_analysis_from_results
 --------------------------------
 .. autofunction:: sklearn_evaluation.plot.silhouette_analysis_from_results
diff --git a/docs/clustering/clustering_evaluation.md b/docs/clustering/clustering_evaluation.md
index e4cc47c2..8716081d 100644
--- a/docs/clustering/clustering_evaluation.md
+++ b/docs/clustering/clustering_evaluation.md
@@ -54,13 +54,11 @@ Elbow curve helps to identify the point at which the plot starts to become paral
 plot.elbow_curve(X, kmeans, range_n_clusters=range(1, 30))
 ```
 
-##### Elbow curve from results
+```{eval-rst}
+.. tip::
+
+   If you want to train the models yourself, you can use :ref:`elbow-curve-from-results-label` to plot.
 
-```{code-cell} ipython3
-import numpy as np
-n_clusters = range(1, 10, 2)
-sum_of_squares = np.array([4572.2, 470.7, 389.9, 335.1, 305.5])
-plot.elbow_curve_from_results(n_clusters, sum_of_squares, times=None)
 ```
 
 ##### Silhouette plot
@@ -71,23 +69,9 @@ The below plot shows that n_clusters value of 3, 5 and 6 are a bad pick for the
 silhouette = plot.silhouette_analysis(X, kmeans)
 ```
 
-##### Silhouette plot from cluster labels
-
-```{code-cell} ipython3
-X, y = datasets.make_blobs(
-    n_samples=500,
-    n_features=2,
-    centers=4,
-    cluster_std=1,
-    center_box=(-10.0, 10.0),
-    shuffle=True,
-    random_state=1,
-)
+```{eval-rst}
+.. tip::
 
-kmeans = KMeans(n_clusters=4, random_state=1, n_init=5)
-cluster_labels = kmeans.fit_predict(X)
-```
+   If you want to train the models yourself, you can use :ref:`silhouette-analysis-from-results-label` to plot.
 
-```{code-cell} ipython3
-silhouette = plot.silhouette_analysis_from_results(X, cluster_labels)
-```
+```
\ No newline at end of file
diff --git a/examples/elbow_curve_from_results.py b/examples/elbow_curve_from_results.py
new file mode 100644
index 00000000..d3630ffa
--- /dev/null
+++ b/examples/elbow_curve_from_results.py
@@ -0,0 +1,21 @@
+import time
+import numpy as np
+
+from sklearn.cluster import KMeans
+from sklearn.datasets import make_blobs
+
+from sklearn_evaluation import plot
+
+X, _ = make_blobs(n_samples=100, centers=3, n_features=5, random_state=0)
+
+n_clusters = range(1, 30)
+sum_of_squares = []
+cluster_times = []
+for i in n_clusters:
+    start = time.time()
+    kmeans = KMeans(n_clusters=i, n_init=5)
+    sum_of_squares.append(kmeans.fit(X).score(X))
+    cluster_times.append(time.time() - start)
+
+sum_of_squares = np.absolute(sum_of_squares)
+plot.elbow_curve_from_results(n_clusters, sum_of_squares, cluster_times)
diff --git a/examples/silhouette_plot_basic.py b/examples/silhouette_plot_basic.py
index 9d74b825..49a2eced 100644
--- a/examples/silhouette_plot_basic.py
+++ b/examples/silhouette_plot_basic.py
@@ -1,6 +1,5 @@
 from sklearn.cluster import KMeans
 from sklearn.datasets import make_blobs
-import matplotlib.pyplot as plt
 
 from sklearn_evaluation import plot
 
@@ -16,4 +15,3 @@
 
 kmeans = KMeans(random_state=1, n_init=5)
 plot.silhouette_analysis(X, kmeans, range_n_clusters=[3])
-plt.show()
diff --git a/examples/silhouette_plot_from_results.py b/examples/silhouette_plot_from_results.py
index b2f38ab6..a8f6f2cb 100644
--- a/examples/silhouette_plot_from_results.py
+++ b/examples/silhouette_plot_from_results.py
@@ -1,6 +1,5 @@
 from sklearn.cluster import KMeans
 from sklearn.datasets import make_blobs
-import matplotlib.pyplot as plt
 
 from sklearn_evaluation import plot
 
@@ -14,7 +13,15 @@
     random_state=1,
 )
 
-kmeans = KMeans(n_clusters=4, random_state=1, n_init=5)
-cluster_labels = kmeans.fit_predict(X)
+cluster_labels = []
+
+# Cluster labels for four clusters
+kmeans = KMeans(n_clusters=4, n_init=5)
+cluster_labels.append(kmeans.fit_predict(X))
+
+# Cluster labels for five clusters
+kmeans = KMeans(n_clusters=5, n_init=5)
+cluster_labels.append(kmeans.fit_predict(X))
+
+
 plot.silhouette_analysis_from_results(X, cluster_labels)
-plt.show()
diff --git a/src/sklearn_evaluation/plot/__init__.py b/src/sklearn_evaluation/plot/__init__.py
index 3ddf7c4a..47b62a5e 100644
--- a/src/sklearn_evaluation/plot/__init__.py
+++ b/src/sklearn_evaluation/plot/__init__.py
@@ -21,15 +21,22 @@
     silhouette_analysis,
     silhouette_analysis_from_results,
 )
-from sklearn_evaluation.plot.regression \
-    import residuals, prediction_error, cooks_distance
+from sklearn_evaluation.plot.regression import (
+    residuals,
+    prediction_error,
+    cooks_distance,
+)
 from sklearn_evaluation.plot.target_analysis import target_analysis
 from sklearn_evaluation.plot.calibration import calibration_curve, scores_distribution
-from sklearn_evaluation.plot.classification_report \
-    import classification_report, ClassificationReport
+from sklearn_evaluation.plot.classification_report import (
+    classification_report,
+    ClassificationReport,
+)
 from sklearn_evaluation.plot.ks_statistics import ks_statistic
-from sklearn_evaluation.plot.cumulative_gain_lift_curve \
-     import cumulative_gain, lift_curve
+from sklearn_evaluation.plot.cumulative_gain_lift_curve import (
+    cumulative_gain,
+    lift_curve,
+)
 from sklearn_evaluation.plot.feature_ranking import Rank1D, Rank2D
 
 __all__ = [
diff --git a/src/sklearn_evaluation/plot/clustering.py b/src/sklearn_evaluation/plot/clustering.py
index 92a05646..2988fab0 100644
--- a/src/sklearn_evaluation/plot/clustering.py
+++ b/src/sklearn_evaluation/plot/clustering.py
@@ -40,7 +40,23 @@
 from ploomber_core.exceptions import modify_exceptions
 from ploomber_core import deprecated
 
-# TODO: add unit test
+
+def _generate_axes(cluster, figsize, ax):
+    if ax is not None:
+        if not isinstance(ax, list):
+            ax = [ax]
+        if len(cluster) != len(ax):
+            raise ValueError(
+                f"Received lengths, cluster : {len(cluster)},"
+                f"axes : {len(ax)}."
+                f"Number of axes passed should match number of clusters"
+            )
+    else:
+        ax = []
+        for i in range(len(cluster)):
+            _, axes = plt.subplots(1, 1, figsize=figsize)
+            ax.append(axes)
+    return ax
 
 
 @SKLearnEvaluationLogger.log(feature="plot")
@@ -139,9 +155,13 @@ def elbow_curve_from_results(n_clusters, sum_of_squares, times, ax=None):
     """
     Same as `elbow_curve`, but it takes the number of clusters and sum of
     squares as inputs. Useful if you want to train the models yourself.
+
+    Examples
+    --------
+    .. plot:: ../examples/elbow_curve_from_results.py
+
     """
-    # TODO: unit test this
-    # TODO: also test with unsorted input
+
     idx = np.argsort(n_clusters)
     n_clusters = np.array(n_clusters)[idx]
     sum_of_squares = np.array(sum_of_squares)[idx]
@@ -183,6 +203,7 @@ def _clone_and_score_clusterer(clf, X, n_clusters):
 
 
 @SKLearnEvaluationLogger.log(feature="plot")
+@modify_exceptions
 def silhouette_analysis(
     X,
     clf,
@@ -262,13 +283,16 @@ def silhouette_analysis(
             "Cannot plot silhouette analysis ."
         )
 
-    for n_clusters in range_n_clusters:
-        _, ax = plt.subplots(1, 1, figsize=figsize)
+    # if no ax is passed by user generate new plot
+    # for each model
+
+    ax = _generate_axes(range_n_clusters, figsize, ax)
+
+    for ax, n_clusters in zip(ax, range_n_clusters):
         clf = clone(clf)
         setattr(clf, "n_clusters", n_clusters)
         cluster_labels = clf.fit_predict(X)
-
-        ax = silhouette_analysis_from_results(
+        _silhouette_analysis_one_model(
             X, cluster_labels, metric, figsize, cmap, text_fontsize, ax
         )
     return ax
@@ -276,7 +300,7 @@ def silhouette_analysis(
 
 @SKLearnEvaluationLogger.log(feature="plot")
 @modify_exceptions
-def silhouette_analysis_from_results(
+def _silhouette_analysis_one_model(
     X,
     cluster_labels,
     metric="euclidean",
@@ -286,12 +310,7 @@ def silhouette_analysis_from_results(
     ax=None,
 ):
     """
-    Same as `silhouette_plot` but takes cluster_labels as input.
-    Useful if you want to train the model yourself
-
-    Notes
-    -----
-    .. versionadded:: 0.8.3
+    Generate silhouette plot for one value of n_cluster.
     """
     cluster_labels = np.asarray(cluster_labels)
 
@@ -365,3 +384,36 @@ def silhouette_analysis_from_results(
     ax.tick_params(labelsize=text_fontsize)
     ax.legend(loc="best", fontsize=text_fontsize)
     return ax
+
+
+@SKLearnEvaluationLogger.log(feature="plot")
+@modify_exceptions
+def silhouette_analysis_from_results(
+    X,
+    cluster_labels,
+    metric="euclidean",
+    figsize=None,
+    cmap="nipy_spectral",
+    text_fontsize="medium",
+    ax=None,
+):
+    """
+    Same as `silhouette_plot` but takes list of cluster_labels as input.
+    Useful if you want to train the model yourself
+
+    Examples
+    --------
+    .. plot:: ../examples/silhouette_plot_from_results.py
+
+    """
+
+    # if no ax is passed by user generate new plot
+    # for each model
+
+    ax = _generate_axes(cluster_labels, figsize, ax)
+
+    for ax, label in zip(ax, cluster_labels):
+        _silhouette_analysis_one_model(
+            X, label, metric, figsize, cmap, text_fontsize, ax
+        )
+    return ax
diff --git a/tests/baseline_images/test_clustering/silhouette_analysis_from_results_cluster_five.png b/tests/baseline_images/test_clustering/silhouette_analysis_from_results_cluster_five.png
new file mode 100644
index 00000000..5247e129
Binary files /dev/null and b/tests/baseline_images/test_clustering/silhouette_analysis_from_results_cluster_five.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_analysis_from_results_cluster_six.png b/tests/baseline_images/test_clustering/silhouette_analysis_from_results_cluster_six.png
new file mode 100644
index 00000000..cfb3c331
Binary files /dev/null and b/tests/baseline_images/test_clustering/silhouette_analysis_from_results_cluster_six.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_cosine.png b/tests/baseline_images/test_clustering/silhouette_plot_cosine.png
index 26b39be4..3cdfc40c 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_cosine.png and b/tests/baseline_images/test_clustering/silhouette_plot_cosine.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_five_clusters.png b/tests/baseline_images/test_clustering/silhouette_plot_five_clusters.png
index c1b5c622..5247e129 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_five_clusters.png and b/tests/baseline_images/test_clustering/silhouette_plot_five_clusters.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_five_clusters_minibatchkmeans.png b/tests/baseline_images/test_clustering/silhouette_plot_five_clusters_minibatchkmeans.png
index 01725e5b..73e7a460 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_five_clusters_minibatchkmeans.png and b/tests/baseline_images/test_clustering/silhouette_plot_five_clusters_minibatchkmeans.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_four_clusters.png b/tests/baseline_images/test_clustering/silhouette_plot_four_clusters.png
index 7a14a9a2..dab8d8f8 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_four_clusters.png and b/tests/baseline_images/test_clustering/silhouette_plot_four_clusters.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_four_clusters_minibatchkmeans.png b/tests/baseline_images/test_clustering/silhouette_plot_four_clusters_minibatchkmeans.png
index 1b61155c..37c0762c 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_four_clusters_minibatchkmeans.png and b/tests/baseline_images/test_clustering/silhouette_plot_four_clusters_minibatchkmeans.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_six_clusters.png b/tests/baseline_images/test_clustering/silhouette_plot_six_clusters.png
index b3b9e197..cfb3c331 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_six_clusters.png and b/tests/baseline_images/test_clustering/silhouette_plot_six_clusters.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_spectral.png b/tests/baseline_images/test_clustering/silhouette_plot_spectral.png
index 06ff3be5..df8e6780 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_spectral.png and b/tests/baseline_images/test_clustering/silhouette_plot_spectral.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_three_clusters.png b/tests/baseline_images/test_clustering/silhouette_plot_three_clusters.png
index 24d4c38c..9a9e95ad 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_three_clusters.png and b/tests/baseline_images/test_clustering/silhouette_plot_three_clusters.png differ
diff --git a/tests/baseline_images/test_clustering/silhouette_plot_two_clusters.png b/tests/baseline_images/test_clustering/silhouette_plot_two_clusters.png
index 2bba1cac..80d9aa53 100644
Binary files a/tests/baseline_images/test_clustering/silhouette_plot_two_clusters.png and b/tests/baseline_images/test_clustering/silhouette_plot_two_clusters.png differ
diff --git a/tests/test_clustering.py b/tests/test_clustering.py
index d074f2fa..31a4f67e 100644
--- a/tests/test_clustering.py
+++ b/tests/test_clustering.py
@@ -188,10 +188,7 @@ def test_n_jobs():
 )
 def test_plot_silhouette():
     clf = KMeans(random_state=10)
-    # original = plt.rcParams["figure.figsize"]
-    plt.rcParams["figure.figsize"] = (18, 7)
     plot.silhouette_analysis(X, clf)
-    # plt.rcParams["figure.figsize"] = original
 
 
 @image_comparison(
@@ -201,7 +198,6 @@ def test_plot_silhouette():
 )
 def test_plot_silhouette_with_cluster_range():
     clf = KMeans(random_state=10)
-    plt.rcParams["figure.figsize"] = (18, 7)
     plot.silhouette_analysis(X, clf, range_n_clusters=[4, 5])
 
 
@@ -215,7 +211,6 @@ def test_plot_silhouette_with_cluster_range():
 )
 def test_plot_silhouette_with_minibatchkmeans():
     clf = MiniBatchKMeans(random_state=10)
-    plt.rcParams["figure.figsize"] = (18, 7)
     plot.silhouette_analysis(X, clf, range_n_clusters=[4, 5])
 
 
@@ -224,7 +219,6 @@ def test_plot_silhouette_with_minibatchkmeans():
 )
 def test_cmap():
     clf = KMeans(random_state=10)
-    plt.rcParams["figure.figsize"] = (18, 7)
     plot.silhouette_analysis(X, clf, range_n_clusters=[2], cmap="Spectral")
 
 
@@ -233,53 +227,32 @@ def test_cmap():
 )
 def test_metric():
     clf = KMeans(random_state=10)
-    plt.rcParams["figure.figsize"] = (18, 7)
     plot.silhouette_analysis(X, clf, range_n_clusters=[6], metric="cosine")
 
 
-def test_string_classes():
-    clf = KMeans()
-    cluster_labels = clf.fit_predict(X)
-    plot.silhouette_analysis_from_results(X, convert_labels_into_string(cluster_labels))
-
-
-@image_comparison(
-    baseline_images=["silhouette_plot_array_like"],
-    extensions=["png"],
-    remove_text=False,
-)
 def test_array_like():
-    plot.silhouette_analysis_from_results(X.tolist(), y.tolist())
-
-
-@image_comparison(
-    baseline_images=["silhouette_plot_array_like_string_label"],
-    extensions=["png"],
-    remove_text=False,
-)
-def test_array_like_string():
-    plot.silhouette_analysis_from_results(X.tolist(), convert_labels_into_string(y))
+    clf = KMeans()
+    plot.silhouette_analysis(X.tolist(), clf)
 
 
 def test_ax_silhouette():
     clf = KMeans()
-    cluster_labels = clf.fit_predict(X)
-    plot.silhouette_analysis_from_results(X, cluster_labels)
-    fig, ax = plt.subplots(1, 1)
-    out_ax = plot.silhouette_analysis_from_results(X, cluster_labels)
-    assert ax is not out_ax
-    out_ax = plot.silhouette_analysis_from_results(X, cluster_labels, ax=ax)
-    assert ax is out_ax
+    ax = []
+    for i in range(5):
+        fig, axes = plt.subplots(1, 1)
+        ax.append(axes)
+    out_ax = plot.silhouette_analysis(X, clf)
+    for axes in ax:
+        assert axes is not out_ax
+    out_ax = plot.silhouette_analysis(X, clf, ax=ax)
+    assert ax[-1] is out_ax
 
 
 def test_ax_params():
-    clf = KMeans(n_clusters=8)
-    cluster_labels = clf.fit_predict(X)
-    out_ax = plot.silhouette_analysis_from_results(
-        X, cluster_labels, text_fontsize="large"
-    )
-    assert out_ax.get_title() == "Silhouette Analysis (n_clusters=8)"
-    assert out_ax.get_ylim() == (0.0, 250.0)
+    clf = KMeans(n_clusters=6)
+    out_ax = plot.silhouette_analysis(X, clf, text_fontsize="large")
+    assert out_ax.get_title() == "Silhouette Analysis (n_clusters=6)"
+    assert out_ax.get_ylim() == (0.0, 230.0)
 
 
 def test_invalid_clusterer():
@@ -288,18 +261,72 @@ def test_invalid_clusterer():
         plot.silhouette_analysis(X, clf)
 
 
-def test_silhouette_analysis_from_results_value_error(ploomber_value_error_message):
+def test_silhouette_analysis_value_error(ploomber_value_error_message):
+    clf = KMeans()
     with pytest.raises(ValueError, match=ploomber_value_error_message) as e:
-        plot.silhouette_analysis_from_results([], y.tolist())
+        plot.silhouette_analysis([], clf)
 
     assert "Expected 2D array, got 1D array" in str(e.value)
 
 
-def test_from_results_call(monkeypatch):
+def test_from_one_model_call(monkeypatch):
     mock = Mock()
-    monkeypatch.setattr(cl, "silhouette_analysis_from_results", mock)
+    monkeypatch.setattr(cl, "_silhouette_analysis_one_model", mock)
     clf = KMeans()
-    fig, ax = plt.subplots(1, 1)
-    ax = plot.silhouette_analysis(X, clf, range_n_clusters=[2, 3], ax=ax)
+    fig, ax1 = plt.subplots(1, 1)
+    fig, ax2 = plt.subplots(1, 1)
+    plot.silhouette_analysis(X, clf, range_n_clusters=[2, 3], ax=[ax1, ax2])
     assert mock.call_count == 2
-    assert mock.return_value == ax
+
+
+@image_comparison(
+    baseline_images=[
+        "silhouette_analysis_from_results_cluster_five",
+        "silhouette_analysis_from_results_cluster_six",
+    ],
+    extensions=["png"],
+    remove_text=False,
+)
+def test_silhouette_plot_from_results():
+    cluster_labels = [
+        KMeans(random_state=10, n_clusters=5).fit_predict(X),
+        KMeans(random_state=10, n_clusters=6).fit_predict(X),
+    ]
+
+    plot.silhouette_analysis_from_results(X, cluster_labels)
+
+
+def test_silhouette_analysis_from_results_value_error(ploomber_value_error_message):
+    with pytest.raises(ValueError, match=ploomber_value_error_message) as e:
+        plot.silhouette_analysis_from_results([], [y.tolist()])
+
+    assert "Expected 2D array, got 1D array" in str(e.value)
+
+
+def test_ax_silhouette_from_results():
+    X = [[1.2, 3.4], [2.2, 4.1], [1.1, 6.5]]
+    cluster_labels = [[0, 0, 1], [0, 1, 0]]
+    fig, ax1 = plt.subplots(1, 1)
+    fig, ax2 = plt.subplots(1, 1)
+    out_ax = plot.silhouette_analysis_from_results(X, cluster_labels)
+    assert ax1 is not out_ax
+    assert ax2 is not out_ax
+    out_ax = plot.silhouette_analysis_from_results(X, cluster_labels, ax=[ax1, ax2])
+    assert ax2 is out_ax
+
+
+def test_one_model_from_results_call(monkeypatch):
+    X = [[1.2, 3.4], [2.2, 4.1], [1.1, 6.5]]
+    cluster_labels = [[0, 0, 1], [0, 1, 0], [0, 0, 0]]
+    mock = Mock()
+    monkeypatch.setattr(cl, "_silhouette_analysis_one_model", mock)
+    plot.silhouette_analysis_from_results(X, cluster_labels)
+    assert mock.call_count == 3
+
+
+def test_ax_length_mismatch(ploomber_value_error_message):
+    X = [[1.2, 3.4], [2.2, 4.1], [1.1, 6.5]]
+    cluster_labels = [[0, 0, 1], [0, 1, 0]]
+    fig, ax = plt.subplots(1, 1)
+    with pytest.raises(ValueError, match=ploomber_value_error_message):
+        plot.silhouette_analysis_from_results(X, cluster_labels, ax=[ax])