|
26 | 26 | from collections import defaultdict, Counter
|
27 | 27 | from scipy.sparse import csr_matrix
|
28 | 28 | from scipy.cluster import hierarchy as sch
|
| 29 | +from importlib.util import find_spec |
29 | 30 |
|
30 | 31 | # Typing
|
31 | 32 | import sys
|
|
34 | 35 | from typing import Literal
|
35 | 36 | else:
|
36 | 37 | from typing_extensions import Literal
|
37 |
| -from typing import List, Tuple, Union, Mapping, Any, Callable, Iterable |
| 38 | +from typing import List, Tuple, Union, Mapping, Any, Callable, Iterable, TYPE_CHECKING |
| 39 | + |
| 40 | +# Plotting |
| 41 | +if find_spec("plotly") is None: |
| 42 | + from bertopic._utils import MockPlotlyModule |
| 43 | + plotting = MockPlotlyModule() |
| 44 | + |
| 45 | +else: |
| 46 | + from bertopic import plotting |
| 47 | + if TYPE_CHECKING: |
| 48 | + import plotly.graph_objs as go |
| 49 | + import matplotlib.figure as fig |
| 50 | + |
38 | 51 |
|
39 | 52 | # Models
|
40 | 53 | try:
|
|
72 | 85 | )
|
73 | 86 | import bertopic._save_utils as save_utils
|
74 | 87 |
|
75 |
| - |
76 | 88 | logger = MyLogger()
|
77 | 89 | logger.configure("WARNING")
|
78 | 90 |
|
79 |
| -try: |
80 |
| - from bertopic import plotting |
81 |
| - import plotly.graph_objects as go |
82 |
| - |
83 |
| -except ModuleNotFoundError as e: |
84 |
| - if "No module named 'plotly'" in str(e): |
85 |
| - logger.warning("Plotly is not installed. Please install it to use the plotting functions.") |
86 |
| - from bertopic._utils import mock_plotly_go as go, MockPlotting |
87 |
| - |
88 |
| - plotting = MockPlotting(logger) |
89 |
| - else: |
90 |
| - raise ModuleNotFoundError(e) |
91 |
| - |
92 | 91 |
|
93 | 92 | class BERTopic:
|
94 | 93 | """BERTopic is a topic modeling technique that leverages BERT embeddings and
|
@@ -2415,7 +2414,7 @@ def visualize_topics(
|
2415 | 2414 | title: str = "<b>Intertopic Distance Map</b>",
|
2416 | 2415 | width: int = 650,
|
2417 | 2416 | height: int = 650,
|
2418 |
| - ) -> go.Figure: |
| 2417 | + ) -> "go.Figure": |
2419 | 2418 | """Visualize topics, their sizes, and their corresponding words.
|
2420 | 2419 |
|
2421 | 2420 | This visualization is highly inspired by LDAvis, a great visualization
|
@@ -2473,7 +2472,7 @@ def visualize_documents(
|
2473 | 2472 | title: str = "<b>Documents and Topics</b>",
|
2474 | 2473 | width: int = 1200,
|
2475 | 2474 | height: int = 750,
|
2476 |
| - ) -> go.Figure: |
| 2475 | + ) -> "go.Figure": |
2477 | 2476 | """Visualize documents and their topics in 2D.
|
2478 | 2477 |
|
2479 | 2478 | Arguments:
|
@@ -2575,7 +2574,7 @@ def visualize_document_datamap(
|
2575 | 2574 | topic_prefix: bool = False,
|
2576 | 2575 | datamap_kwds: dict = {},
|
2577 | 2576 | int_datamap_kwds: dict = {},
|
2578 |
| - ): |
| 2577 | + ) -> "fig.Figure": |
2579 | 2578 | """Visualize documents and their topics in 2D as a static plot for publication using
|
2580 | 2579 | DataMapPlot. This works best if there are between 5 and 60 topics. It is therefore best
|
2581 | 2580 | to use a sufficiently large `min_topic_size` or set `nr_topics` when building the model.
|
@@ -2686,7 +2685,7 @@ def visualize_hierarchical_documents(
|
2686 | 2685 | title: str = "<b>Hierarchical Documents and Topics</b>",
|
2687 | 2686 | width: int = 1200,
|
2688 | 2687 | height: int = 750,
|
2689 |
| - ) -> go.Figure: |
| 2688 | + ) -> "go.Figure": |
2690 | 2689 | """Visualize documents and their topics in 2D at different levels of hierarchy.
|
2691 | 2690 |
|
2692 | 2691 | Arguments:
|
@@ -2798,7 +2797,7 @@ def visualize_term_rank(
|
2798 | 2797 | title: str = "<b>Term score decline per Topic</b>",
|
2799 | 2798 | width: int = 800,
|
2800 | 2799 | height: int = 500,
|
2801 |
| - ) -> go.Figure: |
| 2800 | + ) -> "go.Figure": |
2802 | 2801 | """Visualize the ranks of all terms across all topics.
|
2803 | 2802 |
|
2804 | 2803 | Each topic is represented by a set of words. These words, however,
|
@@ -2863,7 +2862,7 @@ def visualize_topics_over_time(
|
2863 | 2862 | title: str = "<b>Topics over Time</b>",
|
2864 | 2863 | width: int = 1250,
|
2865 | 2864 | height: int = 450,
|
2866 |
| - ) -> go.Figure: |
| 2865 | + ) -> "go.Figure": |
2867 | 2866 | """Visualize topics over time.
|
2868 | 2867 |
|
2869 | 2868 | Arguments:
|
@@ -2919,7 +2918,7 @@ def visualize_topics_per_class(
|
2919 | 2918 | title: str = "<b>Topics per Class</b>",
|
2920 | 2919 | width: int = 1250,
|
2921 | 2920 | height: int = 900,
|
2922 |
| - ) -> go.Figure: |
| 2921 | + ) -> "go.Figure": |
2923 | 2922 | """Visualize topics per class.
|
2924 | 2923 |
|
2925 | 2924 | Arguments:
|
@@ -2973,7 +2972,7 @@ def visualize_distribution(
|
2973 | 2972 | title: str = "<b>Topic Probability Distribution</b>",
|
2974 | 2973 | width: int = 800,
|
2975 | 2974 | height: int = 600,
|
2976 |
| - ) -> go.Figure: |
| 2975 | + ) -> "go.Figure": |
2977 | 2976 | """Visualize the distribution of topic probabilities.
|
2978 | 2977 |
|
2979 | 2978 | Arguments:
|
@@ -3080,7 +3079,7 @@ def visualize_hierarchy(
|
3080 | 3079 | linkage_function: Callable[[csr_matrix], np.ndarray] = None,
|
3081 | 3080 | distance_function: Callable[[csr_matrix], csr_matrix] = None,
|
3082 | 3081 | color_threshold: int = 1,
|
3083 |
| - ) -> go.Figure: |
| 3082 | + ) -> "go.Figure": |
3084 | 3083 | """Visualize a hierarchical structure of the topics.
|
3085 | 3084 |
|
3086 | 3085 | A ward linkage function is used to perform the
|
@@ -3176,7 +3175,7 @@ def visualize_heatmap(
|
3176 | 3175 | title: str = "<b>Similarity Matrix</b>",
|
3177 | 3176 | width: int = 800,
|
3178 | 3177 | height: int = 800,
|
3179 |
| - ) -> go.Figure: |
| 3178 | + ) -> "go.Figure": |
3180 | 3179 | """Visualize a heatmap of the topic's similarity matrix.
|
3181 | 3180 |
|
3182 | 3181 | Based on the cosine similarity matrix between c-TF-IDFs or semantic embeddings of the topics,
|
@@ -3236,7 +3235,7 @@ def visualize_barchart(
|
3236 | 3235 | width: int = 250,
|
3237 | 3236 | height: int = 250,
|
3238 | 3237 | autoscale: bool = False,
|
3239 |
| - ) -> go.Figure: |
| 3238 | + ) -> "go.Figure": |
3240 | 3239 | """Visualize a barchart of selected topics.
|
3241 | 3240 |
|
3242 | 3241 | Arguments:
|
|
0 commit comments