55"""
66from __future__ import annotations
77
8+ from abc import ABC , abstractmethod
89from typing import TYPE_CHECKING , List , Optional , Sequence , Union , cast
910import warnings
1011
1112import numpy as np
1213
1314from pandas ._libs .tslibs import Timestamp
14- from pandas ._typing import FrameOrSeries , Hashable
15+ from pandas ._typing import FrameOrSeries , FrameOrSeriesUnion , Hashable
1516from pandas .util ._validators import validate_percentile
1617
1718from pandas .core .dtypes .common import (
@@ -62,106 +63,138 @@ def describe_ndframe(
6263 """
6364 percentiles = refine_percentiles (percentiles )
6465
66+ describer : NDFrameDescriberAbstract
6567 if obj .ndim == 1 :
66- result_series = describe_series (
67- cast ("Series" , obj ),
68- percentiles ,
69- datetime_is_numeric ,
68+ describer = SeriesDescriber (
69+ obj = cast ("Series" , obj ),
70+ datetime_is_numeric = datetime_is_numeric ,
71+ )
72+ else :
73+ describer = DataFrameDescriber (
74+ obj = cast ("DataFrame" , obj ),
75+ include = include ,
76+ exclude = exclude ,
77+ datetime_is_numeric = datetime_is_numeric ,
7078 )
71- return cast (FrameOrSeries , result_series )
72-
73- frame = cast ("DataFrame" , obj )
74-
75- if frame .ndim == 2 and frame .columns .size == 0 :
76- raise ValueError ("Cannot describe a DataFrame without columns" )
77-
78- result_frame = describe_frame (
79- frame = frame ,
80- include = include ,
81- exclude = exclude ,
82- percentiles = percentiles ,
83- datetime_is_numeric = datetime_is_numeric ,
84- )
85- return cast (FrameOrSeries , result_frame )
8679
80+ result = describer .describe (percentiles = percentiles )
81+ return cast (FrameOrSeries , result )
8782
class NDFrameDescriberAbstract(ABC):
    """Abstract base class for describing a DataFrame or a Series.

    Concrete subclasses implement :meth:`describe` for one kind of object.

    Parameters
    ----------
    obj : Series or DataFrame
        Object to be described.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.
    """

    def __init__(self, obj: "FrameOrSeriesUnion", datetime_is_numeric: bool):
        self.obj = obj
        self.datetime_is_numeric = datetime_is_numeric

    @abstractmethod
    def describe(self, percentiles: Sequence[float]) -> "FrameOrSeriesUnion":
        """Describe the wrapped series or dataframe.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.
        """
108+
109+
class SeriesDescriber(NDFrameDescriberAbstract):
    """Class responsible for creating series description."""

    # Narrow the type of the object stored by the base-class constructor.
    obj: "Series"

    def describe(self, percentiles: Sequence[float]) -> "Series":
        """Describe the wrapped series.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.

        Returns
        -------
        Series
            Whatever ``describe_1d`` produces for the wrapped series.
        """
        # NOTE(review): ``is_series=True`` presumably lets ``describe_1d``
        # pick the proper stacklevel for its FutureWarning -- confirm
        # against ``describe_1d``.
        return describe_1d(
            self.obj,
            percentiles=percentiles,
            datetime_is_numeric=self.datetime_is_numeric,
            is_series=True,
        )
122+
123+
class DataFrameDescriber(NDFrameDescriberAbstract):
    """Class responsible for creating DataFrame description.

    Parameters
    ----------
    obj : DataFrame
        DataFrame to be described.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.

    Raises
    ------
    ValueError
        If ``obj`` is two-dimensional and has no columns.
    """

    # Narrow the type of the object stored by the base-class constructor.
    obj: "DataFrame"

    def __init__(
        self,
        obj: "DataFrame",
        *,
        include: Optional[Union[str, Sequence[str]]],
        exclude: Optional[Union[str, Sequence[str]]],
        datetime_is_numeric: bool,
    ):
        self.include = include
        self.exclude = exclude

        # Reject frames without columns up front: there is nothing to describe.
        if obj.ndim == 2 and obj.columns.size == 0:
            raise ValueError("Cannot describe a DataFrame without columns")

        super().__init__(obj, datetime_is_numeric=datetime_is_numeric)

    def describe(self, percentiles: Sequence[float]) -> "DataFrame":
        """Describe every selected column and combine the results.

        Parameters
        ----------
        percentiles : list-like of numbers
            The percentiles to include in the output.

        Returns
        -------
        DataFrame
            One column per described column of the selected data.
        """
        data = self._select_data()

        # One description per selected column.
        ldesc = [
            describe_1d(
                series,
                percentiles=percentiles,
                datetime_is_numeric=self.datetime_is_numeric,
                is_series=False,
            )
            for _, series in data.items()
        ]

        # Align all descriptions on a common row order before concatenating.
        col_names = reorder_columns(ldesc)
        d = concat(
            [x.reindex(col_names, copy=False) for x in ldesc],
            axis=1,
            sort=False,
        )
        d.columns = data.columns.copy()
        return d

    def _select_data(self) -> "DataFrame":
        """Select columns to be described.

        Raises
        ------
        ValueError
            If ``include`` is ``'all'`` while ``exclude`` is not None.
        """
        if (self.include is None) and (self.exclude is None):
            # when some numerics are found, keep only numerics
            default_include = [np.number]
            if self.datetime_is_numeric:
                default_include.append("datetime")
            data = self.obj.select_dtypes(include=default_include)
            if len(data.columns) == 0:
                # no numeric columns at all: describe everything instead
                data = self.obj
        elif self.include == "all":
            if self.exclude is not None:
                msg = "exclude must be None when include is 'all'"
                raise ValueError(msg)
            data = self.obj
        else:
            data = self.obj.select_dtypes(
                include=self.include,
                exclude=self.exclude,
            )
        return data
165198
166199
167200def reorder_columns (ldesc : Sequence ["Series" ]) -> List [Hashable ]:
@@ -175,32 +208,6 @@ def reorder_columns(ldesc: Sequence["Series"]) -> List[Hashable]:
175208 return names
176209
177210
def select_columns(
    frame: "DataFrame",
    include: Optional[Union[str, Sequence[str]]],
    exclude: Optional[Union[str, Sequence[str]]],
    datetime_is_numeric: bool,
) -> "DataFrame":
    """Select columns to be described.

    Parameters
    ----------
    frame : DataFrame
        DataFrame whose columns are selected.
    include : 'all', list-like of dtypes or None
        A white list of data types to include in the result.
    exclude : list-like of dtypes or None
        A black list of data types to omit from the result.
    datetime_is_numeric : bool
        Whether to treat datetime dtypes as numeric.

    Returns
    -------
    DataFrame
        The selected columns, or ``frame`` itself when nothing is filtered.

    Raises
    ------
    ValueError
        If ``include`` is ``'all'`` while ``exclude`` is not None.
    """
    if (include is None) and (exclude is None):
        # when some numerics are found, keep only numerics
        default_include = [np.number]
        if datetime_is_numeric:
            default_include.append("datetime")
        data = frame.select_dtypes(include=default_include)
        if len(data.columns) == 0:
            # no numeric columns at all: fall back to the whole frame
            data = frame
    elif include == "all":
        if exclude is not None:
            msg = "exclude must be None when include is 'all'"
            raise ValueError(msg)
        data = frame
    else:
        data = frame.select_dtypes(include=include, exclude=exclude)

    return data
202-
203-
204211def describe_numeric_1d (series : "Series" , percentiles : Sequence [float ]) -> Series :
205212 """Describe series containing numerical data.
206213
0 commit comments