tools/ml-auto-eda/ml_eda/reporting/formatting.py (239 lines of code) (raw):

# Copyright 2019 Google Inc. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Control formating of the report""" from typing import Text, Callable, List, Optional from ml_eda.reporting import template from ml_eda.reporting import content_generator as cg WARNING_STR = 'warning' RECOMMEND_STR = 'recommend' def numeric_formatting(value: float, width: int = 10) -> Text: """Consistently formatting numerical value with fix width. If the length is longer than the specified width, scientific notation will be used. Args: value: numerical value to be formatted width: displaying width Returns: Formatted value in string type. """ formatting_string = "{{0:{width}.{width}}}".format(width=width) return formatting_string.format(float(value)) class SectionMeta: """Hold metadata required to define a structure of a report section.""" # pylint: disable-msg=too-many-instance-attributes # pylint: disable-msg=too-many-arguments def __init__(self, section_name: Text, section_title: Optional[Text], section_title_template: Optional[Text], section_description: Optional[Text], section_content_generator: Optional[Callable], skip_if_no_content: bool, generate_warning: bool, generate_recommend: bool, dependency: List ): self.section_name = section_name self.section_title = section_title self.section_title_template = section_title_template self.section_description = section_description self.section_content_generator = section_content_generator self.skip_if_no_content = skip_if_no_content self.generate_warning = generate_warning self.generate_recommend = generate_recommend self.dependency = dependency def __repr__(self): return ''' name: {section_name} title: {section_title}'''.format(section_name=self.section_name, section_title=self.section_title) title = SectionMeta( section_name='title', section_title="Exploratory Data Analysis Report", section_title_template=template.REPORT_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=False, generate_warning=False, generate_recommend=False, dependency=[] ) dataset_info = SectionMeta( section_name='dataset_info', section_title=None, section_title_template=None, section_description=None, section_content_generator=cg.create_dataset_info_section, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[] ) descriptive = SectionMeta( section_name='descriptive', section_title="Descriptive Analysis", section_title_template=template.SECTION_TITLE, section_description=None, section_content_generator=cg.create_descriptive_section, skip_if_no_content=True, generate_warning=True, generate_recommend=False, dependency=[] ) pearson_correlation = SectionMeta( section_name='pearson_correlation', section_title="Pearson Correlation", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=cg.create_pearson_correlation_section, skip_if_no_content=True, generate_warning=True, generate_recommend=False, dependency=[] ) correlation_numerical = SectionMeta( section_name='correlation_numerical', section_title="Numerical Attributes Correlation", section_title_template=template.SUB_SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[pearson_correlation] ) contingency_table = SectionMeta( section_name='contingency_table', section_title="Contingency Table", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=cg.create_contingency_table_section, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[] ) information_gain = SectionMeta( section_name='information_gain', section_title="Information Gain", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=cg.create_information_gain_section, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[] ) chi_test = SectionMeta( section_name='chi_test', section_title="Chi-square Statistical Test", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=cg.create_chi_square_section, skip_if_no_content=True, generate_warning=True, generate_recommend=False, dependency=[] ) correlation_categorical = SectionMeta( section_name='correlation_categorical', section_title="Categorical Attributes Correlation", section_title_template=template.SUB_SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[contingency_table, information_gain, chi_test] ) table_descriptive = SectionMeta( section_name='table_descriptive', section_title="Descriptive Table", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=cg.create_table_descriptive_section, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[] ) anova_test = SectionMeta( section_name='anova_test', section_title="ANOVA Statistical Test", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=cg.create_anova_section, skip_if_no_content=True, generate_warning=True, generate_recommend=False, dependency=[] ) correlation_numerical_categorical = SectionMeta( section_name='correlation_numerical_categorical', section_title="Numerical and Categorical Attributes Correlation", section_title_template=template.SUB_SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[table_descriptive, anova_test] ) correlation_analysis = SectionMeta( section_name='correlation_analysis', section_title="Correlation Analysis", section_title_template=template.SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[correlation_numerical, correlation_categorical, correlation_numerical_categorical] ) target_highlight = SectionMeta( section_name='target_highlight', section_title="Highlights of Target Attribute", section_title_template=template.SECTION_TITLE, section_description=None, section_content_generator=cg.create_target_highlight_section, skip_if_no_content=True, generate_warning=False, generate_recommend=True, dependency=[] ) warning = SectionMeta( section_name=WARNING_STR, section_title="Warning", section_title_template=template.SUB_SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[] ) recommend = SectionMeta( section_name=RECOMMEND_STR, section_title="Recommendation", section_title_template=template.SUB_SUB_SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[] ) summary = SectionMeta( section_name='summary', section_title="Summary of warning and recommendation", section_title_template=template.SECTION_TITLE, section_description=None, section_content_generator=None, skip_if_no_content=True, generate_warning=False, generate_recommend=False, dependency=[warning, recommend] ) REPORT_STRUCTURE = ( title, dataset_info, descriptive, correlation_analysis, correlation_numerical, pearson_correlation, correlation_categorical, contingency_table, information_gain, chi_test, correlation_numerical_categorical, table_descriptive, anova_test, target_highlight, recommend, )