Source code for heatpro.check.check_data_format

import pandas as pd

# Column label that check_energy_feature() looks for in a DataFrame.
# NOTE(review): the name suggests values are thermal energy in kWh — confirm with the data producers.
ENERGY_FEATURE_NAME = "thermal_energy_kWh"

def check_datetime_index(dataframe: pd.DataFrame) -> bool:
    """Tell whether a DataFrame is indexed by a ``pd.DatetimeIndex``.

    Args:
        dataframe (pd.DataFrame): data whose index is inspected.

    Returns:
        bool: True when ``dataframe.index`` is a ``pd.DatetimeIndex``,
        False for any other index type.
    """
    index = dataframe.index
    return isinstance(index, pd.DatetimeIndex)
def check_energy_feature(dataframe: pd.DataFrame) -> bool:
    """Tell whether a DataFrame has the energy column.

    Args:
        dataframe (pd.DataFrame): data whose column labels are inspected.

    Returns:
        bool: True when a column labelled ``ENERGY_FEATURE_NAME`` exists,
        False otherwise.
    """
    column_labels = dataframe.columns
    return ENERGY_FEATURE_NAME in column_labels
def find_duplicate_years(datetime_index: pd.DatetimeIndex) -> list:
    """Return the years that occur more than once in a DatetimeIndex.

    Args:
        datetime_index (pd.DatetimeIndex): timestamps to inspect.

    Returns:
        list: years carried by at least two timestamps, in the order
        produced by ``value_counts`` (most frequent first). Empty when
        every year appears at most once.
    """
    # Count once and reuse the result for both the mask and the selection.
    year_counts = datetime_index.year.value_counts()
    return list(year_counts[year_counts > 1].index)
def find_duplicate_months(datetime_index: pd.DatetimeIndex):
    """List the (year, month) pairs that occur more than once.

    Args:
        datetime_index (pd.DatetimeIndex): timestamps to inspect.

    Returns:
        DataFrame with columns 'Year' and 'Month', one row per repeated
        (year, month) pair. Each row keeps the positional label of the
        pair's first occurrence in ``datetime_index``.
    """
    key_columns = ['Year', 'Month']
    calendar = pd.DataFrame({'Year': datetime_index.year,
                             'Month': datetime_index.month})
    # Flag every row whose pair is shared with at least one other row ...
    is_repeated = calendar.duplicated(subset=key_columns, keep=False)
    # ... then keep a single representative row per repeated pair.
    repeated_once = calendar.loc[is_repeated].drop_duplicates(keep='first')
    return repeated_once[key_columns]
def find_duplicate_days(datetime_index: pd.DatetimeIndex):
    """List the (year, month, day) triples that occur more than once.

    Args:
        datetime_index (pd.DatetimeIndex): timestamps to inspect.

    Returns:
        DataFrame with columns 'Year', 'Month' and 'Day', one row per
        repeated calendar day. Each row keeps the positional label of the
        day's first occurrence in ``datetime_index``.
    """
    key_columns = ['Year', 'Month', 'Day']
    calendar = pd.DataFrame({'Year': datetime_index.year,
                             'Month': datetime_index.month,
                             'Day': datetime_index.day})
    # Flag every row whose calendar day is shared with another row ...
    is_repeated = calendar.duplicated(subset=key_columns, keep=False)
    # ... then keep a single representative row per repeated day.
    repeated_once = calendar.loc[is_repeated].drop_duplicates(keep='first')
    return repeated_once[key_columns]
def find_duplicate_hours(datetime_index: pd.DatetimeIndex):
    """List the (year, month, day, hour) tuples that occur more than once.

    Args:
        datetime_index (pd.DatetimeIndex): timestamps to inspect.

    Returns:
        DataFrame with columns 'Year', 'Month', 'Day' and 'Hour', one row
        per repeated hour slot. Each row keeps the positional label of the
        slot's first occurrence in ``datetime_index``.
    """
    key_columns = ['Year', 'Month', 'Day', 'Hour']
    calendar = pd.DataFrame({'Year': datetime_index.year,
                             'Month': datetime_index.month,
                             'Day': datetime_index.day,
                             'Hour': datetime_index.hour})
    # Flag every row whose hour slot is shared with another row ...
    is_repeated = calendar.duplicated(subset=key_columns, keep=False)
    # ... then keep a single representative row per repeated slot.
    repeated_once = calendar.loc[is_repeated].drop_duplicates(keep='first')
    return repeated_once[key_columns]
def find_xor_months(df_left: pd.DataFrame, df_right: pd.DataFrame) -> pd.DataFrame:
    """Find the months that are present in only one of the two indexes.

    Args:
        df_left (pd.DataFrame): left DataFrame, indexed by a DatetimeIndex.
        df_right (pd.DataFrame): right DataFrame, indexed by a DatetimeIndex.

    Returns:
        pd.DataFrame: columns 'Year', 'Month' and '_merge'. One row per
        (year, month) found in exactly one index; '_merge' is 'left_only'
        or 'right_only'. Row labels are positions in the outer merge of the
        distinct months. Empty when both indexes cover the same months.
    """
    keys = ['Year', 'Month']
    # Reduce each side to its distinct months first: merging the raw
    # per-timestamp rows would build a cartesian product for every shared
    # month and report each unmatched month once per timestamp.
    left_months = pd.DataFrame(
        {'Year': df_left.index.year, 'Month': df_left.index.month}
    ).drop_duplicates()
    right_months = pd.DataFrame(
        {'Year': df_right.index.year, 'Month': df_right.index.month}
    ).drop_duplicates()
    merged = pd.merge(left_months, right_months, on=keys, how='outer', indicator=True)
    return merged[merged['_merge'] != 'both']
def find_xor_dates(df_left: pd.DataFrame, df_right: pd.DataFrame) -> pd.DataFrame:
    """Find the calendar dates that are present in only one of the two indexes.

    Args:
        df_left (pd.DataFrame): left DataFrame, indexed by a DatetimeIndex.
        df_right (pd.DataFrame): right DataFrame, indexed by a DatetimeIndex.

    Returns:
        pd.DataFrame: columns 'Date' and '_merge'. One row per date found in
        exactly one index; '_merge' is 'left_only' or 'right_only'. Row
        labels are positions in the outer merge of the distinct dates.
        Empty when both indexes cover the same dates.
    """
    # Reduce each side to its distinct dates first: merging the raw
    # per-timestamp rows would build a cartesian product for every shared
    # date and report each unmatched date once per timestamp.
    left_dates = pd.DataFrame({'Date': df_left.index.date}).drop_duplicates()
    right_dates = pd.DataFrame({'Date': df_right.index.date}).drop_duplicates()
    merged = pd.merge(left_dates, right_dates, on=['Date'], how='outer', indicator=True)
    return merged[merged['_merge'] != 'both']
def find_xor_hour(df_left: pd.DataFrame, df_right: pd.DataFrame) -> pd.DataFrame:
    """Find the (date, hour) slots that are present in only one of the two indexes.

    Args:
        df_left (pd.DataFrame): left DataFrame, indexed by a DatetimeIndex.
        df_right (pd.DataFrame): right DataFrame, indexed by a DatetimeIndex.

    Returns:
        pd.DataFrame: columns 'Date', 'Hour' and '_merge'. One row per
        (date, hour) slot found in exactly one index; '_merge' is
        'left_only' or 'right_only'. Row labels are positions in the outer
        merge of the distinct slots. Empty when both indexes cover the same
        hours.
    """
    keys = ['Date', 'Hour']
    # Distinct slots only, so sub-hourly data neither multiplies rows in the
    # merge nor reports the same missing hour several times.
    left_slots = pd.DataFrame(
        {'Date': df_left.index.date, 'Hour': df_left.index.hour}
    ).drop_duplicates()
    right_slots = pd.DataFrame(
        {'Date': df_right.index.date, 'Hour': df_right.index.hour}
    ).drop_duplicates()
    # The hour must be part of the join key: joining on 'Date' alone never
    # compares hours, so an hour missing on a date both sides share would
    # be reported as 'both'.
    merged = pd.merge(left_slots, right_slots, on=keys, how='outer', indicator=True)
    return merged[merged['_merge'] != 'both']