import pandas as pd
import numpy as np
from pathlib import PurePath
import os
import sys
import time
from math import log10, floor
def reshape_df(df, value_variable_list, cols_to_melt, melted_header, new_column_name):
    """
    Reshape a short/wide DataFrame into long/narrow form.

    Parameters:
        df: The DataFrame to melt.\n
        value_variable_list: Column(s) list to use as identifier variables.\n
        cols_to_melt: Column(s) list of columns to pivot (melt).\n
        melted_header: The header for the column populated with the cols_to_melt names.\n
        new_column_name: Name to use for the 'Value' column.
    Returns:
        A new DataFrame in long and narrow shape rather than the passed short and wide shape.
    Note:
        This function is not being used.
    """
    melted = df.melt(
        id_vars=value_variable_list,
        value_vars=cols_to_melt,
        var_name=melted_header,
        value_name=new_column_name,
    )
    return melted
def convert_dollars_to_analysis_basis(df, deflators, dollar_basis, *args):
    """
    Adjust monetized attributes into the single, consistent dollar basis set via the General Inputs file.

    Parameters:
        df: DataFrame; contains the monetized values and a 'DollarBasis' column giving their input cost basis.\n
        deflators: Dictionary; keyed by dollar year, each entry providing an 'adjustment_factor' GDP deflator.\n
        dollar_basis: Numeric; the dollar basis to be used throughout the analysis.\n
        args: String(s); the attributes within the passed df to be adjusted into 'dollar_basis' dollars.
    Returns:
        The passed DataFrame with all args adjusted into dollar_basis dollars.
    """
    # Only rows carrying a meaningful (> 1) dollar basis are adjusted.
    basis_years = df.loc[df['DollarBasis'] > 1, 'DollarBasis'].unique()
    for basis_year in basis_years:
        rows = df['DollarBasis'] == basis_year
        factor = deflators[basis_year]['adjustment_factor']
        for attribute in args:
            # Masked assignment aligns on the index, so only matching rows change.
            df.loc[rows, attribute] = df[attribute] * factor
        df.loc[rows, 'DollarBasis'] = dollar_basis
    return df
def round_metrics(df, metrics, round_by):
    """
    Round the requested metrics of a DataFrame in place.

    Parameters:
        df: DataFrame containing data to be rounded.\n
        metrics: List of metrics within the passed DataFrame for which rounding is requested.\n
        round_by: A value that sets the level of rounding.
    Returns:
        The passed DataFrame with 'metrics' rounded by 'round_by'.
    Note:
        This function is not being used.
    """
    rounded = df[metrics].round(round_by)
    df[metrics] = rounded
    return df
def round_sig(df, divisor=1, sig=0, *args):
    """
    Express the passed attributes in 'sig' significant digits and 'divisor' units.

    Parameters:
        df: The DataFrame containing data to be expressed in 'sig' significant digits.\n
        divisor: The divisor to use in calculating results.\n
        sig: The number of significant digits to use for results.\n
        args: The arguments to be expressed in 'sig' significant digits and in 'divisor' units.
    Returns:
        The passed DataFrame with args expressed in 'sig' significant digits and in 'divisor' units.
    Note:
        Zero and NaN entries are left unchanged: log10 is undefined at 0, and
        floor(log10(NaN)) cannot be converted to int.
        This function is not being used.
    """
    def _sig_round(x):
        # Number of decimal places that yields 'sig' significant digits.
        scaled = x / divisor
        return round(scaled, sig - int(floor(log10(abs(scaled)))) - 1)

    for arg in args:
        # Bug fix: the original mask used `df[arg] != np.nan`, which is always
        # True (NaN != NaN never filters anything), so NaN rows reached
        # int(floor(log10(nan))) and raised ValueError. notna() excludes them.
        mask = df[arg].notna() & (df[arg] != 0)
        df.loc[mask, arg] = df.loc[mask, arg].apply(_sig_round)
    return df
def get_file_datetime(list_of_files):
    """
    Build a table of input files and their last-modified timestamps.

    Parameters:
        list_of_files: List; the files for which datetimes are required.
    Returns:
        A DataFrame of input files (full path, 'Item' column) and corresponding
        datetimes (date stamps, 'Results' column) for those files.
    """
    items = list(list_of_files)
    stamps = [time.ctime(os.path.getmtime(item)) for item in items]
    return pd.DataFrame({'Item': items, 'Results': stamps})
def get_common_metrics(df_left, df_right, ignore=None):
    """
    Find the column names shared by two DataFrames to ensure a safe merge.

    Parameters:
        df_left: The left DataFrame being merged.\n
        df_right: The right DataFrame being merged.\n
        ignore: Any columns (arguments) to ignore when finding common metrics.
    Returns:
        The list of common column names (in df_left order, less any ignored
        names), or None when the two DataFrames share no columns.
    Note:
        This function is not being used.
    """
    skip = set(ignore) if ignore else set()
    left_cols = [col for col in df_left.columns if col not in skip]
    right_cols = {col for col in df_right.columns if col not in skip}
    common = [col for col in left_cols if col in right_cols]
    # An empty intersection yields None, matching the original bare return.
    return common or None
def save_dict_to_csv(dict_to_save, save_path, row_header=None, *args):
    """
    Write a dictionary keyed by tuples to CSV, splitting each key into columns.

    Parameters:
        dict_to_save: Dictionary; a dictionary having a tuple of args as keys.\n
        save_path: Path object; the path for saving the passed CSV (the '.csv' suffix is appended).\n
        row_header: List; the column names to use as the row header for the preferred structure of the output file.\n
        args: String(s); the attributes contained in the tuple key - these will be pulled out and named according to the passed arguments.
    Returns:
        None. Writes a CSV file with individual key elements split out into columns with args as names.
    """
    print('Saving dictionary to CSV.')
    df = pd.DataFrame(dict_to_save).transpose().reset_index()
    # reset_index() names the former tuple-key levels 'level_0', 'level_1', ...;
    # give each level its passed name, unless that name already exists as a column.
    for position, name in enumerate(args):
        level_col = f'level_{position}'
        if name in df.columns:
            df = df.drop(columns=level_col)
        else:
            df = df.rename(columns={level_col: name})
    if row_header and 'yearID' not in df.columns.tolist():
        # NOTE(review): assumes 'modelYearID' and 'ageID' columns exist whenever a
        # row_header is passed without 'yearID' -- confirm against callers.
        df.insert(0, 'yearID', df[['modelYearID', 'ageID']].sum(axis=1))
        trailing = [col for col in df.columns if col not in row_header]
        df = pd.DataFrame(df, columns=row_header + trailing)
    df.to_csv(f'{save_path}.csv', index=False)
    return