A general solution for normalizing dates (or other data) in pandas

鱼市口 / 2023-05-11 / 原文

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import time
 
# Of course, the basic pandas API alone can do this job, but I prefer a more general solution here.
def convert_to_timestamp(x):
    """Convert a date/datetime object to a UNIX timestamp.

    Args:
        x: Any object exposing ``timetuple()``, e.g. ``pandas.Timestamp``,
            ``datetime.datetime`` or ``datetime.date``.

    Returns:
        float: Seconds since the epoch as computed by ``time.mktime``,
        which interprets the broken-down time as *local* time, so the
        value depends on the machine's time zone.
    """
    # Timestamp.to_datetime() no longer exists in pandas. Timestamp is a
    # datetime.datetime subclass, so timetuple() can be called directly.
    return time.mktime(x.timetuple())


def normalize(df):
    """Min/max-scale the ``'dates'`` column of *df* into the range [-1, 1].

    Args:
        df: DataFrame with a numeric ``'dates'`` column (for example UNIX
            timestamps). The frame itself is not modified.

    Returns:
        The scaled values as returned by ``MinMaxScaler.fit_transform``:
        a 2-D array with one row per input row and a single column.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1))
    # scikit-learn estimators require 2-D input (n_samples, n_features);
    # df[['dates']] keeps the column as a one-column frame, whereas
    # df['dates'] is a 1-D Series and is rejected with a ValueError.
    dates_scaled = scaler.fit_transform(df[['dates']])
    return dates_scaled
 
 
if __name__ == '__main__':
    # Sample input: a handful of ISO-formatted date strings
    df = pd.DataFrame({'dates': [
        '1980-01-01', '1980-02-02', '1980-03-02', '1980-01-21',
        '1981-01-21', '1991-02-21', '1991-03-23',
    ]})

    # Parse the strings into datetimes, then turn each one into a UNIX
    # timestamp with the helper defined above
    df['dates'] = pd.to_datetime(df['dates']).apply(convert_to_timestamp)

    # Rescale the timestamps; df is rebound to whatever normalize() returns
    df = normalize(df)