# Untitled
# 3 years ago in Plain Text
import pandas as pd
# Load the raw visit log; the file is read as tab-separated despite its
# .csv extension.
data = pd.read_csv('/datasets/visits.csv', sep='\t')

# Parse the timestamps and shift them by +3 hours.
# NOTE(review): presumably this converts UTC to a local time zone -- confirm
# the offset against the data source.
data['local_time'] = (
    pd.to_datetime(data['date_time'], format='%Y-%m-%dT%H:%M:%S')
    + pd.Timedelta(hours=3)
)

# Round every visit to the nearest full hour. The lowercase 'h' alias is used
# because the uppercase 'H' alias is deprecated since pandas 2.2.
data['date_hour'] = data['local_time'].dt.round('1h')

# Flag visits by duration (presumably seconds -- confirm the units of
# `time_spent`).
data['too_fast'] = data['time_spent'] < 60
data['too_slow'] = data['time_spent'] > 1000

# pivot_table aggregates with the mean by default, so each id ends up with the
# share of its visits that were flagged as too fast.
too_fast_stat = data.pivot_table(index='id', values='too_fast')
print(len(data))

# Keep only ids where fewer than half of the visits were too fast, then
# restrict the visit log to those ids.
good_ids = too_fast_stat.query('too_fast < 0.5')
good_data = data.query('id in @good_ids.index')
print(len(good_data))