I have a column 'ID', the values of which need to be filled in 10 other columns. Filling to produce line by line. If empty cells remain, fill them with zeros.
Make the most of pandas
ID = [3, 2, 2, 10, 2, 1, 7, 5, 8, 9, 3, 1 ...]
def prepare_train_set(path_to_csv_files, session_length=10): df = pd.read_csv(path_to_csv_files) df['ID'] = pd.factorize(df.site)[0] df['frequency'] = df.groupby('ID', as_index=False)['site'].transform(lambda s: s.count()) dictionary = df[['site', 'ID', 'frequency']].loc[pd.unique(df['ID'])] dct = dictionary.set_index('site').T.to_dict('list') df2 = pd.DataFrame(columns=['site1','site2' ,'site3 ','site4 ', 'site5','site6',\ 'site7 ', 'site8', 'site9', 'site10' , 'user_id']) print(df) print("======") print(dct) print("======") return dct import glob for path in glob.glob(os.path.join(PATH_TO_DATA,'3users/user*[0-9].csv')): #print(os.path.isfile(path)) prepare_train_set(path, 10) 