1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
| import time from argparse import ArgumentParser
import pandas as pd import psycopg2 from sklearn.datasets import load_breast_cancer
def get_data(): X, y = load_breast_cancer(return_X_y=True, as_frame=True) df = pd.concat([X, y], axis="columns") rename_rule = { 'mean radius': 'Feature_A', 'mean texture': 'Feature_B', 'mean perimeter': 'Feature_C', 'mean area': 'Feature_D', 'mean smoothness': 'Feature_E', 'mean compactness': 'Feature_F', 'mean concavity': 'Feature_G', 'mean concave points': 'Feature_H', 'mean symmetry': 'Feature_I', 'mean fractal dimension': 'Feature_J', 'radius error': 'Feature_K', 'texture error': 'Feature_L', 'perimeter error': 'Feature_M', 'area error': 'Feature_N', 'smoothness error': 'Feature_O', 'compactness error': 'Feature_P', 'concavity error': 'Feature_Q', 'concave points error': 'Feature_R', 'symmetry error': 'Feature_S', 'fractal dimension error': 'Feature_T', 'worst radius': 'Feature_U', 'worst texture': 'Feature_V', 'worst perimeter': 'Feature_W', 'worst area': 'Feature_X', 'worst smoothness': 'Feature_Y', 'worst compactness': 'Feature_Z', 'worst concavity': 'Feature_AA', 'worst concave points': 'Feature_BB', 'worst symmetry': 'Feature_CC', 'worst fractal dimension': 'Feature_DD' } df = df.rename(columns=rename_rule) return df
def create_table(db_connect): create_table_query = """ CREATE TABLE IF NOT EXISTS Breast_Cancer_Data ( id SERIAL PRIMARY KEY, timestamp timestamp, Feature_A float8, Feature_B float8, Feature_C float8, Feature_D float8, Feature_E float8, Feature_F float8, Feature_G float8, Feature_H float8, Feature_I float8, Feature_J float8, Feature_K float8, Feature_L float8, Feature_M float8, Feature_N float8, Feature_O float8, Feature_P float8, Feature_Q float8, Feature_R float8, Feature_S float8, Feature_T float8, Feature_U float8, Feature_V float8, Feature_W float8, Feature_X float8, Feature_Y float8, Feature_Z float8, Feature_AA float8, Feature_BB float8, Feature_CC float8, Feature_DD float8, target int );""" print(create_table_query) with db_connect.cursor() as cur: cur.execute(create_table_query) db_connect.commit()
def insert_data(db_connect, data): insert_row_query = f""" INSERT INTO Breast_Cancer_Data (timestamp, Feature_A, Feature_B, Feature_C, Feature_D, Feature_E, Feature_F, Feature_G, Feature_H, Feature_I, Feature_J, Feature_K, Feature_L, Feature_M, Feature_N, Feature_O, Feature_P, Feature_Q, Feature_R, Feature_S, Feature_T, Feature_U, Feature_V, Feature_W, Feature_X, Feature_Y, Feature_Z, Feature_AA, Feature_BB, Feature_CC, Feature_DD, target) VALUES ( NOW(), {data.Feature_A}, {data.Feature_B}, {data.Feature_C}, {data.Feature_D}, {data.Feature_E}, {data.Feature_F}, {data.Feature_G}, {data.Feature_H}, {data.Feature_I}, {data.Feature_J}, {data.Feature_K}, {data.Feature_L}, {data.Feature_M}, {data.Feature_N}, {data.Feature_O}, {data.Feature_P}, {data.Feature_Q}, {data.Feature_R}, {data.Feature_S}, {data.Feature_T}, {data.Feature_U}, {data.Feature_V}, {data.Feature_W}, {data.Feature_X}, {data.Feature_Y}, {data.Feature_Z}, {data.Feature_AA}, {data.Feature_BB}, {data.Feature_CC}, {data.Feature_DD}, {data.target} ); """ print(insert_row_query) with db_connect.cursor() as cur: cur.execute(insert_row_query) db_connect.commit()
def generate_data(db_connect, df): for _ in range(50): insert_data(db_connect, df.sample(1).squeeze()) time.sleep(1)
if __name__ == "__main__": parser = ArgumentParser() parser.add_argument("--db-host", dest="db_host", type=str, default="localhost") args = parser.parse_args()
db_connect = psycopg2.connect( user="zerohertz", password="qwer123!", host=args.db_host, port=5432, database="Breast_Cancer", ) create_table(db_connect) df = get_data() generate_data(db_connect, df)
|