1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
from contextlib import closing

import joblib
import pandas as pd
import psycopg2
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Train a StandardScaler + SVC pipeline on rows read from PostgreSQL, report
# train/validation accuracy, then persist both the fitted pipeline and the
# data it was trained on.

# NOTE(review): the connection credentials are hard-coded below; consider
# loading them from the environment or a secrets store.
# `closing` guarantees the connection is released even if the query raises.
# psycopg2's own `with connection:` only scopes a transaction and does not
# close the connection, so it is not sufficient here.
with closing(
    psycopg2.connect(
        host="localhost",
        database="Breast_Cancer",
        user="zerohertz",
        password="qwer123!",
        port="1234",
    )
) as db_connect:
    # Fetch the 100 rows with the highest `id`.
    df = pd.read_sql(
        "SELECT * FROM breast_cancer_data ORDER BY id DESC LIMIT 100",
        db_connect,
    )

# `target` is the label; `id` and `timestamp` are excluded from the features.
X, y = df.drop(["id", "timestamp", "target"], axis="columns"), df["target"]

# 80/20 split with a fixed seed so the split is reproducible for the same rows.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, train_size=0.8, random_state=2023
)

# Scaling lives inside the pipeline so it is fitted on the training split only
# and is saved together with the classifier.
model_pipeline = Pipeline([("scaler", StandardScaler()), ("svc", SVC())])
model_pipeline.fit(X_train, y_train)

train_pred = model_pipeline.predict(X_train)
valid_pred = model_pipeline.predict(X_valid)

train_acc = accuracy_score(y_true=y_train, y_pred=train_pred)
valid_acc = accuracy_score(y_true=y_valid, y_pred=valid_pred)

print("Train Accuracy :", train_acc)
print("Valid Accuracy :", valid_acc)

# Persist the fitted pipeline and a snapshot of the rows it was trained on.
# Both writes target the existing "Etc./" directory; it is not created here.
joblib.dump(model_pipeline, "Etc./db_pipeline.joblib")
df.to_csv("Etc./DB.csv", index=False)
|