# Data Scrubbing

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import ensemble
from sklearn.metrics import mean_absolute_error
# Load the raw housing listings into a DataFrame.
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
df = pd.read_csv("f:/brain/memory/Melbourne_housing_FULL.csv")

# Remove the columns that are not used as model inputs further down.
# 'Lattitude' and 'Longtitude' are spelled the way this script expects them
# to appear in the CSV header; a missing column raises KeyError, exactly as
# the individual `del df[...]` statements would.
df.drop(
    columns=[
        'Address',
        'Method',
        'SellerG',
        'Date',
        'Postcode',
        'Lattitude',
        'Longtitude',
        'Regionname',
        'Propertycount',
    ],
    inplace=True,
)

# Discard every row that still has a missing value in any remaining column.
# These columns (including the 'Price' target) are later handed to a
# scikit-learn regressor as plain arrays, and a NaN target cannot be fitted.
# Presumably the CSV contains gaps -- if it does not, this is a no-op.
df.dropna(axis=0, how='any', inplace=True)
# Target vector: the sale price of each remaining row.
y = df['Price'].values

# One-hot encode the three categorical columns, then take the target out of
# the encoded frame so it cannot leak into the feature matrix.
features_df = pd.get_dummies(
    df,
    columns=['Suburb', 'CouncilArea', 'Type'],
).drop(columns=['Price'])
x = features_df.values

# Hold out 30% of the rows for testing; rows are shuffled before the split.
# No random_state is given, so the partition differs from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    shuffle=True,
)
# Gradient-boosted regression trees, fitted on the training split only.
# `fit` returns the estimator itself, so construction and training are chained.
model = ensemble.GradientBoostingRegressor(
    loss='huber',
    learning_rate=0.1,
    n_estimators=150,
    max_depth=30,
    max_features=0.6,      # a float here is a fraction of the features
    min_samples_split=4,
    min_samples_leaf=6,
).fit(x_train, y_train)
# Score the fitted model with mean absolute error (MAE) on both splits.
# Comparing the two numbers shows how far the fit on the training rows
# carries over to rows the model has not seen.
train_mae = mean_absolute_error(y_train, model.predict(x_train))
print("Training Set Mean Absolute Error: %.2f" % train_mae)

# The original call was misspelled (`mean_absolute_erro`) and raised
# NameError before the test-set score could be printed.
test_mae = mean_absolute_error(y_test, model.predict(x_test))
print("Test Set Mean Absolute Error: %.2f" % test_mae)