Copy the app link with current input parameters. They will be displayed in the URL.
import
numpy
as
np
import
matplotlib
.
pyplot
as
plt
from
sklearn
.
linear_model
import
LogisticRegression
from
pandas
import
DataFrame
,
Series
import
seaborn
as
sns
import
pandas
as
pd
# Read the admissions CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='admit_data.csv')
df.head(n=5)
# Output of df.head() (first five rows; both columns shown are float64):
#    English    Math
# 0       35  54.625
# 1     26.5    68.5
# 2       41      57
# 3     21.5      42
# 4       46      84
# Inspect the raw column labels first: some of them contain spaces, so the
# exact names are needed before they can be replaced.
df.columns

# Replace the labels wholesale with space-free names by assigning a list of
# new names (one per existing column, in order) to df.columns.
df.columns = [
    'English',
    'Math',
    'Outcome',
]
# Add a readable 'Admit' feature derived from the numeric 'Outcome' column.
# Series.map with a dict is the simplest tool here (same approach as for the
# iris dataset): 0 becomes "no" and 1 becomes "yes".
admit_labels = df['Outcome'].map({0: 'no', 1: 'yes'})
df['Admit'] = admit_labels
# Switch Seaborn to its white-grid background, then draw English (y-axis)
# against Math (x-axis) with each point coloured by the 'Outcome' column,
# i.e. by whether the student was admitted or not.
sns.set_style(style='whitegrid')
sns.relplot(data=df, x='Math', y='English', hue='Outcome')
# Create target 1D array and 2D data array (one row per student, two feature
# columns; the exercise expects 100 by 2 -- confirm against the CSV).

# Target array: the numeric 'Outcome' column of the dataframe. It is taken
# from df directly because no `admissions` object exists in this notebook.
target = df['Outcome'].values

# Get NumPy arrays for the English and Math scores, stack them and transpose.
# np.vstack takes ONE argument (a tuple of arrays) and returns shape (2, n);
# .T turns that into the (n, 2) samples-by-features layout, with English in
# column 0 and Math in column 1.
english_scores = df['English'].values
math_scores = df['Math'].values
X = np.vstack((english_scores, math_scores)).T

# Keep the target next to the features in the dataframe as well.
df['target'] = target
df.head()

# Split our data into a training set and a test set; we choose a 75-25 split.
# We are splitting the entire data set and target.
# First the import command from scikit-learn.
from sklearn.model_selection import train_test_split

# test_size has to be passed as a keyword argument; 0.25 holds out 25% of the
# rows for testing, giving the 75-25 split described above.
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.25)

# Print out length of each set
n_train = len(X_train)
n_test = len(X_test)
print(n_train, n_test)
# Now use logistic regression on training set; see how well we do.
# NOTE(review): assumes X_train / X_test have two columns ordered
# (English, Math) and that y_train / y_test come from the 'Outcome' column --
# confirm against the cell that builds and splits the data.

# Create model
lr = LogisticRegression(solver='lbfgs')

# Fit with training set (all feature columns, not just the first one, so the
# model matches the .score(X_train, y_train) call below).
lr.fit(X_train, y_train)

# Calculate training score using .score(X_train, y_train) & print out percent
# accuracy. An f-string is used so a sentence is printed rather than a tuple.
train_accuracy = 100 * lr.score(X_train, y_train)
print(
    "The percent accuracy using English and Math scores on training set is "
    f"{train_accuracy:.1f}"
)

# Now see how well model does on test set using .score, which requires
# 2 arguments: the test features and the test targets.
test_accuracy = 100 * lr.score(X_test, y_test)
print(
    "The percent accuracy using English and Math scores on test set is "
    f"{test_accuracy:.1f}"
)

# We want to plot the prediction for each data point, so first we add a
# column to the dataframe with the prediction.
# Predict all data using .predict; print out score. The features are rebuilt
# from the dataframe in the same (English, Math) column order assumed above.
all_features = df[['English', 'Math']].values
all_predictions = lr.predict(all_features)
overall_accuracy = 100 * lr.score(all_features, df['Outcome'].values)
print(f"The percent accuracy on the entire data set is {overall_accuracy:.1f}")

# Add column to dataframe with the numeric prediction
df['Prediction'] = all_predictions

# Add column to dataframe for this prediction as we did before with .map
# (0 -> "no", 1 -> "yes").
df['Predicted_Admit'] = df['Prediction'].map({0: 'no', 1: 'yes'})