Week 12 Practice Notebook - Duplicate

import numpy as np import matplotlib . pyplot as plt from sklearn . linear_model import LogisticRegression from pandas import DataFrame , Series import seaborn as sns import pandas as pd

# Read the admissions CSV into a DataFrame, then preview the first rows.
df = pd.read_csv('admit_data.csv')
df.head()

English float64

Math float64

54.625

26.5

68.5

21.5

# Display the raw column labels so their exact spelling is visible
# (some of them contain spaces).
df.columns

# Overwrite the column labels with space-free names by assigning a list of
# new names to df.columns.
df.columns = ['English', 'Math', 'Outcome']

# Add an 'Admit' column holding "yes"/"no" labels derived from 'Outcome'.
# .map with a lookup dict is the simplest tool here (same approach as used
# for the iris dataset): 0 becomes "no" and 1 becomes "yes".
admit_labels = {0: 'no', 1: 'yes'}
df['Admit'] = df['Outcome'].map(admit_labels)

# Switch Seaborn to its white-grid background, then scatter Math (x axis)
# against English (y axis), colouring each point by its 'Outcome' value.
sns.set_style('whitegrid')
sns.relplot(data=df, x='Math', y='English', hue='Outcome')

# Create the 1D target array and the 2D data array
# (the exercise notes expect the data array to be 100 by 2).

# Target array: the 0/1 value stored in the 'Outcome' column.
target = df['Outcome'].values

# Feature array: take the NumPy arrays for the English and Math scores,
# stack them (np.vstack takes ONE argument, so the arrays go in a tuple),
# and transpose with .T so that each row is one student.
X = np.vstack((df['English'].values, df['Math'].values)).T

# Store the target in the dataframe as well. The previous version read it
# from an undefined `admissions` object, which raised NameError.
df['target'] = target
df.head()

Execution error

NameError : name 'admissions' is not defined

# Split our data into a training set and a test set; we choose a 75-25 split.
# We are splitting the entire data set and target.

# First import the splitting helper from scikit-learn.
from sklearn.model_selection import train_test_split

# Build the target and feature arrays directly from the dataframe columns so
# this cell does not rely on names created elsewhere. Column order is
# English, then Math.
target = df['Outcome'].values
X = df[['English', 'Math']].values

# Split the data. test_size must be passed as a keyword argument; 0.25 gives
# the 75-25 split described above.
X_train, X_test, y_train, y_test = train_test_split(X, target, test_size=0.25)

# Print out the length of each set.
n_train = len(X_train)
n_test = len(X_test)
print(n_train, n_test)

Execution error

AttributeError : 'DataFrame' object has no attribute 'target'

# Now use logistic regression on the training set; see how well we do.

# Create model
lr = LogisticRegression(solver='lbfgs')

# Only the first feature column is used for this fit. scikit-learn expects a
# 2D array, so reshape the 1D column to (n_samples, 1).
# NOTE(review): column 0 is assumed to be the English score -- confirm
# against how X was built.
X1 = X_train[:, 0].reshape(-1, 1)

# Fit with training set
lr.fit(X1, y_train)

# Calculate the training score with .score and print it as a percent accuracy.
train_accuracy = 100 * lr.score(X1, y_train)
print("The percent accuracy using English score on training set is",
      train_accuracy)

Execution error

NameError : name 'X_train' is not defined

# Now see how well the model does on the test set using .score, which
# requires 2 arguments.
# TODO: this step is not implemented yet; the cell only prints a placeholder.
# A raw string is used because '\_' is not a valid escape sequence and
# triggers a warning in a normal string literal; the printed text is unchanged.
print(r'¯\_(ツ)_/¯ ')


                   
                    ¯\_(ツ)_/¯

# We want to plot the prediction for each data point, so first we add a
# column to the dataframe with the prediction.
# To do this, predict all data using .predict; print out the score.
#   - predict all data
#   - add column to dataframe
# TODO: this step is not implemented yet; the cell only prints a placeholder.
# A raw string is used because '\_' is not a valid escape sequence and
# triggers a warning in a normal string literal; the printed text is unchanged.
print(r'¯\_(ツ)_/¯ ')


                   
                    ¯\_(ツ)_/¯

# Add column to dataframe for this prediction as we did before with .map