1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
#------------------
# Data Preparation
#------------------

# Read datasets
# Download the data from http://www.saedsayad.com/datasets/CreditData.zip
# Both files are expected in the current working directory.
train <- read.csv("Credit_train.csv")
test <- read.csv("Credit_test.csv")

# Rows and columns
dim(train)
dim(test)

# Column names
colnames(train)
colnames(test)

# Show the first rows
head(train)
head(test)

#-------------------------------------------------------------------
# Data Exploration - Bivariate analysis - Categorical and Numerical
#-------------------------------------------------------------------

# DEFAULT and MAXLINEUTIL
boxplot(
  MAXLINEUTIL ~ DEFAULT,
  data = train,
  main = "Maximum number of lines being utilized",
  sub = "train",
  col = "darkgreen",
  xlab = "DEFAULT",
  ylab = "MAXLINEUTIL"
)
boxplot(
  MAXLINEUTIL ~ DEFAULT,
  data = test,
  main = "Maximum number of lines being utilized",
  sub = "test",
  col = "brown",
  xlab = "DEFAULT",
  ylab = "MAXLINEUTIL"
)

# DEFAULT and DAYSDELQ
boxplot(
  DAYSDELQ ~ DEFAULT,
  data = train,
  main = "Number of delinquent days",
  sub = "train",
  col = "darkgreen",
  xlab = "DEFAULT",
  ylab = "DAYSDELQ"
)
boxplot(
  DAYSDELQ ~ DEFAULT,
  data = test,
  main = "Number of delinquent days",
  sub = "test",
  col = "brown",
  xlab = "DEFAULT",
  ylab = "DAYSDELQ"
)

# DEFAULT and TOTACBAL
boxplot(
  TOTACBAL ~ DEFAULT,
  data = train,
  main = "Total balance of business account",
  sub = "train",
  col = "darkgreen",
  xlab = "DEFAULT",
  ylab = "TOTACBAL"
)
boxplot(
  TOTACBAL ~ DEFAULT,
  data = test,
  main = "Total balance of business account",
  sub = "test",
  col = "brown",
  xlab = "DEFAULT",
  ylab = "TOTACBAL"
)

# Z-test for two variables - DEFAULT and DAYSDELQ
# na.omit() drops every row that has an NA in any column; the filtered
# data frame is also the one the ANOVA further down runs on.
train <- na.omit(train)
a <- train$DAYSDELQ[train$DEFAULT == "Y"]
b <- train$DAYSDELQ[train$DEFAULT == "N"]
n1 <- length(a)
n2 <- length(b)

# Difference of group means divided by its standard error
# (the two variances are not pooled).
z <- (mean(a) - mean(b)) / sqrt(var(a) / n1 + var(b) / n2)

# Two-sided p-value taken from the upper tail directly. The algebraically
# equal form 1 - (pnorm(abs(z)) - 0.5) * 2 cancels to exactly 0 once
# pnorm(abs(z)) rounds to 1, which happens for large |z|.
pz <- 2 * pnorm(abs(z), lower.tail = FALSE)
cat("z =", z, " p-value =", pz, "\n")

# ANOVA - BUSTYPE and TOTACBAL
# Formula plus `data =` keeps the term label in the summary as "BUSTYPE".
fit <- aov(TOTACBAL ~ BUSTYPE, data = train)
summary(fit)
Ejercicio en R: Categóricas con Numéricas
Mapa -> Exploración -> Bivariables -> Categóricas con Numéricas