quantargo · nandakallugjeri · May 8, 2019 · May 8, 2019 · May 8, 2019 · May 8, 2019
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,18 @@
+Package: bmarketing
+Title: package for some marketing data analysis
+Version: 0.0.1
+Authors@R: 
+    person(given = "Group 2",
+           family = "GoIT DS",
+           role = c("aut", "cre"),
+           email = "[email protected]",
+           comment = c(ORCID = "YOUR-ORCID-ID"))
+Description: Fits a decision tree model to predict whether a customer will sign a term deposit.
+License: MIT + file LICENSE
+Encoding: UTF-8
+LazyData: true
+Depends:
+  rpart.plot
+Imports:
+  rpart
+RoxygenNote: 6.1.1
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2019
+COPYRIGHT HOLDER: patrick sibetz
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,18 @@
+# Generated by roxygen2: do not edit by hand
+
+export(checkNA)
+export(cleanData)
+export(dtree)
+export(dtreeperf)
+export(dtreeplot)
+export(dtreepredict)
+export(dtreesummary)
+export(logit)
+export(standardize)
+export(trans)
+export(translog)
+importFrom(rpart,rpart)
+importFrom(stats,glm)
+importFrom(stats,na.omit)
+importFrom(stats,predict)
+importFrom(stats,sd)
diff --git a/R/checkNA.R b/R/checkNA.R
@@ -0,0 +1,22 @@
+#' Report how many rows of a dataframe contain NA values
+#'
+#' Compares the number of rows of \code{ds} with the number of rows left after
+#' \code{na.omit()} and prints a one-line message with the result.
+#'
+#' @param ds dataframe object
+#'
+#' @return
+#' The printed message (a character string), as returned by \code{print()}.
+#'
+#' @export
+checkNA <- function(ds){
+  # Every row dropped by na.omit() holds at least one NA.
+  n_incomplete <- nrow(ds) - nrow(na.omit(ds))
+
+  if (n_incomplete != 0) {
+    print(paste('There are', n_incomplete, 'rows having NAs'))
+  } else {
+    print('No empty records found in the dataframe')
+  }
+}
+
diff --git a/R/cleanData.R b/R/cleanData.R
@@ -0,0 +1,68 @@
+#' Check the quality of a dataframe before modelling
+#'
+#' Verifies that the target column exists and contains no NAs. Then either
+#' imputes missing values (\code{replaceNAs = TRUE}) or reports them and drops
+#' every column in which more than half of the values are NA.
+#'
+#' @param ds dataframe object
+#' @param targetVar name (string) of the target variable of the dataframe \code{ds}
+#' @param replaceNAs \code{TRUE} or \code{FALSE}. If \code{TRUE}, NAs in numeric
+#'   columns are replaced with the column mean and NAs in all other columns with
+#'   the category "MISSING"; if \code{FALSE}, NAs are only reported.
+#'
+#' @return The cleaned dataframe. Raises an error if \code{targetVar} is not a
+#'   column of \code{ds} or contains NAs, and a warning for every column that is
+#'   imputed, removed, or kept although it has NAs.
+#'
+#' @importFrom stats sd predict na.omit
+#' @export
+cleanData <- function( ds, targetVar, replaceNAs=FALSE ){
+  # Firstly check that the target variable actually exists inside the dataframe.
+  if (!any(colnames(ds) == targetVar))
+    stop( paste(targetVar, "variable not part of the dataframe passed"))
+
+  # Secondly check the target for NAs. ds[[targetVar]] uses the value of
+  # targetVar; ds$targetVar would look for a column literally named "targetVar".
+  if (anyNA(ds[[targetVar]])) {
+    stop("Missing Value found in the target column")
+  } else {
+    print("Target Variable looks clean. No NA values")
+  }
+
+  if (replaceNAs) {
+    # Thirdly impute. seq_len() also copes with a dataframe without columns.
+    for (i in seq_len(ncol(ds))) {
+      column <- ds[[i]]
+      gaps <- is.na(column)
+      if (!any(gaps)) next  # nothing to impute, so no warning either
+      if (is.numeric(column)) {
+        column[gaps] <- mean(column, na.rm = TRUE)
+        warning(paste(colnames(ds)[i],"has been imputed with mean values."))
+      } else {
+        # A factor only accepts known levels, so register "MISSING" first.
+        if (is.factor(column)) levels(column) <- union(levels(column), "MISSING")
+        column[gaps] <- "MISSING"
+        warning(paste("For variable",colnames(ds)[i],"category MISSING was defined."))
+      }
+      ds[[i]] <- column
+    }
+  } else {
+    checkNA(ds)
+
+    # Fourth, flag the columns in which more than half of the values are NA.
+    drop_col <- logical(ncol(ds))
+    for (i in seq_len(ncol(ds))) {
+      na_share <- mean(is.na(ds[[i]]))
+      # isTRUE() keeps a NaN share (dataframe with zero rows) from breaking if().
+      if (isTRUE(na_share > 0.5)) {
+        warning(paste(colnames(ds)[i],"has more than half NA's, and was excluded from the sample"))
+        drop_col[i] <- TRUE
+      } else if (isTRUE(na_share > 0)) {
+        warning(paste(colnames(ds)[i],"has NA values!"))
+      }
+    }
+    ds <- ds[!drop_col]
+  }
+
+  return(ds)
+}
diff --git a/R/dtree.R b/R/dtree.R
@@ -0,0 +1,66 @@
+#' dtree
+#' @description Fits a decision tree with \code{rpart()}, using \code{target}
+#'   as the response and every other column of \code{udata} as a predictor.
+#' @param udata input data for the model.
+#' @param target name of the target column of \code{udata} (string), e.g. \code{"y"}.
+#' @return A decision tree model (the \code{rpart} fit, created with \code{model = TRUE}).
+#' @importFrom rpart rpart
+#' @export
+dtree <- function(udata,target) {
+  # stats:: prefix: as.formula is not among the functions imported from stats.
+  rpart(stats::as.formula(paste(target, "~ .")), data = udata, model = TRUE)
+}
+
+
+#' dtreesummary
+#' @description Summarizes a fitted model by calling \code{summary()} on it.
+#' @param dt_model the model to summarize
+#' @return the value of \code{summary(dt_model)}
+#'
+#' @export
+dtreesummary <- function(dt_model) {
+  # Kept as the last expression so the visibility chosen by summary() is passed on.
+  summary(object = dt_model)
+}
+
+#' dtreeplot
+#' @description Plots a decision tree model with \code{rpart.plot::rpart.plot()}.
+#' @param dt_model model that we want to plot
+#' @return the value returned by \code{rpart.plot()}; the plot is drawn as a side effect
+#'
+#' @export
+dtreeplot<-function(dt_model) {
+  # Qualified call: works even when rpart.plot is installed but not attached.
+  rpart.plot::rpart.plot(dt_model)
+}
+
+#' dtreepredict
+#' @description Scores new data with a fitted model by calling \code{predict()}
+#'   with \code{type = "class"}.
+#' @param dt_model model that we want to use to generate our predictions
+#' @param predictdata data that we want to score
+#' @return the predictions produced by \code{predict()} for \code{predictdata}
+#'
+#' @export
+dtreepredict <- function(dt_model, predictdata) {
+  # type = "class" is passed through to the predict() method of dt_model.
+  scored <- predict(dt_model, predictdata, type = "class")
+  return(scored)
+}
+
+
+#' dtreeperf
+#' @description Checks model accuracy: the share of predictions equal to the actual target.
+#' @param target actual target in our data
+#' @param predictions predicted target
+#' @return accuracy, a single number (NA if any of the comparisons is NA)
+#' @export
+dtreeperf<-function(target,predictions){
+  # Two factors with different level sets cannot be compared with ==, so compare labels.
+  if (is.factor(target)) target <- as.character(target)
+  if (is.factor(predictions)) predictions <- as.character(predictions)
+  mean(target == predictions)
+}
+
+
+
diff --git a/R/logit.R b/R/logit.R
@@ -0,0 +1,16 @@
+#' logit
+#' @description Fits a logit model (binomial \code{glm}) of \code{target} on all
+#'   other columns of \code{udata}.
+#'
+#' @param udata Input data for the model.
+#' @param target the name of the target variable from the input data (string)
+#'
+#' @return The fitted logit model (a \code{glm} object).
+#' @importFrom stats glm
+#' @export
+logit<-function(udata,target){
+  # stats:: prefix: as.formula is not among the functions imported from stats.
+  glm(stats::as.formula(paste(target,"~.")) , data=udata,family = "binomial")
+}
+
+
diff --git a/R/transform.R b/R/transform.R
@@ -0,0 +1,45 @@
+#' translog
+#'
+#' A simple natural-log transformation of non-negative numbers.
+#'
+#' @param x A numeric vector without negative values; NAs are passed through.
+#' @return \code{log(x)}, a numeric vector the same length as \code{x}.
+#' @examples
+#' translog(exp(rnorm(7)))
+#' @export
+translog<-function(x){
+    if ( !is.numeric(x) ) stop("Input must be numeric!")
+    # na.rm = TRUE: an NA in x must not turn the condition itself into NA.
+    if ( any(x<0, na.rm = TRUE) ) stop("Input must not be negative!")
+    log(x)  # plain last expression, so the result is returned visibly
+}
+
+#' trans
+#'
+#' A class transformation, which converts numeric to factor and factor to numeric.
+#'
+#' @param x A numeric or factor vector.
+#' @return A factor if \code{x} is numeric; a numeric vector (parsed from the
+#'   level labels) if \code{x} is a factor; otherwise \code{x} unchanged.
+#' @export
+trans <- function(x){
+    # Go through as.character() so the labels, not the internal codes, are parsed.
+    if (is.factor(x)) return(as.numeric(as.character(x)))
+    if (is.numeric(x)) return(as.factor(x))
+    x
+}
+
+
+#' standardize
+#'
+#' Centers numeric data on its mean and scales it by its standard deviation.
+#'
+#' @param x numeric data
+#' @return the standardized values, \code{(x - mean(x)) / sd(x)}
+#' @export
+standardize <- function(x){
+    if (!is.numeric(x)) stop("Input must be numeric!")
+    deviation <- x - mean(x)
+    deviation / sd(x)
+}
+
diff --git a/README.Rmd b/README.Rmd
@@ -4,20 +4,50 @@ output: github_document
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->
 
-```{r, echo = FALSE}
+```{r, include = FALSE}
 knitr::opts_chunk$set(
   collapse = TRUE,
   comment = "#>",
-  fig.path = "man/figures/README-"
+  fig.path = "man/figures/README-",
+  out.width = "100%"
 )
 ```
+# bmarketing
 
-[![Travis Build Status](https://travis-ci.org/Quantargo/bmarketing.svg?branch=master)](https://travis-ci.org/Quantargo/bmarketing)
-[![Coverage Status](https://img.shields.io/codecov/c/github/Quantargo/bmarketing/master.svg)](https://codecov.io/github/Quantargo/bmarketing?branch=master)
+<!-- badges: start -->
+<!-- badges: end -->
 
-## Overview
+The goal of the bmarketing package is to fit a decision tree model and use it to generate predictions for a provided dataset. It can also clean the dataset before fitting a model or making predictions.
 
-The bmarketing dataset
+## Installation
+
+
+You can install the development version from [GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("nandakallugjeri/bmarketing")
+```
+## Functions
+
+Function documentation is accessible through `?functionname`. For example, run `?checkNA` to see the documentation for the checkNA function.
+
+- checkNA
+- cleanData
+- dtree
+- dtreeplot
+- dtreesummary
+- dtreepredict
+- dtreeperf
+- translog
+- trans
+- standardize
+- logit
+
+```{r}
+library(bmarketing)
+mytree<-dtree(bmarketing,"y")
+dtreeplot(mytree)
+```
 
-<!-- TODO: Change README to make it more descriptive, add examples, etc. -->
 
diff --git a/README.md b/README.md
@@ -1,14 +1,59 @@
 
 <!-- README.md is generated from README.Rmd. Please edit that file -->
 
-[![Travis Build
-Status](https://travis-ci.org/Quantargo/bmarketing.svg?branch=master)](https://travis-ci.org/Quantargo/bmarketing)
-[![Coverage
-Status](https://img.shields.io/codecov/c/github/Quantargo/bmarketing/master.svg)](https://codecov.io/github/Quantargo/bmarketing?branch=master)
+# bmarketing
 
-## Overview
+<!-- badges: start -->
 
-The bmarketing
-dataset
+<!-- badges: end -->
 
-<!-- TODO: Change README to make it more descriptive, add examples, etc. -->
+The goal of the bmarketing package is to fit a decision tree model and use
+it to generate predictions for a provided dataset. It can also clean the
+dataset before fitting a model or making predictions.
+
+## Installation
+
+You can install the development version from
+[GitHub](https://github.com/) with:
+
+``` r
+# install.packages("devtools")
+devtools::install_github("nandakallugjeri/bmarketing")
+```
+
+## Functions
+
+Function documentation is accessible through ?functionname. For example,
+run ?checkNA to see the documentation for the checkNA function.
+
+checkNA
+
+cleanData
+
+dtree
+
+dtreeplot
+
+dtreesummary
+
+dtreepredict
+
+dtreeperf
+
+translog
+
+trans
+
+standardize
+
+logit
+
+``` r
+library(bmarketing)
+#> Loading required package: rpart.plot
+#> Loading required package: rpart
+mytree<-dtree(bmarketing,"y")
+dtreeplot(mytree)
+```
+
+<img src="man/figures/README-unnamed-chunk-2-1.png" width="100%" />