From 81c3574f78af0e26a6a938b342881a775d661255 Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 19:11:22 +0600
Subject: [PATCH 1/8] Add files via upload
---
RMarkdown_Project1.md | 180 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 180 insertions(+)
create mode 100644 RMarkdown_Project1.md
diff --git a/RMarkdown_Project1.md b/RMarkdown_Project1.md
new file mode 100644
index 00000000000..7cb84165575
--- /dev/null
+++ b/RMarkdown_Project1.md
@@ -0,0 +1,180 @@
+---
+title: "RMarkdown_Project1"
+author: "kks_git"
+date: "2023-12-22"
+output: html_document
+---
+
+
+
+A. Loading and preprocessing the data
+
+1. Load the Data
+
+
+```r
+data<-"activity.csv"  # plain relative path; the old ".\\activity.csv" only resolved on Windows
+dataP1<-read.csv(data, header=TRUE)  # reads activity.csv from the current working directory
+```
+
+2. Process/transform the Data
+
+
+```r
+dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
+```
+
+B. What is mean total number of steps taken per day?
+
+1. Calculate the total number of steps taken per day
+
+
+```r
+dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)  # formula method defaults to na.action = na.omit: rows with NA steps are dropped, so a day whose steps are all NA is left out rather than summed to 0
+```
+
+2. Make a histogram of the total number of steps taken each day
+
+
+```r
+steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_hist
+```
+
+
+
+3. Calculate and report the mean and median of the total number of steps taken per day
+
+
+```r
+stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
+stepsMean
+```
+
+```
+## [1] 10766.19
+```
+
+```r
+stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
+stepsMedian
+```
+
+```
+## [1] 10765
+```
+
+C. What is the average daily activity pattern?
+
+1. Make a time series plot (i.e.
+type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis)
+
+```r
+stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
+timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_plot
+```
+
+
+
+2. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
+
+```r
+maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
+maxInterval
+```
+
+D. Imputing missing values
+
+1. Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NAs)
+
+```r
+totalNAs<-sum(is.na(dataP1$steps))
+totalNAs
+```
+
+```
+## [1] 2304
+```
+
+2. Devise a strategy for filling in all of the missing values in the dataset. The strategy is to fill each NA with the mean number of steps recorded for the same 5-minute interval, averaged across all days.
+
+```r
+meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
+```
+
+3. Create a new dataset that is equal to the original dataset but with the missing data filled in.
+
+```r
+# Fill every missing steps value with the mean of its own 5-minute interval.
+fill_data<-dataP1
+steps_NAs<-is.na(dataP1$steps)
+fill_data$steps[steps_NAs]<-meanInterval$steps[match(dataP1$interval[steps_NAs], meanInterval$interval)]  # match() looks each NA row up by interval instead of relying on vector recycling
+fill_stepsNA<-sum(is.na(fill_data))
+fill_stepsNA
+```
+
+```
+## [1] 0
+```
+
+4A. Make a histogram of the total number of steps taken each day
+
+```r
+dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
+steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_filled_hist
+```
+
+
+
+4B. Calculate and report the mean and median total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
+
+```r
+steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMean
+```
+
+```
+## [1] 10766.19
+```
+
+```r
+steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMedian
+```
+
+```
+## [1] 10766.19
+```
+
+4C. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
+
+```r
+"The mean is the same in both cases (10766.19). The median was slightly lower for the data with NAs (10765) and rose to 10766.19 after filling the NAs, so the mean and median become equal."
+```
+
+```
+## [1] "The mean is the same in both cases (10766.19). The median was slightly lower for the data with NAs (10765) and rose to 10766.19 after filling the NAs, so the mean and median become equal."
+```
+
+E. Are there differences in activity patterns between weekdays and weekends?
+
+1. Create a new factor variable in the dataset with two levels -- "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.
+
+```r
+fill_data$date<-as.Date(fill_data$date)
+wday<-1:5  # POSIXlt $wday codes: 0 = Sunday, 1 = Monday ... 6 = Saturday
+fill_data$day<-factor(ifelse(as.POSIXlt(fill_data$date)$wday %in% wday,'weekday','weekend'))  # numeric codes do not depend on the locale, unlike weekdays()
+```
+
+2. Make a panel plot containing a time series plot (i.e. type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis).
+
+```r
+meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
+timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_daySteps
+```
+
+
+
From bbd1385ee74a081e6e953d570424b6e6f2a240c8 Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 19:13:04 +0600
Subject: [PATCH 2/8] Add files via upload
---
RMarkdown_Project1.Rmd | 133 ++++++++++
RMarkdown_Project1.html | 519 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 652 insertions(+)
create mode 100644 RMarkdown_Project1.Rmd
create mode 100644 RMarkdown_Project1.html
diff --git a/RMarkdown_Project1.Rmd b/RMarkdown_Project1.Rmd
new file mode 100644
index 00000000000..473bbef75a5
--- /dev/null
+++ b/RMarkdown_Project1.Rmd
@@ -0,0 +1,133 @@
+---
+title: "RMarkdown_Project1"
+author: "kks_git"
+date: "2023-12-22"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(ggplot2)
+library(dplyr)
+library(tidyr)
+library(rmarkdown)
+```
+
+A. Loading and preprocessing the data
+
+1. Load the Data
+
+```{r}
+data<-"activity.csv"  # plain relative path; the old ".\\activity.csv" only resolved on Windows
+dataP1<-read.csv(data, header=TRUE)  # reads activity.csv from the current working directory
+```
+
+2. Process/transform the Data
+
+```{r}
+dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
+```
+
+B. What is mean total number of steps taken per day?
+
+1. Calculate the total number of steps taken per day
+
+```{r}
+dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)  # formula method defaults to na.action = na.omit: rows with NA steps are dropped, so a day whose steps are all NA is left out rather than summed to 0
+```
+
+2. Make a histogram of the total number of steps taken each day
+
+```{r}
+steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_hist
+```
+
+3. Calculate and report the mean and median of the total number of steps taken per day
+
+```{r}
+stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
+stepsMean
+stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
+stepsMedian
+```
+
+C. What is the average daily activity pattern?
+
+1. Make a time series plot (i.e.
+type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis)
+```{r}
+stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
+timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_plot
+```
+
+2. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
+```{r}
+maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
+maxInterval
+```
+
+D. Imputing missing values
+
+1. Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NAs)
+```{r}
+totalNAs<-sum(is.na(dataP1$steps))
+totalNAs
+```
+
+2. Devise a strategy for filling in all of the missing values in the dataset. The strategy is to fill each NA with the mean number of steps recorded for the same 5-minute interval, averaged across all days.
+```{r}
+meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
+```
+
+3. Create a new dataset that is equal to the original dataset but with the missing data filled in.
+```{r}
+# Fill every missing steps value with the mean of its own 5-minute interval.
+fill_data<-dataP1
+steps_NAs<-is.na(dataP1$steps)
+fill_data$steps[steps_NAs]<-meanInterval$steps[match(dataP1$interval[steps_NAs], meanInterval$interval)]  # match() looks each NA row up by interval instead of relying on vector recycling
+fill_stepsNA<-sum(is.na(fill_data))
+fill_stepsNA
+```
+
+4A. Make a histogram of the total number of steps taken each day
+```{r}
+dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
+steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_filled_hist
+```
+
+4B. Calculate and report the mean and median total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
+```{r}
+steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMean
+steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMedian
+```
+
+4C. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
+```{r}
+"The mean is the same in both cases (10766.19). The median was slightly lower for the data with NAs (10765) and rose to 10766.19 after filling the NAs, so the mean and median become equal."
+```
+
+E. Are there differences in activity patterns between weekdays and weekends?
+
+1. Create a new factor variable in the dataset with two levels -- "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.
+```{r}
+fill_data$date<-as.Date(fill_data$date)
+wday<-1:5  # POSIXlt $wday codes: 0 = Sunday, 1 = Monday ... 6 = Saturday
+fill_data$day<-factor(ifelse(as.POSIXlt(fill_data$date)$wday %in% wday,'weekday','weekend'))  # numeric codes do not depend on the locale, unlike weekdays()
+```
+
+2. Make a panel plot containing a time series plot (i.e. type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis).
+```{r}
+meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
+timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_daySteps
+```
+
+F. Convert to html
+```{r}
+```
+
diff --git a/RMarkdown_Project1.html b/RMarkdown_Project1.html
new file mode 100644
index 00000000000..72ace49ce86
--- /dev/null
+++ b/RMarkdown_Project1.html
@@ -0,0 +1,519 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+RMarkdown_Project1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
A. Loading and preprocessing the data
+
+- Load the Data
+
+
data<-".\\activity.csv"
+dataP1<-read.csv(data, header=TRUE)
+
+- Process/transform the Data
+
+
dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
+
B. What is mean total number of steps taken per day?
+
+- Calculate the total number of steps taken per day
+
+
dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)
+
+- Make a histogram of the total number of steps taken each day
+
+
steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_hist
+

+
+- Calculate and report the mean and median of the total number of
+steps taken per day
+
+
stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
+stepsMean
+
## [1] 10766.19
+
stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
+stepsMedian
+
## [1] 10765
+
C. What is the average daily activity pattern?
+
+- Make a time series plot (i.e. type = “l”) of the 5-minute interval
+(x-axis) and the average number of steps taken, averaged across all days
+(y-axis)
+
+
stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
+timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_plot
+

+
+- Which 5-minute interval, on average across all the days in the
+dataset, contains the maximum number of steps?
+
+
maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
+maxInterval
+
## interval steps
+## 104 835 206.1698
+
D. Imputing missing values
+
+- Calculate and report the total number of missing values in the
+dataset (i.e. the total number of rows with NA NAs)
+
+
totalNAs<-sum(is.na(dataP1$steps))
+totalNAs
+
## [1] 2304
+
+- Devise a strategy for filling in all of the missing values in the
+dataset. The strategy is to fill all NAs with the average of 5-minute
+interval.
+
+
meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
+
+- Create a new dataset that is equal to the original dataset but with
+the missing data filled in.
+
+
fill_data<-dataP1
+steps_NAs<-is.na(dataP1$steps)
+NAs<-na.omit(subset(meanInterval,interval==dataP1$interval[steps_NAs]))
+fill_data$steps[steps_NAs]<-NAs[,2]
+fill_stepsNA<-sum(is.na(fill_data))
+fill_stepsNA
+
## [1] 0
+
4A. Make a histogram of the total number of steps taken each day
+
dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
+steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_filled_hist
+

+
4B. Calculate and report the mean and median total number of steps
+taken per day. Do these values differ from the estimates from the first
+part of the assignment? What is the impact of imputing missing data on
+the estimates of the total daily number of steps?
+
steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMean
+
## [1] 10766.19
+
steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMedian
+
## [1] 10766.19
+
4C.Do these values differ from the estimates from the first part of
+the assignment? What is the impact of imputing missing data on the
+estimates of the total daily number of steps?
+
"The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
+
## [1] "The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
+
E. Are there differences in activity patterns between weekdays and
+weekends?
+
1.Create a new factor variable in the dataset with two levels –
+“weekday” and “weekend” indicating whether a given date is a weekday or
+weekend day.
+
fill_data$date<-as.Date(fill_data$date)
+wday=c("Monday", "Tuesday","Wednesday", "Thursday","Friday")
+fill_data$day<-factor(ifelse(weekdays(fill_data$date) %in% wday,'weekday','weekend'))
+
+- Create a new factor variable in the dataset with two levels –
+“weekday” and “weekend” indicating whether a given date is a weekday or
+weekend day.
+
+
meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
+timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_daySteps
+

+
F. Convert to html
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From 54fc6cdf8ffd0ca104a03562df420eca1516fa53 Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 19:19:36 +0600
Subject: [PATCH 3/8] Delete RMarkdown_Project1.html
---
RMarkdown_Project1.html | 519 ----------------------------------------
1 file changed, 519 deletions(-)
delete mode 100644 RMarkdown_Project1.html
diff --git a/RMarkdown_Project1.html b/RMarkdown_Project1.html
deleted file mode 100644
index 72ace49ce86..00000000000
--- a/RMarkdown_Project1.html
+++ /dev/null
@@ -1,519 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-RMarkdown_Project1
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
A. Loading and preprocessing the data
-
-- Load the Data
-
-
data<-".\\activity.csv"
-dataP1<-read.csv(data, header=TRUE)
-
-- Process/transform the Data
-
-
dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
-
B. What is mean total number of steps taken per day?
-
-- Calculate the total number of steps taken per day
-
-
dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)
-
-- Make a histogram of the total number of steps taken each day
-
-
steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
-steps_hist
-

-
-- Calculate and report the mean and median of the total number of
-steps taken per day
-
-
stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
-stepsMean
-
## [1] 10766.19
-
stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
-stepsMedian
-
## [1] 10765
-
C. What is the average daily activity pattern?
-
-- Make a time series plot (i.e. type = “l”) of the 5-minute interval
-(x-axis) and the average number of steps taken, averaged across all days
-(y-axis)
-
-
stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
-timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
-timeSeries_plot
-

-
-- Which 5-minute interval, on average across all the days in the
-dataset, contains the maximum number of steps?
-
-
maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
-maxInterval
-
## interval steps
-## 104 835 206.1698
-
D. Imputing missing values
-
-- Calculate and report the total number of missing values in the
-dataset (i.e. the total number of rows with NA NAs)
-
-
totalNAs<-sum(is.na(dataP1$steps))
-totalNAs
-
## [1] 2304
-
-- Devise a strategy for filling in all of the missing values in the
-dataset. The strategy is to fill all NAs with the average of 5-minute
-interval.
-
-
meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
-
-- Create a new dataset that is equal to the original dataset but with
-the missing data filled in.
-
-
fill_data<-dataP1
-steps_NAs<-is.na(dataP1$steps)
-NAs<-na.omit(subset(meanInterval,interval==dataP1$interval[steps_NAs]))
-fill_data$steps[steps_NAs]<-NAs[,2]
-fill_stepsNA<-sum(is.na(fill_data))
-fill_stepsNA
-
## [1] 0
-
4A. Make a histogram of the total number of steps taken each day
-
dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
-steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
-steps_filled_hist
-

-
4B. Calculate and report the mean and median total number of steps
-taken per day. Do these values differ from the estimates from the first
-part of the assignment? What is the impact of imputing missing data on
-the estimates of the total daily number of steps?
-
steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
-steps_filledMean
-
## [1] 10766.19
-
steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
-steps_filledMedian
-
## [1] 10766.19
-
4C.Do these values differ from the estimates from the first part of
-the assignment? What is the impact of imputing missing data on the
-estimates of the total daily number of steps?
-
"The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
-
## [1] "The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
-
E. Are there differences in activity patterns between weekdays and
-weekends?
-
1.Create a new factor variable in the dataset with two levels –
-“weekday” and “weekend” indicating whether a given date is a weekday or
-weekend day.
-
fill_data$date<-as.Date(fill_data$date)
-wday=c("Monday", "Tuesday","Wednesday", "Thursday","Friday")
-fill_data$day<-factor(ifelse(weekdays(fill_data$date) %in% wday,'weekday','weekend'))
-
-- Create a new factor variable in the dataset with two levels –
-“weekday” and “weekend” indicating whether a given date is a weekday or
-weekend day.
-
-
meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
-timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
-timeSeries_daySteps
-

-
F. Convert to html
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
From f7a0d3bb41574ee0e0f0f27733d6dd167e133192 Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 19:20:20 +0600
Subject: [PATCH 4/8] Add files via upload
---
RMarkdown_Project1.html | 519 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 519 insertions(+)
create mode 100644 RMarkdown_Project1.html
diff --git a/RMarkdown_Project1.html b/RMarkdown_Project1.html
new file mode 100644
index 00000000000..72ace49ce86
--- /dev/null
+++ b/RMarkdown_Project1.html
@@ -0,0 +1,519 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+RMarkdown_Project1
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
A. Loading and preprocessing the data
+
+- Load the Data
+
+
data<-".\\activity.csv"
+dataP1<-read.csv(data, header=TRUE)
+
+- Process/transform the Data
+
+
dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
+
B. What is mean total number of steps taken per day?
+
+- Calculate the total number of steps taken per day
+
+
dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)
+
+- Make a histogram of the total number of steps taken each day
+
+
steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_hist
+

+
+- Calculate and report the mean and median of the total number of
+steps taken per day
+
+
stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
+stepsMean
+
## [1] 10766.19
+
stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
+stepsMedian
+
## [1] 10765
+
C. What is the average daily activity pattern?
+
+- Make a time series plot (i.e. type = “l”) of the 5-minute interval
+(x-axis) and the average number of steps taken, averaged across all days
+(y-axis)
+
+
stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
+timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_plot
+

+
+- Which 5-minute interval, on average across all the days in the
+dataset, contains the maximum number of steps?
+
+
maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
+maxInterval
+
## interval steps
+## 104 835 206.1698
+
D. Imputing missing values
+
+- Calculate and report the total number of missing values in the
+dataset (i.e. the total number of rows with NA NAs)
+
+
totalNAs<-sum(is.na(dataP1$steps))
+totalNAs
+
## [1] 2304
+
+- Devise a strategy for filling in all of the missing values in the
+dataset. The strategy is to fill all NAs with the average of 5-minute
+interval.
+
+
meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
+
+- Create a new dataset that is equal to the original dataset but with
+the missing data filled in.
+
+
fill_data<-dataP1
+steps_NAs<-is.na(dataP1$steps)
+NAs<-na.omit(subset(meanInterval,interval==dataP1$interval[steps_NAs]))
+fill_data$steps[steps_NAs]<-NAs[,2]
+fill_stepsNA<-sum(is.na(fill_data))
+fill_stepsNA
+
## [1] 0
+
4A. Make a histogram of the total number of steps taken each day
+
dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
+steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_filled_hist
+

+
4B. Calculate and report the mean and median total number of steps
+taken per day. Do these values differ from the estimates from the first
+part of the assignment? What is the impact of imputing missing data on
+the estimates of the total daily number of steps?
+
steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMean
+
## [1] 10766.19
+
steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMedian
+
## [1] 10766.19
+
4C.Do these values differ from the estimates from the first part of
+the assignment? What is the impact of imputing missing data on the
+estimates of the total daily number of steps?
+
"The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
+
## [1] "The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
+
E. Are there differences in activity patterns between weekdays and
+weekends?
+
1.Create a new factor variable in the dataset with two levels –
+“weekday” and “weekend” indicating whether a given date is a weekday or
+weekend day.
+
fill_data$date<-as.Date(fill_data$date)
+wday=c("Monday", "Tuesday","Wednesday", "Thursday","Friday")
+fill_data$day<-factor(ifelse(weekdays(fill_data$date) %in% wday,'weekday','weekend'))
+
+- Create a new factor variable in the dataset with two levels –
+“weekday” and “weekend” indicating whether a given date is a weekday or
+weekend day.
+
+
meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
+timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_daySteps
+

+
F. Convert to html
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From c4a963d3065688769c5959287d7ec3bcd143696f Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 20:10:54 +0600
Subject: [PATCH 5/8] Add files via upload
---
Fig 1-Histogram of Total Steps per Day.png | Bin 0 -> 4711 bytes
...am of Total Steps per Day with Filled NAs.png | Bin 0 -> 4376 bytes
...inute interval vs averaged steps per day).png | Bin 0 -> 7313 bytes
...5-minute interval vs averaged steps taken.png | Bin 0 -> 5776 bytes
4 files changed, 0 insertions(+), 0 deletions(-)
create mode 100644 Fig 1-Histogram of Total Steps per Day.png
create mode 100644 Fig 3-Histogram of Total Steps per Day with Filled NAs.png
create mode 100644 Fig 4-Panel Plot in Time Series Plot (5-minute interval vs averaged steps per day).png
create mode 100644 Fig. 2-Time Series Plot (5-minute interval vs averaged steps taken.png
diff --git a/Fig 1-Histogram of Total Steps per Day.png b/Fig 1-Histogram of Total Steps per Day.png
new file mode 100644
index 0000000000000000000000000000000000000000..253c9aadb366e295072cea4d2967e047e7d87fb2
GIT binary patch
literal 4711
zcmeHLdsI_L8czX*vIP<=q9_8QRja^?Qrhqc6ai5qqE-|{RBY5Di9#t6Lb8aJE2&AW
z4^)(_MMa2^0vhBQs|g^AtVR$GkoL;UAjB(%+=PVNN#geG?!Vo0cK`66d(X`M=6n9;
z`)1C}vGwaB>};KFaX6e^ec3@}m2
z94b{n6@a&oW0c`=r~;0$Y2g4$3&tbjctACe!joeHDp}Bni6}S`CK6$C5r(yJ$SoXd
z3*c_SS};t`Bg*A~5$h9B`vjamtfddbhKGkS91e&9ZomP?Fw7_l*a0JkVUyad^B}Sh
z53Jgnio^Y7n(_PKaSv0E!x4NVSB7k&Ul=K{%Y4$*aWsEN=#Q1;)vHL|m*?NOxmeN)LMbV6S9|S>8CBIa2sy;zG@%)wd{*cv~sa8MqCfK>U4p
zlFW^HHAKHQ2R(N+1&N7ZOLx)M4=Y3Dwe(Z;zu&u@f;4Ad)HhJnF9ltjWhuz5&0}e7
zF{%tpoNn)^xWJm$gC4w0MS7&Y{V)RevPY8yh*C>>P>gAR%52*dFRo;=)X$!VkH<1J
z5rf^`UbPO~f!$rniHA$#7=w?yVSjc^gmv3JgG^8qwf!dl*QOzZtb9m!dRP1NC@1p4X;U8Kt{f%(YJMa1l4-hm_ISTh2uREP?*xJ+3$$XeD1Qxf~
zFtIb}s4z9Y>
zKho_lwqW=nzX9sc*9IPyGm*MySw0R~n}tJ2+-9S0#hqh#fp4?5fOI-{F7^EG#_x
z$3!)GPGadz$mUWG5igre(OsaQ|jh1M!Rzt-DP8ZaEbY@rd>pWHt
zIvvV(2~#1eQgO2f)31tN8d1Nb|LT2CZ~&a@KJ`1dpd}Uv)c*au9D?~>kn21p+th=+zj(3;
zzzH%Sf^qLsc!-;^7XYuZ_q7FhgB~FvbsMP0#)uj>%q>X$?K6T2AXj!U
zL;14O!vu5CjBNCY?CS^0r
zuWt?va*ocXC$*A+m#72(ttKkQA;B%kHaZ&>43@p^7%YakA)DNK13G$`H$v;3jPxSQ
zv4dQ6Ca@uI)=A-$a;E?|3v!mo&1fn5fZ>|H`zDnIQ97Y$^3z-tx6%%-o;zd&Ihc{k
z_?yYcy4;LWqyn;)#BDVD>7^wF4(?GO1f&EPmpQJlt8}Z3c8YZ`ESZ~Kk6jC{uYt8&
zv*!O~f#|O@%i*MlP2BzkaTm%udxKx?_Th?~Ux2Cu3%R;y$EvS(f4h$Wsbb{iXxX4HMRq03tudh?
zP09yb!C)NrL+QJ5T0G*5Z4ZPeI;y2HhD9tr0Nah8jY`Gs5Qg}9igaufiG+NGO|=J#{9V;;>)y%v!UV;Y0hWI*P|@kw
zuIg@!)=N*a-b9lkkhY3`*6|2EH7N}~bgMvMnl_^?AY-m~NRb@I7%&GKZZ2PtQgvle
zdq8}w6|iJ3Z{uoT?TDN!UIX%2>})Q7pg|>?7c^!@!L(2qHK0A4C3_0JJS_lZ=H&xW
z#qvD#ff=FW=H&$}An9=g{-N3%HWH2CpY9la?SQ?-aZF??(%D%m<-2^gCyGEzE{kOp
zr00^W#9~?&t%@`D*|PBNqAvaCicN4+t+gki4!ysJ*2ap%B>T^_pv^wlcP5=0C{OqF
zDCQ*_nttIgxYN5KH&oxR8IB8p@4jeyt~}Ckg;OULV*}6q|BbNSE1#2FcXq+lD5uV!
zeQn{KQWp(cuw{1QnVA9cTkdu)f*eZ28LhV}M8usmuQ)Vw&czkw};N$T#L_B{5
z^?KR5t~jt;K7P+=!umZt|DB}Z;E~S#XCT!9QEk-H8y>IY{gwu`I^TJ8v{1S(yax94
zm~zH$$cFLMj?Jw+df>(<$FZfl4k&8Y*LJ_{CF50oc$;?vkY={c5oyHM(9dH(mQ2`L
z-GPg)yU!{65H?J;*oj1U;yImG2-7nAXLM1LO+?aRx;Q&{3*(gUks{xadPT6C+3b0@
zUYZ+zhUSfQ2Tkcb|1`>H4t4uYOiVnS
zz-iAWevG|{Wu!JcinXCLux6j6yYmUM*-9!xa&O!Xjwywi`bDy@a?bXyPiJcaI=DXZ
zzToJSR*jNd$uV|+=6JL=MBg*(x2K3Qcx{@UB<6Oo-ioj>KJgm;%?WE{xgEU6S<=2D
zM?zy_#H{BkgWUC_9b*fUAKDj`srYj%DZ97zy{YSWqMh=cdQ72bv)|U}<#vE$@vrJP
z2PP&K=6?G_J^Kc$@?qhpNgquUYyCFA9Gh&v@2zmYJ+P)##nSS}ABY-R-#|P2wqHJ!
ze(n0QReJ6$X|)11D2#eMFP*Mb8)Tsm?W?>~w6Dr4oBGPptM@XNeHOlYU`g*^&)2iJ
zF|qL2#1suW8hh{1L_>{pRqu;7&(DYFsl#;i_YbNSkmAeE&Cic-&-CCtCyFMGsS
z-fMORf&J@6!SZy(Nlfw#{PNs`c)c=J(!SJqX;3tN-yj>4?htC!H>oX-G5ddikiSg(3B8W{>|e3L|B?b!zsNg|j`D
t&tUndPGq{k!W_B}`;UY2RAhOZuqV|j^@Ccm#&}^A8Mba^`IkEm{|LE9cHF0O#uKf^V+$69{?ag!@bxTp1C}QCcw?4?;bx-xCH<#0FVGI8zAuk
z2yk+8g2z|@iv>tnLxF2Hmc+-h;Y&g=6f%LpB9O@hGMB(=Ak8t8020Z-z$U?k%_i|B
zBt8VwkqIm^tAWfNU>SH=EHam6Xc}1XOasKm0_-_-HX92wvH5JaM8cLpTrz=6hDjhk
z3E-2kd_IiF=0kiwB;i9)1B=kWA~(P&4NwCFN!ZvqCTM_59^kSDAZTP{1m=Y4;5B#_
zf(%SBAq2sTkgr!_8?5w_@SO)E0RZ)(;YK{~X6OI_-RQOb?|!u0F$pRV^JqnVMB9x|
zoxOev>thGKXdg1my-|)Sj}HHE*{Xnb2^uXkTk-V7Rtx#NoqY88^{epn#slM2V=A8mF;DZS5Ra_!P4J`QATw4&t45ApPqkB`U5ii4IA
ziF8%gBlB!UV2FZZsp#9AtDL6hWyr_NuDa{>((7^Zhc!&wv?3<&N@uUl6U@VjZc8MQ
z`KzvH9XCW_T5>AP4aVO%Yu~e)Dv#EKeXrjo|lF
zd)LK%A0Fi0wYGhK#$OaiKL9d#bQ?iX_nm;u*nk5;*^QGrH@D#<@iSt_AZwu;o+}N|
zOb-?|)24C##Oa3dO3vrE;+`D08>L#{4~Nwi9elP?tZd8d5Q9h6rJZASd9Ii8ptkW2
zaR&8uC#5k`-&lQggRa}DXN4d7Y9ngbm`A~-(T%#MqRRC97;4Pj%#Krb9%(T=iaCqGw7Zf_Xw646pzO+;|80_G9TVLXu
z<-i$hw7D@Gsd=@3b|PJ+4x~Y-+>_wi7V&JibZTalGgNm-)2xvx-P$T0bKPUD8~|2s
z;mn$nDzz=~k{lsi`f?mlMhXIjXZ9~vd8)Y`Rh!q-p1X-cR!W`BR9UuWkg(W`?!`U2
z%6`<$#7!j%yah0p|2}`w_$QVsutP)AAc2yhEq={&RHOH%nDsgT9i#QSRP()ObJ=1
zHkBqK@r!8Ev8C685U|)u0Jj)+t7h@K~~>JnLR?($4K5eC-%V>4p@
zgjTD4tQCPJXKiAyC`t$SL0tD4@kK3v)z6-a5PDW);T-8vS9(kl^C4~lqgq-tXUWIA
z;}KQq|2wvLuNU*r{~}Gw|5+Pe&%5CCZYcag4tlRp|7j41iio&vi|{+ZYg;*cwu~j^
zkl7YlwMgw}s(Ur!ZYyRq-M=a>;>~!VGzAXl2&J_!Ab#fcgIOdE13v-}9Mo%i^$$7Y
za9CV+WUbvW@aFWfCR2tUJUXtA)O2#w`X-8)VRtEw&4CY^6&uB1PRQDszPjXzRtnMB
z{i$zt*~p9-Dg94p%a$&5L;xlcR_PC(5(g&y`bi|;f`>n|L
zn*#yjAbs;a>Jzlf5xIQ=@AexGUjDuKLp`_@TqR5HTn^XW9ykrSR8}xgFFf-EQglX2
zBhSHax8@r%s#+vpM8vm=Z{(y-%_GB^R8<)(jXW!-s_)f4+T0bDR{=1_&dcR6O#EvM
z^ML#O6wa+J#8kKyz`KCqsq%fC)pX~11ZD0xMy#Iib8|B@r>BD=?A{KY80xt_g;R5l
zk}`o&h%|0~j`mw*#};I)pS0X+RSQ>H|60klJ~xkAowylv2u>wFeCUirBuIx|JDdi&
z{h5b{BhNdME<-NXpde}Zx(S*pqBMoKM)w$U6%tNkNgv(Hw19IMoQmK?ru0xhUsy?0
z#>1G-i8c|>RUSSa@mu@1^}{S+@I@HMKy6^@)xusfq{BI|IGgu_f&Qu^bS}_)5;aek
zNO@sI8ymt>3>nY>lyV5*;SByVh?IyF>_th-3rDIJwsARysG2g^e10SFEBl0w{M(ZY
za`)aeQN4OcCrmjI6lVy9w^|igh)(+$A|w&X$f7#3@Cd=)$qd~Y2O4w3uz2}GjVTLneEjwXWsp{PJWC|LQ{T8ToU
zDg9CF5FcQ~f3aXtrf@mPDr^ly=w?p6H5O9O_)N9hr%+-9s@oP^rJLO+bXt`FNjm~yXKnzK!U~sxS9gtai)9kK?k#-pJCwa9s#{(aDaFf11N{(H
zr=Eu$8(E~a_!{74M@e2{YXqJvL|ofHA?dS4`0O^@R~_r^aJxqD+6mWr-4BaxSyOAp
z`{BT;+@_=))MW{&*|$41jm{+g|2cu@z+IkX*rC2X;I
zFqMA9LPPsGlV~&g6!+PzA
z%Mu*6t3fT_A*9=P*26?l-xT)4Q^_v-7$Ma$>*q=8ce%qy
zvETYD1U19wUTcZW0PE5eO$WIzE~@%+-^BKk6l4vvbq#&G-d*41av;8C%=cPom5`C<
zVwXo5m=ROfN;^|8r
z&2fip9$N~o5}qI{xpq`*MXh0z9h)x-bGjI}jTu%_FN=31&4!F`b~O#*?Aq-Oe>l86
My|>@~@*ija30@hgOaK4?
literal 0
HcmV?d00001
diff --git a/Fig 4-Panel Plot in Time Series Plot (5-minute interval vs averaged steps per day).png b/Fig 4-Panel Plot in Time Series Plot (5-minute interval vs averaged steps per day).png
new file mode 100644
index 0000000000000000000000000000000000000000..8ca6e5ba6daa649c8c7c0156a8408576baef9a5e
GIT binary patch
literal 7313
zcmbuE4LDQ%|M)j#N|Go|+U~f^S4g5f&rM0{MkP_o3Qh8`Qn6+><*7VPw?c~D$wN`8
zWK(7~V&slglIO$7Q#g~gGiLi8-QVx`cm00X|NFoG*Y!WwIp_2Ee)zo4=WL(X=iE5x
z?xMCrX9WU*P;=e4=P&|+gqAK9MY!hX4|O-V;D7Mzb2m%vP
z`v#46K%@Q9{tO)*9dmPYxQ7kG#s)!vFPo(U&ScmS2sQ-x&a}zMu*rb$Jq`|NhpRPi
zOGmhyTe@3#nOl03|4&8!4p;pdU4(yHf+d%9!SV!~48qcpLCAp59s(0S<*;zLTY7le
z@Kp!&RR{m8HC^dT>eAt=^z^3mvf=b5d4>%l5p$mSj|#jxm+IlR6GmEc=`E#$1Wles%w@mv=yo>@X
zMw5frTKc{?;}zrj)hgmO)3@YJ^X!2LmSI}J8Q+piDVls{?&n}vQZ3KLEPAI5KR2H2Y`$P`ZL`N<+%gbsroY>2@1^D8H6Sf|4{0_R
z1~5)Uldqf?S>cjPN;~tnxMrg|^PfhElkYu@rPp4^rXTfQ^}VEOMrgci6>k4j#YM_2
z{`c(e{LsyXR|@I6VVmW{CGl%;CK)jv()D8d{Dv)c{8R_G>QI{_wSVx+|G;s40@EdQ
zr+Im(o%hCxOky&9TH@IuCat@x%us9d|5CtrS$k1(sb3IPAnK7{wzQ4Qj;n@(wus8}
zXD_~C1%u69w)@?`Ek8wNZ)+VB>TjmpJ8#dE^SxhalCD^12kLGXyqYtdj~
z>Z4O>S5{*SDxSyS60==I&ATNR?5eL;WcMNFkCDUoG3p*t<92h-nLZdfS1a1fl?HqT
z4c=Ks`R31VBB;?KYlL%sv$wEjDAFD}f?e|W15yU;)wt0*v0{r$gS$5sqK3!ZpC1l<
z9J#mai@{+8bs`c$Bw8Z;R&qavpuiuIia-OxuI4{iu3vXrzf$l_1z6BR+5vgY|B-F_
z%JWp&B~q?ZCxh-(S-YSC!QQ$IjE(9}uh)y6@l^T`S!$g*SF!($?Y0yI$3+9q2LC^2
zs}pOXQ(JZKe=g
zFxH@rztq8@ZvV=&=y^0~iX;EH>@T~ljJ?laXcw5`DC>pxu$JWai4HXJX{|xsnF5tC
z2-Pw&yXw^;dmyjAtgbm6!CoJm?TMH%usZpe&XdQBi#VX2p5e}~*tGAtYitHM_mxHF98_`mUZHr}o`>_wVc2dpCemVb;
z&TDhHUr}gCmimB)_MCI|~ha6jHG#7cqo3x4><3A
zK&XO=v&A)FNh_z%8De-Jb$YG4)MB+2<7~|u^x;ZKiw0o(dff|1ca^~tt7B&*K{sEL
zr2=uk6J5Mo*8WbLN>v0UwK6V0`=Up_6!~CYil+f)Rb@paCAw`CCD{RNzK^r-Rm9zx
zPbZXW8BL85s^Vx;SuY|ApY?sdqTgTVmE~ifz3Bsj^tnu9|KBCLZNso@
zX9-o4!h!Hwuq_iaaf3e5$2GF{az25XDETIg_(Dq~I6veWd0NVKXG|z5(j2e~en;84
zo?c4eBL2ZK_)WzvMvzI)TY;UPyU>1C-_}=t)Q#2lRH;k)aB4M}Yq&VLTL)~cki#=8
zew|$=&g@)?HNvzQ(Qiu1D%mIa$4OFgr@JLQvx+(&-geZ;7e3k0DpO<=l#RlR6fj_y
zey$gywi!%b2#jTDve`XJ@DBxXyPc=qPD8uwozc)Cr~KEst%NhBC3D5Ea^EGMG4Z-%
z(=r?CO9m4iUXcx*5{J}GkIqGoKbp9Dh-~-0lD~n2sJZ1%I+}mbATcPG^+RW5C0CI{
zD(vzPB~9-#QIuwM{v$|bAumjUGG&V>zf+(%uKJvs1K-f!^Ct-Ucd^
z-$Jn)U>nblGajf8Jlh!C=^h*#B=K&RBW5;Htd_I)W59KI{p>@S-iuQ&M!T*NU#tEa
zxW1{5R)PUX`PUSvZipUsHgMR?i9a1Y=^Hy-I
zq#j69xh&BG9Gx*9Anj^B&XgD*&x(0!O{_FYGyUV;$BuiMzEt&0#wn*i8Y6R1NNBxO
znt9>#i;=w-rJ{)Fn@mjGd%;3|$q&KfGc76NYud-okQz>+ooy>M?&nOZQeNDf+85Hb
zwZS&!*;eOk+Qqv|fwe82&x#FffmbHuE9XJ|r)Tcc1z~lan)(efmW_q2Z+V^h0bw9z
z+89XKPC=3l19;|l-0s!-CTTm&Tnjq6!8fF2GbMh8_K>*X)qU{104j8zK|0W&IgLhp
z1T+q)rQGV>0;VoFVe>tdr0+Y|uSztH6mKUTeW|~4C6ooZARNG*iA=y(yJ@)>C_TFd
zG*2>(ex<^QDMAix*E-!Xe2N=V9Ec8^)&c)opD(13I-qRq^KW=)wJG#(Z_r^MtXqw%
z*RH)mbT#%INRRM@C(e$QoY;&Jw?WDGmj!zy(LTmXb|%oeo#^~#a7gWh%&B%?vnhmd
znA@4CU$33jJ9u?rZ-?W9T(u*iu9_4T(gQ73(D++oke6-mKI6x{l<#I}u|-KK^{zUY
zXqW{Q>BImH6V^dqc3UytVb|CAiRuveGb^u-HjYTmMQ=8NUiREcHayiG!TW|3YR4Ao
z*2vwqsThpuze(Z^mt~JZ^d$bHy>dKj)t5fW~r(W)vxl;N#
zC#R(?<;?7@?(X4ZM`^`pgvYl~KQPgcfBd$HkIO;GW~T?6KX(~2%GOx`kuC2n95ZddQFSmjqg}KA`
zxU6qsmf-0tK-cfkkDrsG-bFMkW=&5W0?H<@zKf;v=Tbu4T*z<1KJT`tgDFF>fh()=
zZ=dLlq(#-MfD>Bgq{YaUK#H(5_;!|n!UsaB`(V&Z7q
zY}Yu=O6uC%LjO@vF_~hNWOn?!<8e*g#?O{3$C2g7_FUj^xujV8n#|_JtjCUKnkMmB
zE{wiEZ?{i#PJ?e-00e{ju^)m|}{;ez*)bD>K2+wGVvNC4wE>sb>
zFwyKG)?)Je4ZSBNEj17%eb5JeqkbTs^{&HX0$IfPi?Y~Cc2<4V@66C+A1qF%t=GN@1>fB4;AIycW&K|5`o!9b`E2K$ACRma
z*A;PZvs+%tdqXGCuO{Nfh}D5y8;|>*@xQSDL*b2rRP4Vpfv9d!jRgG>`F^&sEY=2u
zsWLELY3>U>A^@owP9-XiP&(R5W$Wn;oB-uj*Y=;SE1wmQd+2N#8*Ak{9$el7ZZto?
z`pD42jD*$bvkIQSg3LTox$I?i)sIVg6rZURm~U?wiyeM8t_O0n9@&<<9K4Z;k@M^2
zP|^%lQ#{k*K7U%oVyS0NQ(uZ{9s-&mleAb9Ewl`NIu{dwH9KshTeSoiwP3z%LejE@`liswJ-Pv|U>91aL1?Y%Dy|du+#(Nav$o+|Czxr!n*~52Fxlhn=m@)xA@RmrT>K+RkeJ?ssHDC
zSd&{$#A2r0p6fNL7+y*~7*Df(B{FZ&v6}WA)7qp$yCObcxs1ejR;lA>N6>IS7P5(=
z%svFktl0V!Yvk{h7&zB`$B05M$AIZ6si@jNTDwJ2UlXE6&u^SzsyLFA65d}^hsJWR
zYtXXhM(3tBrxkq!X9wm}}*CIgVxVjL^Jljl*MF7f|K*uHTGdYSx!{?zjbJ
zKDlV=v?5P$8o0Z~zc6T_730uM7A<*q!Kx}GXQ^i=HJuUoxA1+^#sQ4`5)U@|4kDmo#`l08*7ZGO9}@xNM7Bo%8Q%G+}$iTw6Op{NS_AiOt7TpzTA^c0kx#SWq@yFNK?QiLd^}
zE_+4WV5IR*_&ExMNBKr>PtVfg7Y;d+U9Ewe+4t*Y<
z?a5QP=M8kk%=-V#DH^^XEL7fjd|jusN@NB}4Bli}IMcC6T=JMn@Crx+$aMDzTOt;O
z$9dO}zbLT-ev-;t&@m`6FLDA&@>&5P%hj-RXI;+SuSWYC#(EbmLDdpAm8-yxo%Jk^
zmq$b|@Wlwc^K$H%2ea1>af1yxEi9S!e&4Xku9&W~NFD=JaBHHRpyYD0Dyu6$p>nJieFZcHW
z0nx=kYo1<%CaL$8rl)aT%d8zGazoILrQ%^ScQh}0J>^gKOwaS@_MYcQ-EXeBC9g_U
zH%QvytgOIM`1HCtokG5`M&vSH_L(AcR)8owXf~>;NFVx=BuI;o7?Jd31z2-lv)3dB*?^L+8cREe(ro2K8{dLE
zvw=fsD=imM80fa{1l^-HNE5g)>b?)Hg6~agPgF-bs6z$n-T`|aZCplaKeDVm_qJ(R
z=ztct#V{5n-3KCpa;lX7d%ndUPSBFWs8BsFeGQB^|u^ryMR
zQHN#hR@L_h?k_X-dl$?-V5vB);}ohZ{iLxEuO#hEewEP?*qnGo7(I->eN}3RH_0%F
zS(SI&RlOfH@HTCD$WLkS5R_@0+>M=@7FM3S>GOoz+sq1#wC@L(jTMC&Q!XDvk_4S+
zkG@%-FRoO@?&3UAC%wCM6NtM`^I6n9jlvxaJ8Oa<`V=bqQ86I3?mNrRxDItDZ7&qL
zG?Y!s<@rsDutf0SZfeXjuA(10wlf+g3D0`eX*EvRsbaMOv)793(;A22AfwoDFfD*M
z=>jCtl{PB)nI>paY;zF!ythc=yw1Dim%Oq_7LJm^QiR?ms4FD6m{Y8j{JK+DgWZNl
z2jjYr*l`yW@zF0eIDQY=pfbB0obp3cE-dd{F|bXYRIk?xd?$X3SEpQ+{{*sTV`|}-
zJ0E$S+ps|>n(%`3>R%B-VW6ac8_rJreoHTcM!uQCUPg{fffi7UORJ
zv=9CbAJYx1MEf1B!+_%BhyLWFOzb(4
zoU<{sbSm9rs@XW$#-37hwnj7*Rf&OocAtX%3d+h&
zT+5buDFJH9m+qY?65;`6jzl>0=&LxR1;M7?{F+Zda`sQG_#1BSgGle9UH3F+A9F3k
zjDdHai9FO!s1YGeth2=Ut3HM?$Slny_%R&wThpn3Uing_ik;wwtei%ae}^HWIF?xY
zx%#>e?krRk!VL-hb)j4`b@(kH&rgkrM#fn#P$aGzJ124PcTwOSXoJu_skJopt
zNM?W%Ey6154s#>;HN124WOe6&{y->6;-gM--b+S$i*CM4^8RZ?k*%kHP*-0;wJZ69v`3WXml6VP6
zNi#q~boht{l?u8xfejgryP_^CPfn-S{u-9lM%bY3@D&}s
ztEC`#{tBOC@%tTJ+=9`rx`FKlD+kzx!KZXb#LgXC3wt%cr~T=3=Js7&_uT78+)Q|%
z@i*HCp~h=T@MKb0Z
z7G;+%c)y0W^bdNaQ4>Dstz&+b$ljZv?a4fvJy_}W)5Mp@rR}Wun)%ax(DwUhTs67tc29^N&0r
ztL{FP@?hT48DpH?VoOb((720*xBLG+DEK=-z%DST+|EsYQa%wJt)Q3I#no;fQ-zNI
zr7Zh)dqzf%dVcxJGZV{azfbvW6Yr~URq65crS@R;eUHor7RHK!^<1y;R=Y83rTqF>m?Dd(~>
z{$x*VIbU~B5GNhs`K_ptjhNB{7SASNOy7J1I;$IiZ3(#KXgN7b_@K|8+v&dxRH^ts
ce8P}nql5%NH`(do(x*jNXZJl%olaf*FW(2V+yDRo
literal 0
HcmV?d00001
diff --git a/Fig. 2-Time Series Plot (5-minute interval vs averaged steps taken.png b/Fig. 2-Time Series Plot (5-minute interval vs averaged steps taken.png
new file mode 100644
index 0000000000000000000000000000000000000000..6ac122fafd899f4e99c5b16c0d9ec3ce1bc78898
GIT binary patch
literal 5776
zcmb_g2{=^y-#=r`I-!tl+O%9WEh1x>t8#@jDUoeNH7X)Y9E_Pkk<@S_++?}ZX30gD
zD`N~Q2~8!*Iy2TRnSd`<=w2M{Jbj
zwd4T+P_jLA@E8D~VDWF2Br@~sH?#xt;2%BgWQ{xl00#hM01yGV6o5_Mh*
z01gL`aX92o!I8;0awRSWxqBSMSAc^9!y%x^f$<8FBm+1^j4b9x{!_@vRoO$X6d_Cj
z4vc`JA%>Wnflv&_D+V%rB}%~oDc|*`q~KCg$SIX6DLta10EeO>Q6(9uB;zVYm6amV
zD~7`>M!>6AqE{kOPYSN52jQyh5%u(lL_wC`=w7Is;6?D^eO$;$)|13MHlZr
z?dg4HYBn+QspQMci>DK2_6kLWb4)cU)auOub$t+ozic$K8pqeZo@QuMB(>>PS^f^X
z)gA!z8KC5yZibR=iaIX%et1xoo8v0KfJ;}TJxk>aZlErhIQlDsgr?=9!Ofj0<|#u>
zOY=_^L3MSJ!FQvhBh&G$g}{~1b)IKiW}K=EPSTdTI*EJQgrzOmq7J>9pl7>t2S1<6
z$~^!G#tv1_9H+>nUC__q^b_a-xjP`=Ve3`Y?r0QX_F%^
zWNse|4iT;IvNV%n)@}L|%V?dfOEZtN-w{4Fk1jk|UEo1mvYp@lMi`g^F|QayD0b+(LV
z2^Dx>;vd^@!;A--bZ%{K^Ip-n4GRz7%ULzF>G~t>)-%`m7^?8nufKV%f#euqlJ~f(
zJe+{(*Pm2DgIUNZ7_JfY>&v~F9z^W6@j0Cna~qzFHzD{2BK|W#U2pj(W@{Lny=Q-
z2L#39u+v?d(pTmFF3H*|1KQ|cE2qmcasQ-DR#CTXTDmJiagKSzRtc<=a#TmpAtHd_
z5kiWE|4F6GdI=5nAd6jW^X*pY(s*9Se@&I$Z!j2o`~%7ZDc8TX75q)9D_}Do_K%X%
z0A`lz8CSL>Nmr7Y`X8y5;eRO(2)-*SbIOU#jdJvffK+&Y`q0B2V|OUPn$T+`LTPy|
z@k(Aa`xCJ{w0Bbsh1B<|p|0#%F3}C)0auo6?cbh7acG3|a?cET3xcgiRvFTG(Q<+Q
z=W@EBKl2YMPj#qITP;1WrJG*Yy&1!OQn$~xjw!_)-g#M0TDTCIiWjC)Lqam|1_FVm
zhduWzi=e2;=iKO;mGQl|wcYYVuH!^!SG8N5pyom9>qhrZcVH_(4CC
zmpU1<63K%0M~p{LStshjlX=`$SO*QBKdhAN^_Bw{>Ot=_C0OP+{80TFNF5#%BY#)Y
zY=l@?*M8X>X7n8
zfm_9PGx{EIzb7;G|I3_=0yjv8xlaa^)n8RX&)Bj4hyEnhE3RyNjg%kmSj#mS^6o3V
z`r%H#l(#zcUV2FEP9x8ECl2F4=lpb3QD-{Lel>aeDAx05mUu-`;3-vc$mQ5I+pIFB
zDTbc=#VZX3YKToG-?#sJ0U`Rr425B%RN7?#tWdqu^(&Md$iRjNcrj&}j&co?hhU`p
zK8zaEU~AuA-^#EU5Y6@!oz|Itfatl$nJ)6Bv2ZWV`il`~*^Y^7_&x;a*jMz)ZViSny1zKJGEZB%(oOYReC<_k1c`D`ok@x4Kq(?_q`uf&8
zI@6$Dpj+%tznRzBL$~xG6Qlw`aBk-lY0%5=5`+lYAttxMP4@fQV^C
zm|@X0*p_Z$x0>8!ncbLWbTMsxs}_@9@zm5kOx;nO3CKn$!S$B|y>Aeju!6Sq?6con
zC!QIp*(t%qBF*S~Zdw86`X32B&DVgHb=V50FT(mPNm;1jq&78Cz(z55ZrL}W@$LJo
zz7XO2{+M>xL*nGrH~z!hI4mu7Bys3j|6k6lp$7RaYQ&)Dx#T({gXQ|-ppu_?dxtFI
zu-uS&cd94tPg_ftXPqGnAerNB;FT?rMc?Wk^}2N?JiKbOo0G6X=n;FKP`v&Q$9L1U((&VPQmZhZPQ3Jd;;N1Abm7GZ
z`v$PTaT%iwknI<5)6imiXJBBeV|vVL4(sE=*TEPmoE?pl)(~6ho1o(IAvd@C!+-U&vp2-o#UuLXCDUif;d`0NHKk~R~vr4
zVGW$)?V$jbtA2ozC>cgP=`s`HZthtH(5hIet~I<%g0vdxEJ;xrU-rT0{1eR@20bh1
z)MM(u$F0TgXzt+r{nBgAu6HeJc6(sjuvR2qWKJ$(T8CW>U-$}dWa
zVw<=9jSG&bx2H*C#>i!MB8s2^k6Wv;Wfia_
z!X+OzM~PHPo{7uZZhcP)Yd#C~lT2<|o`3QYeK#`(vmOn$J0GZ6
z%cO|4)G5N>equM#Ge(;lOtpdj2$Nf$R;DrDYVI)^%#>ANRF^7gt)|H?UgMR(2dPXM
zLp{3bMg2CrHHM8g^jsC@v2&~OOTj7InR&Z-HfL@_*Q?{%F3NPYa?V1X3X?}xgdMx&
zcwL=d9&8O?eKxw~HNWHMMMwqBy`9#t!gFx9t@sJ-@aTqG@xfr1C#wJHZQ}{8oa~qC
z_zW3gX7OqGqv0`dZ1{>4iT%3X;B}!wi`}{B05<6{??t$YDyeI~=Sk=Nn`A)O!^+rH
zS>}0z^UkQ!yDd^VHSOcBIj2JGp=@jVybQ@h3VKjd;Cc*Jul}rT4GoNj(=nO)@yR|i
ztf*uy=*01GOuk$y_gH{^stjb$t>!=Q0%Ja`n
zbW8-%xjeR3%j`-rAdscZU%zkx9NDc3eRFRvk73WY0D%gmZ$oHkr6HW8ygeO7Jq5-?
zj^T-?;YB|MmU&AokHGSz1>K@XR2ze3cwW&obbN7mWTzPymc!hP*LY2o;23>XTbb&a
zbHO|z?p{LLhRf^?o`0G~@E2*@s9(=CzQO0;I5$O}ondVx$F0=Z!L|!m^If+rJ(Ac?
zk*ChjTBCAPFw?y%8lBi;SF-_Y7ltwer2d|3LkXaL(WQLorqMAfQ!&T}#8f-*&9ost
zq%aiC?ImI<;UlgMqZ(iPJvyRfIS#k3^^VGMlv1P0Q_k|bns|x)E+yQ}tF!U}0QFF%
zlGjP3?y5{a8_O^q4QwkbpI{-f1ox6n+kVHr)4SOf(9Jk&_>Hq9|HZ6x(k6CtOC^6$
zjz7_qNdo!NRi%e|t`_I+x4Tx7=Wg@)9DI#tnJ4E}BDa<`8|?7Bah`Zs
zfm=%jn7=s76HakM`Q}Se#+2-1jrNNG>CWxO38KHUm&K_n=c?nupf!eVU(tFXYEq4&
z2=kUv-k
z`OMD{b#Qn;y*)G~83%twFC$)O7CXt!$q&V8Y-;fALUUP%)pjeGypQKNZ@x9?^2vs;
z3{yVECcAOFuO^>>1}w`H7RW}t!Nu-EHy;3_6ibAqg$4KcYp3WOn-FX6C#9TEHeiPv
zJBep=xHm>{guDu)bk*gFG)1yhXee<3eqH!5-v2?sTDB=MqA{j4Cpf(d2t-+4sMY$w
zSYNh&eJk`&ZJ1vpL(ws&
z6r}1J2dPQo!%e^3{K{WC(i0~?3rRj8=`n*j;|&^E+1Ayhr~r0V{A$+a+I^B>iTTOR
zLO5|4YBl@$50zaMCvwq@*8CqogQ)CIVvLRq@ig=
zv6>^j@ueAMem3`9|Zc~&%L2$k7vk31%~@9xv~wW0xluvsVeibWkcTDaMOJXpshNjmbN}e6EfLanJdj4
zn3C~Dp}`up9?q@C(JB-n;-e(I3X*blgU1(id0RP0lsKXWC0=5=lzz9MP8#o!E!!A*8c1R&BU6D)G
zSV~@`Guq(DT<e0d|^a
z&+}Zpzs(m$QnS02nM%2jEUiv-3EkO66^<4@Bc)bl@#f)`qsH;2)RXMKI7qo#ONh?s
zhPr84GlT?cs4LWUF3eq<#D3rU7$utivUnDYr|CC~EO=U#Zog!!g!>*%&f{l!YpjU-
z^()MHjsLT_zNZWB??$Rvf%{{Vhtc0~Q0La)qRVS)3_P;6Z;K3ArFEBDHja4p`I;7v
zp684Wr~&Dg8^5({d=e!lXssX{ziTTU2JaI!-1p0_XhAIoze{pM7`g2j4gK*<8RlXk
z)tTmz_UN=0)VA}9^>UMSB2EV0X%ASwRi&e!O7SF(Q1H+HjbS72>H*;q{4D=M|AND0
z@lM_{lGm6cliTqxDY86CT{=1x(YZ^H6*2MWpT8bicwuWdPZC^4;Ym0s*sUfwl)>S={ASOMp5GDV^p_sX
zv!kXuSHG|GC+At?A+}-pg
ziLk<|>yE&?Swz)&mW18v;jpNSEu>o>Q~DVT7une00w_19ug=mwde1<6Tn~}4XR^t(
zX{^&n`m#;|DKn>;Uh^B
ZVt83#98HP)Qv7Av*80f7C;Lyu{u^rCRaO81
literal 0
HcmV?d00001
From 60e19fa953117bbe50376f365d191ead60bd153c Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 20:11:42 +0600
Subject: [PATCH 6/8] Add files via upload
From 0c1ac33778637d637e187e238eb7f3ba5e203fff Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 20:12:18 +0600
Subject: [PATCH 7/8] Delete RMarkdown_Project1.Rmd
---
RMarkdown_Project1.Rmd | 133 -----------------------------------------
1 file changed, 133 deletions(-)
delete mode 100644 RMarkdown_Project1.Rmd
diff --git a/RMarkdown_Project1.Rmd b/RMarkdown_Project1.Rmd
deleted file mode 100644
index 473bbef75a5..00000000000
--- a/RMarkdown_Project1.Rmd
+++ /dev/null
@@ -1,133 +0,0 @@
----
-title: "RMarkdown_Project1"
-author: "kks_git"
-date: "2023-12-22"
-output: html_document
----
-
-```{r setup, include=FALSE}
-knitr::opts_chunk$set(echo = TRUE)
-library(ggplot2)
-library(dplyr)
-library(tidyr)
-library(rmarkdown)
-```
-
-A. Loading and preprocessing the data
-
-1. Load the Data
-
-```{r}
-data<-".\\activity.csv"
-dataP1<-read.csv(data, header=TRUE)
-```
-
-2. Process/transform the Data
-
-```{r}
-dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
-```
-
-B. What is mean total number of steps taken per day?
-
-1. Calculate the total number of steps taken per day
-
-```{r}
-dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)
-```
-
-2. Make a histogram of the total number of steps taken each day
-
-```{r}
-steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
-steps_hist
-```
-
-3. Calculate and report the mean and median of the total number of steps taken per day
-
-```{r}
-stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
-stepsMean
-stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
-stepsMedian
-```
-
-C. What is the average daily activity pattern?
-
-1. Make a time series plot (i.e.
-type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis)
-```{r}
-stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
-timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
-timeSeries_plot
-```
-
-2. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
-```{r}
-maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
-maxInterval
-```
-
-D. Imputing missing values
-
-1. Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NA NAs)
-```{r}
-totalNAs<-sum(is.na(dataP1$steps))
-totalNAs
-```
-
-2. Devise a strategy for filling in all of the missing values in the dataset. The strategy is to fill all NAs with the average of 5-minute interval.
-```{r}
-meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
-```
-
-3. Create a new dataset that is equal to the original dataset but with the missing data filled in.
-```{r}
-fill_data<-dataP1
-steps_NAs<-is.na(dataP1$steps)
-NAs<-na.omit(subset(meanInterval,interval==dataP1$interval[steps_NAs]))
-fill_data$steps[steps_NAs]<-NAs[,2]
-fill_stepsNA<-sum(is.na(fill_data))
-fill_stepsNA
-```
-
-4A. Make a histogram of the total number of steps taken each day
-```{r}
-dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
-steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
-steps_filled_hist
-```
-
-4B. Calculate and report the mean and median total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
-```{r}
-steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
-steps_filledMean
-steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
-steps_filledMedian
-```
-
-4C.Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
-```{r}
-"The mean values are same for both cases, but the median was higher for data with NAs. However, the mean and median become equal after filling NAs."
-```
-
-E. Are there differences in activity patterns between weekdays and weekends?
-
-1.Create a new factor variable in the dataset with two levels -- "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.
-```{r}
-fill_data$date<-as.Date(fill_data$date)
-wday=c("Monday", "Tuesday","Wednesday", "Thursday","Friday")
-fill_data$day<-factor(ifelse(weekdays(fill_data$date) %in% wday,'weekday','weekend'))
-```
-
-2. Create a new factor variable in the dataset with two levels -- "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.
-```{r}
-meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
-timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
-timeSeries_daySteps
-```
-
-F. Convert to html
-```{r}
-```
-
From af0b8bfcd8c449bbb36c70b7f09fa955c59770f0 Mon Sep 17 00:00:00 2001
From: kks-gt <128405618+kks-gt@users.noreply.github.com>
Date: Sat, 23 Dec 2023 20:12:55 +0600
Subject: [PATCH 8/8] Add files via upload
---
RMarkdown_Project1.Rmd | 133 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 133 insertions(+)
create mode 100644 RMarkdown_Project1.Rmd
diff --git a/RMarkdown_Project1.Rmd b/RMarkdown_Project1.Rmd
new file mode 100644
index 00000000000..473bbef75a5
--- /dev/null
+++ b/RMarkdown_Project1.Rmd
@@ -0,0 +1,133 @@
+---
+title: "RMarkdown_Project1"
+author: "kks_git"
+date: "2023-12-22"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+library(ggplot2)
+library(dplyr)
+library(tidyr)
+library(rmarkdown)
+```
+
+A. Loading and preprocessing the data
+
+1. Load the Data
+
+```{r}
+data <- "activity.csv"  # plain relative path: the Windows-only ".\\" prefix does not resolve on Linux/macOS
+dataP1 <- read.csv(data, header = TRUE)
+```
+
+2. Process/transform the Data
+
+```{r}
+dataP1$date<-as.Date(x=dataP1$date, format="%Y-%m-%d")
+```
+
+B. What is mean total number of steps taken per day?
+
+1. Calculate the total number of steps taken per day
+
+```{r}
+dailySteps<-aggregate(steps~date, data=dataP1, FUN=sum)
+```
+
+2. Make a histogram of the total number of steps taken each day
+
+```{r}
+steps_hist<-ggplot(data = dailySteps, aes(x=steps)) +geom_histogram(fill="red", binwidth=500)+labs(title="Steps per Day", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_hist
+```
+
+3. Calculate and report the mean and median of the total number of steps taken per day
+
+```{r}
+stepsMean<-mean(dailySteps$steps, na.rm=TRUE)
+stepsMean
+stepsMedian<-median(dailySteps$steps, na.rm=TRUE)
+stepsMedian
+```
+
+C. What is the average daily activity pattern?
+
+1. Make a time series plot (i.e.
+type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all days (y-axis)
+```{r}
+stepsInterval<-aggregate(steps~interval, data = dataP1, FUN = mean)
+timeSeries_plot<-ggplot(stepsInterval, aes(x=interval, y=steps)) +geom_line(color="blue", linewidth=1)+labs(title="Steps per Interval", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_plot
+```
+
+2. Which 5-minute interval, on average across all the days in the dataset, contains the maximum number of steps?
+```{r}
+maxInterval<-stepsInterval[which.max(stepsInterval$steps),]
+maxInterval
+```
+
+D. Imputing missing values
+
+1. Calculate and report the total number of missing values in the dataset (i.e. the total number of rows with NAs)
+```{r}
+totalNAs<-sum(is.na(dataP1$steps))
+totalNAs
+```
+
+2. Devise a strategy for filling in all of the missing values in the dataset. The strategy is to fill all NAs with the average of 5-minute interval.
+```{r}
+meanInterval<-aggregate(steps~interval, data=dataP1, FUN=mean, na.rm=TRUE)
+```
+
+3. Create a new dataset that is equal to the original dataset but with the missing data filled in.
+```{r}
+fill_data <- dataP1
+steps_NAs <- is.na(dataP1$steps)
+# Look up each missing row's own interval mean; match() pairs rows one-to-one instead of relying on `==` recycling.
+fill_data$steps[steps_NAs] <- meanInterval$steps[match(fill_data$interval[steps_NAs], meanInterval$interval)]
+fill_stepsNA <- sum(is.na(fill_data))
+fill_stepsNA
+```
+
+4A. Make a histogram of the total number of steps taken each day
+```{r}
+dailySteps_filled<-aggregate(steps~date, data=fill_data, FUN=sum,na.rm=TRUE)
+steps_filled_hist<-ggplot(data = dailySteps_filled, aes(x=steps)) +geom_histogram(fill="green", binwidth=500)+labs(title="Steps per Day with no NAs", x="steps",y="frequency")+theme(plot.title = element_text(hjust=0.5))
+steps_filled_hist
+```
+
+4B. Calculate and report the mean and median total number of steps taken per day. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
+```{r}
+steps_filledMean<-mean(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMean
+steps_filledMedian<-median(dailySteps_filled$steps, na.rm=TRUE)
+steps_filledMedian
+```
+
+4C. Do these values differ from the estimates from the first part of the assignment? What is the impact of imputing missing data on the estimates of the total daily number of steps?
+```{r}
+"The mean values are the same for both cases, but the median was slightly lower than the mean for the data with NAs. After filling the NAs, the mean and median become equal."
+```
+
+E. Are there differences in activity patterns between weekdays and weekends?
+
+1. Create a new factor variable in the dataset with two levels -- "weekday" and "weekend" indicating whether a given date is a weekday or weekend day.
+```{r}
+fill_data$date <- as.Date(fill_data$date)
+is_weekday <- as.POSIXlt(fill_data$date)$wday %in% 1:5  # wday: 0 = Sunday ... 6 = Saturday; numeric, so locale-independent unlike weekdays()
+fill_data$day <- factor(ifelse(is_weekday, "weekday", "weekend"))
+```
+
+2. Make a panel plot containing a time series plot (i.e. type = "l") of the 5-minute interval (x-axis) and the average number of steps taken, averaged across all weekday days or weekend days (y-axis).
+```{r}
+meanSteps_days<-aggregate(steps~interval+day, data = fill_data, FUN = mean, na.rm=TRUE)
+timeSeries_daySteps<-ggplot(meanSteps_days, aes(x=interval, y=steps,color=day)) +geom_line()+facet_grid(day~.)+labs(title="Steps by Days", x="interval", y="average steps") +theme(plot.title = element_text(hjust=0.5, vjust = 2))
+timeSeries_daySteps
+```
+
+F. Convert to html
+```{r}
+```
+