diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..5b6a0652566 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/PA1_template.Rmd b/PA1_template.Rmd index d5cc677c93d..45a46623579 100644 --- a/PA1_template.Rmd +++ b/PA1_template.Rmd @@ -5,21 +5,107 @@ output: keep_md: true --- +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE) +``` +# Activity Monitoring Data Analysis -## Loading and preprocessing the data +## 0. Load the necessary library +```{r} +library(utils) +library(ggplot2) +``` +## 1. Loading and preprocessing the data +```{r} +# Read the CSV file directly from the zipped archive +zipFilePath <- "activity.zip" +csvFileName <- "activity.csv" +data <- read.csv(unzip(zipFilePath, files = csvFileName)) +# Aggregate total steps by date +totalStepsByDay <- aggregate(steps ~ date, data, sum, na.rm = TRUE) +``` -## What is mean total number of steps taken per day? +## 2. What is mean total number of steps taken per day? +```{r} +# Make a histogram of the total number of steps taken each day +hist(totalStepsByDay$steps, main = "Total Steps Taken Each Day", xlab = "Total Steps", ylab = "Frequency", col = "blue") +# Calculate and report the mean and median total number of steps taken per day +meanSteps <- mean(totalStepsByDay$steps) +medianSteps <- median(totalStepsByDay$steps) +# Print the results +cat("Mean total number of steps taken per day: ", meanSteps, "\n") +cat("Median total number of steps taken per day: ", medianSteps, "\n") +``` -## What is the average daily activity pattern? +## 3. What is the average daily activity pattern? 
+```{r} +# Calculate the average number of steps taken in each 5-minute interval +averageStepsByInterval <- aggregate(steps ~ interval, data, mean, na.rm = TRUE) +# Make a time series plot +plot(averageStepsByInterval$interval, averageStepsByInterval$steps, type = "l", xlab = "Interval", ylab = "Average Number of Steps", main = "Average Daily Activity Pattern") +# Identify the interval with the maximum average number of steps +maxInterval <- averageStepsByInterval[which.max(averageStepsByInterval$steps), ]$interval -## Imputing missing values +# Print the result +cat("The 5-minute interval with the maximum number of steps on average is:", maxInterval) +``` +## 4. Imputing missing values +```{r} +# Calculate and report the total number of missing values +totalNAs <- sum(is.na(data$steps)) +cat("Total number of missing values: ", totalNAs, "\n") +# Calculate the mean for each interval +averageStepsByInterval <- aggregate(steps ~ interval, data, mean, na.rm = TRUE) -## Are there differences in activity patterns between weekdays and weekends? +# Fill in missing values and create a new dataset +filledData <- data +for (i in 1:nrow(filledData)) { + if (is.na(filledData$steps[i])) { + filledData$steps[i] <- averageStepsByInterval$steps[which(averageStepsByInterval$interval == filledData$interval[i])] + } +} + +# Aggregate total steps by day for the filled dataset +totalStepsByDayFilled <- aggregate(steps ~ date, filledData, sum) + +# Make a histogram +hist(totalStepsByDayFilled$steps, main = "Total Steps Taken Each Day (Filled Data)", xlab = "Total Steps", ylab = "Frequency", col = "green") + +# Calculate mean and median +meanStepsFilled <- mean(totalStepsByDayFilled$steps) +medianStepsFilled <- median(totalStepsByDayFilled$steps) + +# Print the results +cat("Mean total number of steps taken per day (Filled Data): ", meanStepsFilled, "\n") +cat("Median total number of steps taken per day (Filled Data): ", medianStepsFilled, "\n") +``` + +## 5. 
Are there differences in activity patterns between weekdays and weekends? +```{r} +# Convert 'date' to Date type if it's not already +filledData$date <- as.Date(filledData$date) + +# Create a new factor variable for weekday/weekend +filledData$dayType <- ifelse(weekdays(filledData$date, abbreviate = TRUE) %in% c("土", "日"), "weekend", "weekday") +filledData$dayType <- factor(filledData$dayType, levels = c("weekday", "weekend")) + +# Aggregate average steps by interval and dayType again to reflect the updated dayType +averageStepsByDayType <- aggregate(steps ~ interval + dayType, filledData, mean) + +# Create the plot +ggplot(averageStepsByDayType, aes(x = interval, y = steps, color = dayType)) + + geom_line() + + xlab("Interval") + + ylab("Average Number of Steps") + + ggtitle("Average Number of Steps by Interval: Weekday vs Weekend") + + scale_color_manual(values = c("weekday" = "blue", "weekend" = "red")) + + theme(legend.position = "bottom") +``` \ No newline at end of file diff --git a/PA1_template.html b/PA1_template.html new file mode 100644 index 00000000000..8b48a3761f4 --- /dev/null +++ b/PA1_template.html @@ -0,0 +1,509 @@ + + + + + + + + + + + + + +Reproducible Research: Peer Assessment 1 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +
+

Activity Monitoring Data Analysis

+
+

0. Load the necessary libraries

+
library(utils)
+library(ggplot2)
+
## Warning: パッケージ 'ggplot2' はバージョン 4.3.2 の R の下で造られました
+
+
+

1. Loading and preprocessing the data

+
# Read activity.csv straight out of the zip archive through an unz()
+# connection, so no extracted copy is left behind in the working directory.
+zipFilePath <- "activity.zip"
+csvFileName <- "activity.csv"
+data <- read.csv(unz(zipFilePath, csvFileName))
+
+# Total steps per date. The formula interface of aggregate() omits rows with
+# missing values by default, so dates whose steps are all missing are left
+# out of the result rather than being counted as zero.
+totalStepsByDay <- aggregate(steps ~ date, data, sum, na.rm = TRUE)
+
+
+

2. What is mean total number of steps taken per day?

+
# Distribution of the per-day step totals
+hist(totalStepsByDay[["steps"]], col = "blue", main = "Total Steps Taken Each Day", xlab = "Total Steps", ylab = "Frequency")
+

+
# Central tendency of the per-day step totals
+medianSteps <- median(totalStepsByDay[["steps"]])
+meanSteps <- mean(totalStepsByDay[["steps"]])
+
+# Report both statistics
+cat("Mean total number of steps taken per day: ", meanSteps, "\n")
+
## Mean total number of steps taken per day:  10766.19
+
cat("Median total number of steps taken per day: ", medianSteps, "\n")
+
## Median total number of steps taken per day:  10765
+
+
+

3. What is the average daily activity pattern?

+
# Mean number of steps for every 5-minute interval, averaged across days.
+# The formula interface of aggregate() omits rows with missing steps.
+averageStepsByInterval <- aggregate(steps ~ interval, data, mean, na.rm = TRUE)
+
+# Time series of the interval means (line plot)
+with(averageStepsByInterval, plot(interval, steps, type = "l", xlab = "Interval", ylab = "Average Number of Steps", main = "Average Daily Activity Pattern"))
+

+
# Interval identifier at which the mean step count peaks
+maxInterval <- averageStepsByInterval$interval[which.max(averageStepsByInterval$steps)]
+
+# Report the result; the trailing newline matches the other cat() calls so
+# that following output does not run on the same line.
+cat("The 5-minute interval with the maximum number of steps on average is:", maxInterval, "\n")
+
## The 5-minute interval with the maximum number of steps on average is: 835
+
+
+

4. Imputing missing values

+
# Count the rows whose step count is missing
+totalNAs <- sum(is.na(data$steps))
+cat("Total number of missing values: ", totalNAs, "\n")
+
## Total number of missing values:  2304
+
# Mean steps per interval, used as the replacement value for missing rows.
+# Recomputed here so the imputation does not rely on state left by an
+# earlier chunk.
+averageStepsByInterval <- aggregate(steps ~ interval, data, mean, na.rm = TRUE)
+
+# Build the imputed dataset: every missing step count is replaced by the mean
+# of its 5-minute interval. match() looks up all missing rows in one
+# vectorized pass, which also behaves correctly when there are zero rows
+# (1:nrow(x) would iterate over c(1, 0) in that case).
+filledData <- data
+missingRows <- is.na(filledData$steps)
+filledData$steps[missingRows] <- averageStepsByInterval$steps[match(filledData$interval[missingRows], averageStepsByInterval$interval)]
+
+# Total steps per day for the imputed dataset
+totalStepsByDayFilled <- aggregate(steps ~ date, filledData, sum)
+
+# Histogram of the imputed daily totals
+hist(totalStepsByDayFilled$steps, main = "Total Steps Taken Each Day (Filled Data)", xlab = "Total Steps", ylab = "Frequency", col = "green")
+

+
# Mean and median of the imputed daily totals
+meanStepsFilled <- mean(totalStepsByDayFilled$steps)
+medianStepsFilled <- median(totalStepsByDayFilled$steps)
+
+# Report the results
+cat("Mean total number of steps taken per day (Filled Data): ", meanStepsFilled, "\n")
+
## Mean total number of steps taken per day (Filled Data):  10766.19
+
cat("Median total number of steps taken per day (Filled Data): ", medianStepsFilled, "\n")
+
## Median total number of steps taken per day (Filled Data):  10766.19
+
+
+

5. Are there differences in activity patterns between weekdays and +weekends?

+
# Make sure 'date' is a Date so the day of the week can be derived from it
+filledData$date <- as.Date(filledData$date)
+
+# Classify each row as weekday or weekend. The numeric day of week from
+# as.POSIXlt() (0 = Sunday ... 6 = Saturday) is used instead of the names
+# returned by weekdays(), because those names depend on the session locale
+# and a comparison against names from one language silently labels every
+# day "weekday" under any other locale.
+filledData$dayType <- ifelse(as.POSIXlt(filledData$date)$wday %in% c(0L, 6L), "weekend", "weekday")
+filledData$dayType <- factor(filledData$dayType, levels = c("weekday", "weekend"))
+
+# Mean steps per interval, computed separately for weekdays and weekends
+averageStepsByDayType <- aggregate(steps ~ interval + dayType, filledData, mean)
+
+# Overlay the two daily profiles, one line per day type
+ggplot(averageStepsByDayType, aes(x = interval, y = steps, color = dayType)) +
+  geom_line() +
+  xlab("Interval") +
+  ylab("Average Number of Steps") +
+  ggtitle("Average Number of Steps by Interval: Weekday vs Weekend") +
+  scale_color_manual(values = c("weekday" = "blue", "weekend" = "red")) +
+  theme(legend.position = "bottom")
+

+
+
+ + + + +
+ + + + + + + + + + + + + + + diff --git a/PA1_template.md b/PA1_template.md new file mode 100644 index 00000000000..d09b10daf44 --- /dev/null +++ b/PA1_template.md @@ -0,0 +1,165 @@ +--- +title: "Reproducible Research: Peer Assessment 1" +output: + html_document: + keep_md: true +--- + + +# Activity Monitoring Data Analysis + + +## 0. Load the necessary library + +```r +library(utils) +library(ggplot2) +``` + +``` +## Warning: パッケージ 'ggplot2' はバージョン 4.3.2 の R の下で造られました +``` +## 1. Loading and preprocessing the data + +```r +# Read the CSV file directly from the zipped archive +zipFilePath <- "activity.zip" +csvFileName <- "activity.csv" +data <- read.csv(unzip(zipFilePath, files = csvFileName)) + +# Aggregate total steps by date +totalStepsByDay <- aggregate(steps ~ date, data, sum, na.rm = TRUE) +``` + +## 2. What is mean total number of steps taken per day? + +```r +# Make a histogram of the total number of steps taken each day +hist(totalStepsByDay$steps, main = "Total Steps Taken Each Day", xlab = "Total Steps", ylab = "Frequency", col = "blue") +``` + +![](PA1_template_files/figure-html/unnamed-chunk-3-1.png) + +```r +# Calculate and report the mean and median total number of steps taken per day +meanSteps <- mean(totalStepsByDay$steps) +medianSteps <- median(totalStepsByDay$steps) + +# Print the results +cat("Mean total number of steps taken per day: ", meanSteps, "\n") +``` + +``` +## Mean total number of steps taken per day: 10766.19 +``` + +```r +cat("Median total number of steps taken per day: ", medianSteps, "\n") +``` + +``` +## Median total number of steps taken per day: 10765 +``` + +## 3. What is the average daily activity pattern? 
+ +```r +# Calculate the average number of steps taken in each 5-minute interval +averageStepsByInterval <- aggregate(steps ~ interval, data, mean, na.rm = TRUE) + +# Make a time series plot +plot(averageStepsByInterval$interval, averageStepsByInterval$steps, type = "l", xlab = "Interval", ylab = "Average Number of Steps", main = "Average Daily Activity Pattern") +``` + +![](PA1_template_files/figure-html/unnamed-chunk-4-1.png) + +```r +# Identify the interval with the maximum average number of steps +maxInterval <- averageStepsByInterval[which.max(averageStepsByInterval$steps), ]$interval + +# Print the result +cat("The 5-minute interval with the maximum number of steps on average is:", maxInterval) +``` + +``` +## The 5-minute interval with the maximum number of steps on average is: 835 +``` + +## 4. Imputing missing values + +```r +# Calculate and report the total number of missing values +totalNAs <- sum(is.na(data$steps)) +cat("Total number of missing values: ", totalNAs, "\n") +``` + +``` +## Total number of missing values: 2304 +``` + +```r +# Calculate the mean for each interval +averageStepsByInterval <- aggregate(steps ~ interval, data, mean, na.rm = TRUE) + +# Fill in missing values and create a new dataset +filledData <- data +for (i in 1:nrow(filledData)) { + if (is.na(filledData$steps[i])) { + filledData$steps[i] <- averageStepsByInterval$steps[which(averageStepsByInterval$interval == filledData$interval[i])] + } +} + +# Aggregate total steps by day for the filled dataset +totalStepsByDayFilled <- aggregate(steps ~ date, filledData, sum) + +# Make a histogram +hist(totalStepsByDayFilled$steps, main = "Total Steps Taken Each Day (Filled Data)", xlab = "Total Steps", ylab = "Frequency", col = "green") +``` + +![](PA1_template_files/figure-html/unnamed-chunk-5-1.png) + +```r +# Calculate mean and median +meanStepsFilled <- mean(totalStepsByDayFilled$steps) +medianStepsFilled <- median(totalStepsByDayFilled$steps) + +# Print the results +cat("Mean total 
number of steps taken per day (Filled Data): ", meanStepsFilled, "\n") +``` + +``` +## Mean total number of steps taken per day (Filled Data): 10766.19 +``` + +```r +cat("Median total number of steps taken per day (Filled Data): ", medianStepsFilled, "\n") +``` + +``` +## Median total number of steps taken per day (Filled Data): 10766.19 +``` + +## 5. Are there differences in activity patterns between weekdays and weekends? + +```r +# Convert 'date' to Date type if it's not already +filledData$date <- as.Date(filledData$date) + +# Create a new factor variable for weekday/weekend +filledData$dayType <- ifelse(weekdays(filledData$date, abbreviate = TRUE) %in% c("土", "日"), "weekend", "weekday") +filledData$dayType <- factor(filledData$dayType, levels = c("weekday", "weekend")) + +# Aggregate average steps by interval and dayType again to reflect the updated dayType +averageStepsByDayType <- aggregate(steps ~ interval + dayType, filledData, mean) + +# Create the plot +ggplot(averageStepsByDayType, aes(x = interval, y = steps, color = dayType)) + + geom_line() + + xlab("Interval") + + ylab("Average Number of Steps") + + ggtitle("Average Number of Steps by Interval: Weekday vs Weekend") + + scale_color_manual(values = c("weekday" = "blue", "weekend" = "red")) + + theme(legend.position = "bottom") +``` + +![](PA1_template_files/figure-html/unnamed-chunk-6-1.png) diff --git a/PA1_template_files/figure-html/unnamed-chunk-3-1.png b/PA1_template_files/figure-html/unnamed-chunk-3-1.png new file mode 100644 index 00000000000..c4e15e01a82 Binary files /dev/null and b/PA1_template_files/figure-html/unnamed-chunk-3-1.png differ diff --git a/PA1_template_files/figure-html/unnamed-chunk-4-1.png b/PA1_template_files/figure-html/unnamed-chunk-4-1.png new file mode 100644 index 00000000000..8331abcc99e Binary files /dev/null and b/PA1_template_files/figure-html/unnamed-chunk-4-1.png differ diff --git a/PA1_template_files/figure-html/unnamed-chunk-5-1.png 
b/PA1_template_files/figure-html/unnamed-chunk-5-1.png new file mode 100644 index 00000000000..3664e3b802b Binary files /dev/null and b/PA1_template_files/figure-html/unnamed-chunk-5-1.png differ diff --git a/PA1_template_files/figure-html/unnamed-chunk-6-1.png b/PA1_template_files/figure-html/unnamed-chunk-6-1.png new file mode 100644 index 00000000000..4c57fbc78b5 Binary files /dev/null and b/PA1_template_files/figure-html/unnamed-chunk-6-1.png differ diff --git a/RepData_PeerAssessment1.Rproj b/RepData_PeerAssessment1.Rproj new file mode 100644 index 00000000000..8e3c2ebc99e --- /dev/null +++ b/RepData_PeerAssessment1.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX