Data Science Using R
Data Science Using R
Call:
lm(formula = mpg ~ wt, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-4.5432 -2.3647 -0.1252 1.4096 6.8727
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
wt -5.3445 0.5591 -9.559 1.29e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
> abline(fit,col=3,lwd=2)
> mtext(lmlab, 3, line=-2)
Error in as.graphicsAnnot(text) : object 'lmlab' not found
> # Create a vector.
> x <- c(12,7,3,4.2,18,2,54,-21,8,-5)
> # Find Mean.
> result.mean <- mean(x)
> print(result.mean)
[1] 8.22
> # Create the function.
> getmode <- function(v) {
+ uniqv <- unique(v)
+ uniqv[which.max(tabulate(match(v, uniqv)))]
+ }
> # Create the vector with numbers.
> v <- c(2,1,2,3,1,2,3,4,1,5,5,3,2,3)
> # Calculate the mode using the user function.
> result <- getmode(v)
> print(result)
[1] 2
> # Create the vector with characters.
> charv <- c("o","it","the","it","it")
> # Calculate the mode using the user function.
> result <- getmode(charv)
> print(result)
[1] "it"
> x <- c(151, 174, 138, 186, 128, 136, 179, 163, 152, 131)
> y <- c(63, 81, 56, 91, 47, 57, 76, 72, 62, 48)
> # Apply the lm() function.
> relation <- lm(y~x)
> print(relation)
Call:
lm(formula = y ~ x)
Coefficients:
(Intercept) x
-38.4551 0.6746
> # Apply the lm() function.
> relation <- lm(y~x)
> print(summary(relation))
Call:
lm(formula = y ~ x)
Residuals:
Min 1Q Median 3Q Max
-6.3002 -1.6629 0.0412 1.8944 3.9775
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -38.45509 8.04901 -4.778 0.00139 **
x 0.67461 0.05191 12.997 1.16e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
[[2]]
[1] 21.3
[[3]]
function (x) .Primitive("sin")
, , 2
> # Access the list element using the name of the element.
> print(list_data$A_Matrix)
[,1] [,2] [,3]
[1,] 3 5 -2
[2,] 9 1 8
> # Define the column and row names.
> rownames = c("row1", "row2", "row3", "row4")
> colnames = c("col1", "col2", "col3")
> # Create the matrix.
> P <- matrix(c(3:14), nrow = 4, byrow = TRUE, dimnames =
+ list(rownames, colnames))
> # Access the element at 3rd column and 1st row.
> print(P[1,3])
[1] 5
> # Access the element at 2nd column and 4th row.
> print(P[4,2])
[1] 13
> # Access only the 2nd row.
> print(P[2,])
col1 col2 col3
6 7 8
> # Access only the 3rd column.
> print(P[,3])
row1 row2 row3 row4
5 8 11 14
> # Create two vectors of different lengths.
> vector1 <- c(5,9,3)
> vector2 <- c(10,11,12,13,14,15)
> # Take these vectors as input to the array.
> array1 <- array(c(vector1,vector2),dim = c(3,3,2))
> # Create two more vectors of different lengths (note: they are not used below; array2 is built from vector1 and vector2 again).
> vector3 <- c(9,1,0)
> vector4 <- c(6,0,11,3,14,1,2,6,9)
> array2 <- array(c(vector1,vector2),dim = c(3,3,2))
> # create matrices from these arrays.
> matrix1 <- array1[,,2]
> matrix2 <- array2[,,2]
> # Add the matrices.
> result <- matrix1+matrix2
> print(result)
[,1] [,2] [,3]
[1,] 10 20 26
[2,] 18 22 28
[3,] 6 24 30
> # Create the vectors for data frame.
> height <- c(132,151,162,139,166,147,122)
> weight <- c(48,49,66,53,67,52,40)
> gender <-
+ c("male","male","female","female","male","female","male")
> # Create the data frame.
> input_data <- data.frame(height,weight,gender)
> print(input_data)
height weight gender
1 132 48 male
2 151 49 male
3 162 66 female
4 139 53 female
5 166 67 male
6 147 52 female
7 122 40 male
> # Test if the gender column is a factor.
> print(is.factor(input_data$gender))
[1] FALSE
> # Print the gender column to see the levels.
> print(input_data$gender)
[1] "male" "male" "female" "female" "male" "female" "male"
> # Create the data frame.
> emp.data <- data.frame(
+ emp_id = c (1:5),
+ emp_name = c("Rick","Dan","Michelle","Ryan","Gary"),
+ salary = c(623.3,515.2,611.0,729.0,843.25),
+
+ start_date = as.Date(c("2012-01-01", "2013-09-23",
+ "2014-11-15", "2014-05-11",
+ "2015-03-27")),
+ stringsAsFactors = FALSE
+ )
> # Print the summary.
> print(summary(emp.data))
     emp_id    emp_name             salary        start_date
 Min.   :1   Length:5           Min.   :515.2   Min.   :2012-01-01
 1st Qu.:2   Class :character   1st Qu.:611.0   1st Qu.:2013-09-23
 Median :3   Mode  :character   Median :623.3   Median :2014-05-11
 Mean   :3                      Mean   :664.4   Mean   :2014-01-14
 3rd Qu.:4                      3rd Qu.:729.0   3rd Qu.:2014-11-15
 Max.   :5                      Max.   :843.2   Max.   :2015-03-27
>
> # Create vector objects.
> city <- c("Tampa","Seattle","Hartford","Denver")
> state <- c("FL","WA","CT","CO")
> zipcode <- c(33602,98104,06161,80294)
> # Combine above three vectors column-wise (cbind() on plain vectors returns a character matrix, not a data frame).
> addresses <- cbind(city,state,zipcode)
> # Print a header.
> cat("# # # # The First data frame\n")
# # # # The First data frame
> # Print the data frame.
> print(addresses)
city state zipcode
[1,] "Tampa" "FL" "33602"
[2,] "Seattle" "WA" "98104"
[3,] "Hartford" "CT" "6161"
[4,] "Denver" "CO" "80294"
> # Create another data frame with similar columns
> new.address <- data.frame(
+ city = c("Lowry","Charlotte"),
+ state = c("CO","FL"),
+ zipcode = c("80230","33949"),
+ stringsAsFactors = FALSE
+ )
> # Print a header.
> cat("# # # The Second data frame\n")
# # # The Second data frame
> # Print the data frame.
> print(new.address)
city state zipcode
1 Lowry CO 80230
2 Charlotte FL 33949
> # Combine rows from both the data frames.
> all.addresses <- rbind(addresses,new.address)
> # Print a header.
> cat("# # # The combined data frame\n")
# # # The combined data frame
> # Print the result.
> print(all.addresses)
city state zipcode
1 Tampa FL 33602
2 Seattle WA 98104
3 Hartford CT 6161
4 Denver CO 80294
5 Lowry CO 80230
6 Charlotte FL 33949