Tarea 8 U3 Robin Javier Ortega
Tarea 8 U3 Robin Javier Ortega
Tarea 8 U3 Robin Javier Ortega
Robin Ortega
Zambrano
Fecha:
#librerias necesarias
library(readr)
library(tidyverse)
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(BSDA)
##
## Attaching package: 'BSDA'
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.0.5
library(readxl)
#1. Una persona se sometio a una prueba para detectar una pandemia
#generalizada. El medico hizo un diagnostico clinico de que la persona no
#tiene esta enfermedad. Mas tarde, cuando se realizo un analisis de sangre,
#salio positivo. ¿Cual de los siguientes errores ha cometido el medico?
##
## Column specification -
## cols(
## NEGOCIO = col_character(),
## SECTOR = col_character(),
## VENTAS_PRECOVID = col_number(),
## VENTAS_POSTCOVID = col_number()
## )
#a. El promedio de las ventas pre emergencia sanitaria es igual a 2000
#dolares?
##
## One Sample t-test
##
## data: ventas$VENTAS_PRECOVID
## t = 64.992, df = 99, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 2000
## 95 percent confidence interval:
## 190768.6 202657.9
## sample estimates:
## mean of x
## 196713.2
#b. El promedio de las ventas del SECTOR1 es mayor al promedio de las ventas
#del SECTOR2 previo a la emergencia sanitaria?
# Hypotheses for the one-sided comparison of pre-COVID mean sales by sector.
# The null hypothesis carries the equality and the alternative matches
# `alternative = "greater"` in the call below:
# H0: mu1 <= mu2
# H1: mu1 >  mu2
vventas <- select(ventas, SECTOR, VENTAS_PRECOVID)
sect1 <- filter(vventas, SECTOR %in% c("SECTOR1"))
sect2 <- filter(vventas, SECTOR %in% c("SECTOR2"))
# Welch two-sample t-test (t.test() does not assume equal variances by
# default). `sigma.x` / `sigma.y` were removed: they are arguments of
# BSDA::z.test(), not of t.test(), which silently swallowed them through
# `...`, so the result is unchanged.
t.test(
  x = sect1$VENTAS_PRECOVID,
  y = sect2$VENTAS_PRECOVID,
  alternative = "greater",
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: sect1$VENTAS_PRECOVID and sect2$VENTAS_PRECOVID
## t = 1.9102, df = 93.966, p-value = 0.02958
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 1457.122 Inf
## sample estimates:
## mean of x mean of y
## 202079.6 190899.8
# One-sample, two-sided t-test of the post-COVID mean sales against 2000.
# `desviacion` is still computed in case later code relies on it, but it is no
# longer passed to t.test(): `sigma.x` is an argument of BSDA::z.test(), and
# t.test() was silently ignoring it through `...`.
desviacion <- sd(ventas$VENTAS_POSTCOVID)
# NOTE(review): the recorded sample mean (~147209) is two orders of magnitude
# away from mu = 2000 -- confirm the units / number parsing of the sales column.
t.test(ventas$VENTAS_POSTCOVID, mu = 2000, conf.level = 0.95)
##
## One Sample t-test
##
## data: ventas$VENTAS_POSTCOVID
## t = 74.751, df = 99, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 2000
## 95 percent confidence interval:
## 143355 151064
## sample estimates:
## mean of x
## 147209.5
#d. El promedio de las ventas del SECTOR1 es igual al promedio de las ventas
#del SECTOR2 posterior a la emergencia sanitaria?
#H0: u1 es igual a u2
#H1: u1 es diferente a u2
# Keep only sector and post-COVID sales, then split the rows by sector so the
# two groups can be compared.
posterior <- ventas %>% select(SECTOR, VENTAS_POSTCOVID)
sec1 <- posterior %>% filter(SECTOR %in% "SECTOR1")
sec2 <- posterior %>% filter(SECTOR %in% "SECTOR2")
##
## Two-sample z-Test
##
## data: sec1$VENTAS_POSTCOVID and sec2$VENTAS_POSTCOVID
## z = 1.2856, p-value = 0.09929
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -1387.858 NA
## sample estimates:
## mean of x mean of y
## 149593.4 144626.8
##
## Two-sample z-Test
##
## data: ventas$VENTAS_PRECOVID and ventas$VENTAS_POSTCOVID
## z = 13.864, p-value < 2.2e-16
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 43630.62 NA
## sample estimates:
## mean of x mean of y
## 196713.2 147209.5
## cols(
## SEXO = col_character(),
## PREV_TRAT = col_number(),
## POST_TRAT = col_number()
## )
# One-sample, two-sided t-test of the women's pre-treatment values against 70.
# `desvest4ej` is still computed in case later code relies on it, but it is no
# longer passed to t.test(): `sigma.x` belongs to BSDA::z.test() and was being
# ignored by t.test() through `...`.
desvest4ej <- sd(mujeres$PREV_TRAT)
# NOTE(review): the recorded sample mean (~7107) is about 100x the tested
# mu = 70 -- confirm the units / number parsing of PREV_TRAT.
t.test(mujeres$PREV_TRAT, mu = 70, conf.level = 0.95)
##
## One Sample t-test
##
## data: mujeres$PREV_TRAT
## t = 34.275, df = 29, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 70
## 95 percent confidence interval:
## 6687.402 7527.264
## sample estimates:
## mean of x
## 7107.333
##
## Two-sample z-Test
##
## data: hombres$PREV_TRAT and mujeres$PREV_TRAT
## z = 3.0544, p-value = 0.001127
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 401.4935 NA
## sample estimates:
## mean of x mean of y
## 7977.333 7107.333
##
## One-sample z-Test
##
## data: hombres$PREV_TRAT
## z = 40.004, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 80
## 95 percent confidence interval:
## 7590.411 8364.256
## sample estimates:
## mean of x
## 7977.333
##
## Welch Two Sample t-test
##
## data: mujeres$POST_TRAT and hombres$POST_TRAT
## t = -6.932, df = 56.65, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -1221.783 Inf
## sample estimates:
## mean of x mean of y
## 6538.367 7522.700
##
## Two-sample z-Test
##
## data: pesotratamiento$PREV_TRAT and pesotratamiento$POST_TRAT
## z = 2.8519, p-value = 0.002173
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 216.6108 NA
## sample estimates:
## mean of x mean of y
## 7542.333 7030.533
## cols(
## guest = col_double(),
## accommodation = col_double(),
## stay = col_double(),
## diversity = col_double(),
## waitingtime = col_double(),
## safety = col_double(),
## quality = col_double(),
## satisfaction = col_double(),
## price = col_double(),
## expenses = col_double(),
## recommendation = col_double(),
## skiholiday = col_double(),
## sex = col_double(),
## country = col_double(),
## age = col_double(),
## education = col_double()
## )
##
## One-sample z-Test
##
## data: turismo$satisfaction
## z = 2.5887, p-value = 0.009635
## alternative hypothesis: true mean is not equal to 60
## 95 percent confidence interval:
## 60.88403 66.39597
## sample estimates:
## mean of x
## 63.64
##
## Two-sample z-Test
##
## data: gastodemujeres$expenses and gastodehombres$expenses
## z = -5.7641, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 90 percent confidence interval:
## -52.17103 NA
## sample estimates:
## mean of x mean of y
## 349.3647 392.0462
##
## Two Sample t-test
##
## data: PimaIndiansDiabetes2$insulin and PimaIndiansDiabetes2$mass
## t = 28.432, df = 1149, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 114.5965 131.5851
## sample estimates:
## mean of x mean of y
## 155.54822 32.45746
#b. Realice una prueba de hipotesis para decidir que varianza, para las
#variables mass y triceps, es menor. Sugerencia: pruebe si el cociente entre
#las varianzas es menor que 1.
## [1] 35.9374
# Coefficient of variation of `mass`, in percent: standard deviation divided by
# the mean, times 100. The intermediate values stay available as globals.
desvnvm <- sd(data2$mass)
mediavm <- mean(data2$mass)
CVt <- desvnvm / mediavm * 100
print(CVt)
## [1] 21.33558
# One-sided F test on the ratio var(triceps) / var(mass):
# H0: ratio <= 1
# H1: ratio >  1
# The original call had `conf.level` split across two lines (a syntax error)
# and passed `mu` and `var.equal`, which are t.test() arguments; var.test()
# ignored them through `...` (its null value is `ratio`, default 1), so
# dropping them does not change the result.
var.test(
  data1$triceps,
  data2$mass,
  alternative = "greater",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: data1$triceps and data2$mass
## F = 2.2889, num df = 540, denom df = 756, p-value < 2.2e-16
## alternative hypothesis: true ratio of variances is greater than 1
## 95 percent confidence interval:
## 2.009234 Inf
## sample estimates:
## ratio of variances
## 2.28894
#Se rechaza la hipotesis nula: el cociente de varianzas (triceps/mass) es mayor que 1.
#a) Con los datos mostrados en la tabla construya un data frame llamado
#programa
# Build the `programa` data frame: one row per client (ids 1 to 16) with the
# `antes` and `despues` measurements taken from the exercise table.
clientes <- as.numeric(seq_len(16))
antes <- c(
  56, 95, 78, 67, 59, 81, 60, 56,
  70, 78, 84, 71, 90, 101, 54, 60
)
despues <- c(
  52, 91, 77, 65, 54, 78, 54, 55,
  65, 76, 82, 66, 88, 94, 53, 55
)
programa <- data.frame(
  clientes = clientes,
  antes = antes,
  despues = despues
)
#b. Realice la prueba adecuada para probar la hipotesis de que hay una
#perdida de peso de por lo menos 3 kg, suponiendo que los pesos de la
#poblacion se distribuyen normalmente
# Hypotheses on the mean per-client difference mu_d = mean(antes - despues),
# stated so that they match `alternative = "greater"` and `mu = 3` below:
# H0: mu_d <= 3
# H1: mu_d >  3
# `antes` and `despues` are two measurements on the same 16 clients, so the
# observations are paired and the test must use `paired = TRUE`. The original
# call treated them as independent samples, and its `TRUE` was split across two
# lines, which is a syntax error.
# NOTE: the output recorded right after this call was produced by the old
# unpaired test (df = 30); re-run the script to refresh it.
t.test(
  programa$antes,
  programa$despues,
  alternative = "greater",
  mu = 3,
  paired = TRUE,
  conf.level = 0.95
)
##
## Two Sample t-test
##
## data: programa$antes and programa$despues
## t = 0.083612, df = 30, p-value = 0.467
## alternative hypothesis: true difference in means is greater than 3
## 95 percent confidence interval:
## -5.443393 Inf
## sample estimates:
## mean of x mean of y
## 72.5000 69.0625
#8. En el cuadro 2 estan los datos de los salarios anuales de 144 personas
#pertenecientes a dos grupos de ocupacion distintos. En el grupo 1 se
#encuentran trabajadores de medios de comunicacion y empresas de marketing,
#mientras que en el grupo 2 se encuentran trabajadores dedicados a educacion.
#Los datos ademas pueden ser encontrados en el archivo sueldos.xlsx. Se pide:
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...6
## * `` -> ...7
## * ...
#b) Pruebe la hipotesis que las medias poblacionales de los dos grupos
#son iguales. Previamente, verifique la igualdad de las varianzas.
# Hypotheses for the comparison of the two group means:
# H0: mu1 = mu2
# H1: mu1 != mu2
# Before comparing the means, check whether the two variances can be treated
# as equal with a two-sided F test (null ratio 1, 95% confidence interval).
var.test(
  x = grupoa,
  y = grupob,
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: grupoa and grupob
## F = 0.81307, num df = 71, denom df = 71, p-value = 0.3854
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5086951 1.2995592
## sample estimates:
## ratio of variances
## 0.8130679