FACULTAD DE CIENCIAS
TRABAJO FIN DE GRADO
Grado en Estadística
Desarrollo de una aplicación web de Análisis Clúster robusto.
Autor:
Federico Pérez Rosado
Tutor:
... 4 ... 6 ... 6 ... 6 ... 7 ... 8 ... 10 ... 11 ... 11 ... 11 ... 14 ... 14 ... 14 ... 16 ... 18 ... 18 ... 18 ... 19 ... 24 ... 27 ... 27 ... 27 ... 30 ... 32 ... 36 ... 36 ... 37 ... 47 ... 49 ... 51
-• •
𝑋 = {0, 56, 61, 57, 58.5}
𝑋̅ = 46.5
𝑋 = {0, 56, 61, 57, 58.5}
$X = \{0, 56, 61, 57, 58.5\}$ $Q_1$, $Q_2$ $Q_3$ $RIC$
$$Q_1 = 56, \qquad Q_2 = \tilde{X} = 57, \qquad Q_3 = 58.5$$
$$RIC = (Q_3 - Q_1) = 58.5 - 56 = 2.5$$
$$inf = (Q_1 - 1.5 \cdot RIC) = 52.25, \qquad sup = (Q_3 + 1.5 \cdot RIC) = 62.25$$
$$\bar{X} = 58.125$$
$\alpha$ $X = \{0, 56, 61, 57, 58.5\}$ $\alpha = 0.4$ $\alpha = 0.4$ $$\alpha \cdot n = 0.4 \cdot 5 = 2$$ $$\bar{X}_{rec} = 57.17$$ $X$ $\alpha = 0.4$ $\alpha = 0$ $\mathbb{R}$
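$k$ $X = \{\boldsymbol{x}_1, \boldsymbol{x}_2, \dots, \boldsymbol{x}_n\}$ $k$ - $k$ - $n$ $k$ - $\{\boldsymbol{m}_1, \boldsymbol{m}_2, \dots, \boldsymbol{m}_k\} \subset \mathbb{R}^d$ $k$ $k$
$$d_i = \min_{j = 1, \dots, k} \lVert x_i - m_j \rVert, \quad i = 1, \dots, n.$$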
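- $$H_j = \left\{ i : \lVert x_i - m_j \rVert^2 = \min_{1 \le l \le k} \lVert x_i - m_l \rVert^2 \right\}, \quad j = 1, \dots, k$$ $X$ $k$ $\{H_1, \dots, H_k\}$
$$m_j = \frac{1}{n_j} \sum_{i \in H_j} x_i, \quad j = 1, \dots, k$$ $n_j$ $H_j$ $j = 1, \dots, k$
$$\sum_{i=1}^{n} \left( \inf_{1 \le j \le k} \lVert x_i - m_j \rVert \right)^2$$
$$\sum_{j=1}^{k} \sum_{i \in H_j} \lVert x_i - m_j \rVert^2$$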
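$\alpha$ $X = \{\boldsymbol{x}_1, \boldsymbol{x}_2, \dots, \boldsymbol{x}_n\}$ $\alpha \in (0,1)$ $k$ - $k$ - $\{\boldsymbol{m}_1, \boldsymbol{m}_2, \dots, \boldsymbol{m}_k\} \subset \mathbb{R}^d$ $k$ $k$
$$d_i = \min_{j = 1, \dots, k} \lVert x_i - m_j \rVert, \quad i = 1, \dots, n.$$
- $(n \cdot \alpha)$ $d_i$ $H_0$ $d_i \ge d_{([n(1-\alpha)])}$
$$H_0 = \left\{ i : d_i \ge d_{([n(1-\alpha)])} \right\}, \quad i = 1, \dots, n.$$
- $\alpha$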
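$H_0$ $$H_j = \left\{ i : \lVert x_i - m_j \rVert^2 = \min_{1 \le l \le k} \lVert x_i - m_l \rVert^2 \ \text{ y } \ i \notin H_0 \right\}$$ $H$ $k$ $\{H_1, \dots, H_k\}$ $H_0$
$$m_j = \frac{1}{n_j} \sum_{i \in H_j} x_i, \quad j = 1, \dots, k$$
$$\frac{1}{[n(1-\alpha)]} \sum_{i \notin H_0} \left( \inf_{1 \le j \le k} \lVert x_i - m_j \rVert \right)^2$$
$$\frac{1}{[n(1-\alpha)]} \sum_{j=1}^{k} \sum_{i \in H_j} \lVert x_i - m_j \rVert^2$$
$H_0$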
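$N_2\left( \begin{pmatrix} 15 \\ 10 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 20 \\ 15 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 0 \\ 0 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 0 \end{pmatrix} \right)$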
import math as math
install numpy
import numpy as np
install matplotlib
•
•
•
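• • • • • • $N_2\left( \begin{pmatrix} 0 \\ 5 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 5 \\ 0 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 2.5 \\ 2.5 \end{pmatrix}, \begin{pmatrix} 50 & 0 \\ 0 & 50 \end{pmatrix} \right)$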
•
•
virtualenv temp
•
“C:\Users\...\temp\Scripts\activate.bat”
trimm <- function(X,K,alpha){ … } • • •
• • •
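$\alpha$ $N_2\left( \begin{pmatrix} 20 \\ 15 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 30 \\ 25 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 20 \\ 20 \end{pmatrix}, \begin{pmatrix} 50 & 0 \\ 0 & 50 \end{pmatrix} \right)$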
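$\alpha$ $N_2\left( \begin{pmatrix} 20 \\ 15 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 30 \\ 25 \end{pmatrix}, \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix} \right)$ $N_2\left( \begin{pmatrix} 25 \\ 20 \end{pmatrix}, \begin{pmatrix} 50 & 0 \\ 0 & 50 \end{pmatrix} \right)$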
•
•
https://www.datasciencecentral.com/profiles/blogs/r-vs-python-meta-review-on-usability-popularity-pros-amp-cons
https://www.ntu.ac.uk/about-us/news/news-articles/2015/11/people-check-their-smartphones-85-times-a-day-and-they-dont-even-know-theyre-doing-it
https://d3js.org/
https://medium.com/@data_driven/python-vs-r-for-data-science-and-the-winner-is-3ebb1a968197
https://luca-d3.com/es/diccionario-tecnologico/index.html
http://diccionarioempresarial.wolterskluwer.es/Content/Documento.aspx?params=H4sIAAAAAAAEAMtMSbF1jTAAASNjM2MztbLUouLM_DxbIwMDS0NDA1OQQGZapUt-ckhlQaptWmJOcSoAvvhbpzUAAAA=WKE
http://bl.ocks.org/peterssonjonas/4a0e7cb8d23231243e0e
https://jarroba.com/seleccion-del-numero-optimo-clusters/
https://docs.python.org/3.7/
https://docs.djangoproject.com/es/2.1/
https://developer.mozilla.org/es/docs/Web/JavaScript