• No se han encontrado resultados

Código SAS para selección de variables

In document FACULTAD DE ESTUDIOS ESTADÍSTICOS (página 62-117)

/* Library that holds the datasets used throughout this program. */
libname datos 'C:\Users\cris\Documents\TFM';

/*********************************************************VARIABLES ORIGINALES********************************************************/

/* List the training set built from the original variables. */
proc print data=datos.variables_originales_train;
run;

/* NOTE(review): the listing continued with the fragment below. It has no
   PROC or DATA keyword, so it is not a valid statement; it is kept here
   commented out until the intended step is confirmed.
   data=datos.variables_originales_train;run;
*/

/*STEPWISE*/

/*
 * randomselect: repeated stepwise selection with PROC GLMSELECT.
 *
 * For every seed from sinicio to sfinal the macro samples the input data
 * with PROC SURVEYSELECT, runs PROC GLMSELECT on the sample and appends the
 * selected effects to the text file cosa.txt. It then tabulates how often
 * each word and each complete line of that file occurs.
 *
 * Parameters:
 *   data          input dataset
 *   listclass     variables placed on the CLASS statement
 *   vardepen      response of the MODEL statement
 *   modelo        candidate effects of the MODEL statement
 *   criterio      criterion used for both SELECT= and CHOOSE=
 *   sinicio       first seed of the loop
 *   sfinal        last seed of the loop
 *   fracciontrain sampling rate given to PROC SURVEYSELECT
 *   directorio    folder for the scratch files kk.txt and cosa.txt
 *
 * Outputs: WORK.sal (counts per effect word) and WORK.salefec (counts per
 * complete line), both sorted by descending count and printed.
 */
%macro randomselect(data=, listclass=, vardepen=, modelo=, criterio=,
                    sinicio=, sfinal=, fracciontrain=,
                    directorio=&directorio);

    options nocenter linesize=256;

    /* Send procedure output to a scratch file while the loop runs. */
    proc printto print="&directorio\kk.txt";
    run;

    /* Create (or truncate) the file that accumulates the selected effects. */
    data _null_;
        file "&directorio\cosa.txt" linesize=2000;
    run;

    /* The loop bound reads sfinal, so the parameter must carry that name. */
    %do semilla=&sinicio %to &sfinal;

        /* Random sample of the input data, one per seed. */
        proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;
        run;

        /* Capture the GLMSELECT tables as datasets. */
        ods output SelectionSummary=modelos;
        ods output SelectedEffects=efectos;
        ods output Glmselect.SelectedModel.FitStatistics=ajuste;

        proc glmselect data=sal1234 plots=all seed=&semilla;
            class &listclass;
            model &vardepen= &modelo
                / selection=stepwise(select=&criterio choose=&criterio)
                  details=all stats=all;
        run;

        ods graphics off;
        ods html close;

        /* Pair the selected effects with observation 5 of the fit statistics. */
        data union;
            i=5;
            set efectos;
            set ajuste point=i;
        run;

        /* Append the effects variable of union to the accumulator file. */
        data _null_;
            semilla=&semilla;
            file "&directorio\cosa.txt" mod linesize=2000;
            set union;
            put effects;
        run;

    %end;

    /* Restore the default print destination. */
    proc printto;
    run;

    /* Read the file word by word; keep every word except the intercept. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto @@;
        if efecto ne 'Intercept' then output;
    run;

    proc freq data=todos;
        tables efecto / out=sal;
    run;

    proc sort data=sal;
        by descending count;
    run;

    proc print data=sal;
    run;

    /* Read the file again as whole lines: inside a macro the doubled
       ampersand resolves to a single one, which is the list-input modifier
       that lets a value contain single embedded blanks. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto $ &&;
    run;

    proc freq data=todos;
        tables efecto / out=salefec;
    run;

    proc sort data=salefec;
        by descending count;
    run;

    proc print data=salefec;
    run;

    /* Echo every distinct line of effects with PUT (no FILE statement). */
    data _null_;
        set salefec;
        put efecto;
    run;

%mend;

/* Run randomselect on the original-variable training set with criterio=AIC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=AIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the original-variable training set with criterio=BIC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=BIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the original-variable training set with criterio=SBC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=SBC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Start of the PROC COPY step that stores salefec in the datos library;
   its SELECT statement follows on the next line of the file. */
proc copy inlib=work outlib=datos ;

select salefec; run;

/*BACKWARD*/
/*
 * randomselect: repeated backward selection with PROC GLMSELECT.
 *
 * For every seed from sinicio to sfinal the macro samples the input data
 * with PROC SURVEYSELECT, runs PROC GLMSELECT on the sample and appends the
 * selected effects to the text file cosa.txt. It then tabulates how often
 * each word and each complete line of that file occurs.
 *
 * Parameters:
 *   data          input dataset
 *   listclass     variables placed on the CLASS statement
 *   vardepen      response of the MODEL statement
 *   modelo        candidate effects of the MODEL statement
 *   criterio      criterion used for both SELECT= and CHOOSE=
 *   sinicio       first seed of the loop
 *   sfinal        last seed of the loop
 *   fracciontrain sampling rate given to PROC SURVEYSELECT
 *   directorio    folder for the scratch files kk.txt and cosa.txt
 *
 * Outputs: WORK.sal (counts per effect word) and WORK.salefec (counts per
 * complete line), both sorted by descending count and printed.
 */
%macro randomselect(data=, listclass=, vardepen=, modelo=, criterio=,
                    sinicio=, sfinal=, fracciontrain=,
                    directorio=&directorio);

    options nocenter linesize=256;

    /* Send procedure output to a scratch file while the loop runs. */
    proc printto print="&directorio\kk.txt";
    run;

    /* Create (or truncate) the file that accumulates the selected effects. */
    data _null_;
        file "&directorio\cosa.txt" linesize=2000;
    run;

    /* The loop bound reads sfinal, so the parameter must carry that name. */
    %do semilla=&sinicio %to &sfinal;

        /* Random sample of the input data, one per seed. */
        proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;
        run;

        /* Capture the GLMSELECT tables as datasets. */
        ods output SelectionSummary=modelos;
        ods output SelectedEffects=efectos;
        ods output Glmselect.SelectedModel.FitStatistics=ajuste;

        proc glmselect data=sal1234 plots=all seed=&semilla;
            class &listclass;
            model &vardepen= &modelo
                / selection=backward(select=&criterio choose=&criterio)
                  details=all stats=all;
        run;

        ods graphics off;
        ods html close;

        /* Pair the selected effects with observation 5 of the fit statistics. */
        data union;
            i=5;
            set efectos;
            set ajuste point=i;
        run;

        /* Append the effects variable of union to the accumulator file. */
        data _null_;
            semilla=&semilla;
            file "&directorio\cosa.txt" mod linesize=2000;
            set union;
            put effects;
        run;

    %end;

    /* Restore the default print destination. */
    proc printto;
    run;

    /* Read the file word by word; keep every word except the intercept. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto @@;
        if efecto ne 'Intercept' then output;
    run;

    proc freq data=todos;
        tables efecto / out=sal;
    run;

    proc sort data=sal;
        by descending count;
    run;

    proc print data=sal;
    run;

    /* Read the file again as whole lines: inside a macro the doubled
       ampersand resolves to a single one, which is the list-input modifier
       that lets a value contain single embedded blanks. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto $ &&;
    run;

    proc freq data=todos;
        tables efecto / out=salefec;
    run;

    proc sort data=salefec;
        by descending count;
    run;

    proc print data=salefec;
    run;

    /* Echo every distinct line of effects with PUT (no FILE statement). */
    data _null_;
        set salefec;
        put efecto;
    run;

%mend;

/* Run randomselect on the original-variable training set with criterio=AIC,
   seeds 12345 to 12445 and a sampling rate of 0.8. The effect list holds
   variable names only. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=AIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the original-variable training set with criterio=BIC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=BIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the original-variable training set with criterio=SBC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=SBC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/*
 * randomselect: repeated forward selection with PROC GLMSELECT.
 *
 * For every seed from sinicio to sfinal the macro samples the input data
 * with PROC SURVEYSELECT, runs PROC GLMSELECT on the sample and appends the
 * selected effects to the text file cosa.txt. It then tabulates how often
 * each word and each complete line of that file occurs.
 *
 * Parameters:
 *   data          input dataset
 *   listclass     variables placed on the CLASS statement
 *   vardepen      response of the MODEL statement
 *   modelo        candidate effects of the MODEL statement
 *   criterio      criterion used for both SELECT= and CHOOSE=
 *   sinicio       first seed of the loop
 *   sfinal        last seed of the loop
 *   fracciontrain sampling rate given to PROC SURVEYSELECT
 *   directorio    folder for the scratch files kk.txt and cosa.txt
 *
 * Outputs: WORK.sal (counts per effect word) and WORK.salefec (counts per
 * complete line), both sorted by descending count and printed.
 */
%macro randomselect(data=, listclass=, vardepen=, modelo=, criterio=,
                    sinicio=, sfinal=, fracciontrain=,
                    directorio=&directorio);

    options nocenter linesize=256;

    /* Send procedure output to a scratch file while the loop runs. */
    proc printto print="&directorio\kk.txt";
    run;

    /* Create (or truncate) the file that accumulates the selected effects. */
    data _null_;
        file "&directorio\cosa.txt" linesize=2000;
    run;

    /* The loop bound reads sfinal, so the parameter must carry that name. */
    %do semilla=&sinicio %to &sfinal;

        /* Random sample of the input data, one per seed. */
        proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;
        run;

        /* Capture the GLMSELECT tables as datasets. */
        ods output SelectionSummary=modelos;
        ods output SelectedEffects=efectos;
        ods output Glmselect.SelectedModel.FitStatistics=ajuste;

        proc glmselect data=sal1234 plots=all seed=&semilla;
            class &listclass;
            model &vardepen= &modelo
                / selection=forward(select=&criterio choose=&criterio)
                  details=all stats=all;
        run;

        ods graphics off;
        ods html close;

        /* Pair the selected effects with observation 5 of the fit statistics. */
        data union;
            i=5;
            set efectos;
            set ajuste point=i;
        run;

        /* Append the effects variable of union to the accumulator file. */
        data _null_;
            semilla=&semilla;
            file "&directorio\cosa.txt" mod linesize=2000;
            set union;
            put effects;
        run;

    %end;

    /* Restore the default print destination. */
    proc printto;
    run;

    /* Read the file word by word; keep every word except the intercept. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto @@;
        if efecto ne 'Intercept' then output;
    run;

    proc freq data=todos;
        tables efecto / out=sal;
    run;

    proc sort data=sal;
        by descending count;
    run;

    proc print data=sal;
    run;

    /* Read the file again as whole lines: inside a macro the doubled
       ampersand resolves to a single one, which is the list-input modifier
       that lets a value contain single embedded blanks. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto $ &&;
    run;

    proc freq data=todos;
        tables efecto / out=salefec;
    run;

    proc sort data=salefec;
        by descending count;
    run;

    proc print data=salefec;
    run;

    /* Echo every distinct line of effects with PUT (no FILE statement). */
    data _null_;
        set salefec;
        put efecto;
    run;

%mend;

/* Run randomselect on the original-variable training set with criterio=AIC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=AIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the original-variable training set with criterio=BIC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=BIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the original-variable training set with criterio=SBC,
   seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_originales_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=SBC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/*********************************************************VARIABLES TRANSFORMADAS********************************************************/

/* List the training set built from the transformed variables. */
proc print data=datos.variables_transformadas_train;
run;

/* NOTE(review): the listing continued with the fragment below. It has no
   PROC or DATA keyword, so it is not a valid statement; it is kept here
   commented out until the intended step is confirmed.
   data=datos.variables_transformadas_train;run;
*/

/*STEPWISE*/

/*
 * randomselect: repeated stepwise selection with PROC GLMSELECT.
 *
 * For every seed from sinicio to sfinal the macro samples the input data
 * with PROC SURVEYSELECT, runs PROC GLMSELECT on the sample and appends the
 * selected effects to the text file cosa.txt. It then tabulates how often
 * each word and each complete line of that file occurs.
 *
 * Parameters:
 *   data          input dataset
 *   listclass     variables placed on the CLASS statement
 *   vardepen      response of the MODEL statement
 *   modelo        candidate effects of the MODEL statement
 *   criterio      criterion used for both SELECT= and CHOOSE=
 *   sinicio       first seed of the loop
 *   sfinal        last seed of the loop
 *   fracciontrain sampling rate given to PROC SURVEYSELECT
 *   directorio    folder for the scratch files kk.txt and cosa.txt
 *
 * Outputs: WORK.sal (counts per effect word) and WORK.salefec (counts per
 * complete line), both sorted by descending count and printed.
 */
%macro randomselect(data=, listclass=, vardepen=, modelo=, criterio=,
                    sinicio=, sfinal=, fracciontrain=,
                    directorio=&directorio);

    options nocenter linesize=256;

    /* Send procedure output to a scratch file while the loop runs. */
    proc printto print="&directorio\kk.txt";
    run;

    /* Create (or truncate) the file that accumulates the selected effects. */
    data _null_;
        file "&directorio\cosa.txt" linesize=2000;
    run;

    /* The loop bound reads sfinal, so the parameter must carry that name. */
    %do semilla=&sinicio %to &sfinal;

        /* Random sample of the input data, one per seed. */
        proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;
        run;

        /* Capture the GLMSELECT tables as datasets. */
        ods output SelectionSummary=modelos;
        ods output SelectedEffects=efectos;
        ods output Glmselect.SelectedModel.FitStatistics=ajuste;

        proc glmselect data=sal1234 plots=all seed=&semilla;
            class &listclass;
            model &vardepen= &modelo
                / selection=stepwise(select=&criterio choose=&criterio)
                  details=all stats=all;
        run;

        ods graphics off;
        ods html close;

        /* Pair the selected effects with observation 5 of the fit statistics. */
        data union;
            i=5;
            set efectos;
            set ajuste point=i;
        run;

        /* Append the effects variable of union to the accumulator file. */
        data _null_;
            semilla=&semilla;
            file "&directorio\cosa.txt" mod linesize=2000;
            set union;
            put effects;
        run;

    %end;

    /* Restore the default print destination. */
    proc printto;
    run;

    /* Read the file word by word; keep every word except the intercept. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto @@;
        if efecto ne 'Intercept' then output;
    run;

    proc freq data=todos;
        tables efecto / out=sal;
    run;

    proc sort data=sal;
        by descending count;
    run;

    proc print data=sal;
    run;

    /* Read the file again as whole lines: inside a macro the doubled
       ampersand resolves to a single one, which is the list-input modifier
       that lets a value contain single embedded blanks. */
    data todos;
        infile "&directorio\cosa.txt" linesize=2000;
        length efecto $ 1000;
        input efecto $ &&;
    run;

    proc freq data=todos;
        tables efecto / out=salefec;
    run;

    proc sort data=salefec;
        by descending count;
    run;

    proc print data=salefec;
    run;

    /* Echo every distinct line of effects with PUT (no FILE statement). */
    data _null_;
        set salefec;
        put efecto;
    run;

%mend;

/* Run randomselect on the transformed-variable training set with
   criterio=AIC, seeds 12345 to 12445 and a sampling rate of 0.8. The effect
   list holds variable names only. */
%randomselect(
    data=datos.variables_transformadas_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           EXP_key_min_track EXP_loudness_mean_track EXP_mode_max_track EXP_mode_track
           LOG_speechiness_track LOG_tempo_mean_track LOG_tempo_min_track
           LOG_total_followers_artist LOG_valence_track
           PWR_danceability_track PWR_key_mean_track PWR_key_track
           PWR_loudness_max_track PWR_loudness_min_track PWR_loudness_track
           PWR_tempo_track PWR_time_signature_max_track PWR_time_signature_min_track
           SQRT_energy_track SQRT_instrumentalness_track SQRT_liveness_track
           SQRT_total_tracks_album SQR_mode_mean_track SQR_mode_min_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=AIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the transformed-variable training set with
   criterio=BIC, seeds 12345 to 12445 and a sampling rate of 0.8. */
%randomselect(
    data=datos.variables_transformadas_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           EXP_key_min_track EXP_loudness_mean_track EXP_mode_max_track EXP_mode_track
           LOG_speechiness_track LOG_tempo_mean_track LOG_tempo_min_track
           LOG_total_followers_artist LOG_valence_track
           PWR_danceability_track PWR_key_mean_track PWR_key_track
           PWR_loudness_max_track PWR_loudness_min_track PWR_loudness_track
           PWR_tempo_track PWR_time_signature_max_track PWR_time_signature_min_track
           SQRT_energy_track SQRT_instrumentalness_track SQRT_liveness_track
           SQRT_total_tracks_album SQR_mode_mean_track SQR_mode_min_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=BIC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Copy the salefec table from WORK into the datos library.
   NOTE(review): every run copies to the same member name, so an earlier
   copy is presumably replaced - confirm this is intended. */
proc copy inlib=work outlib=datos;
    select salefec;
run;

/* Run randomselect on the transformed-variable training set with
   criterio=SBC, seeds 12345 to 12445 and a sampling rate of 0.8. The effect
   list holds variable names only. */
%randomselect(
    data=datos.variables_transformadas_train,
    listclass=REP_disc_number_track album_type explicit_track,
    vardepen=popularity_track,
    modelo=REP_disc_number_track album_type explicit_track
           EXP_key_min_track EXP_loudness_mean_track EXP_mode_max_track EXP_mode_track
           LOG_speechiness_track LOG_tempo_mean_track LOG_tempo_min_track
           LOG_total_followers_artist LOG_valence_track
           PWR_danceability_track PWR_key_mean_track PWR_key_track
           PWR_loudness_max_track PWR_loudness_min_track PWR_loudness_track
           PWR_tempo_track PWR_time_signature_max_track PWR_time_signature_min_track
           SQRT_energy_track SQRT_instrumentalness_track SQRT_liveness_track
           SQRT_total_tracks_album SQR_mode_mean_track SQR_mode_min_track
           acousticness_track danceability_track duration_ms_track energy_track
           instrumentalness_track key_max_track key_mean_track key_min_track
           key_track liveness_track
           loudness_max_track loudness_mean_track loudness_min_track loudness_track
           mode_max_track mode_mean_track mode_min_track mode_track
           speechiness_track
           tempo_max_track tempo_mean_track tempo_min_track tempo_track
           time_signature_max_track time_signature_mean_track
           time_signature_min_track time_signature_track
           total_followers_artist total_tracks_album track_number_track
           valence_track,
    criterio=SBC,
    sinicio=12345,
    sfinal=12445,
    fracciontrain=0.8,
    directorio=C:\Users\cris\Documents\TFM
);

/* NOTE(review): the quoted name hola is not a WORK dataset; presumably a
   leftover marker step - confirm before relying on it. */
proc print data="hola";
run;

/* Start of the PROC COPY step that stores salefec in the datos library;
   its SELECT statement follows on the next line of the file. */
proc copy inlib=work outlib=datos ;

select salefec; run; /*BACKWARD*/ %macro randomselect(data=,listclass=,vardepen=,modelo=,criterio=,sinicio=,sfi nal=,fracciontrain=,directorio=&directorio);

options nocenter linesize=256;

proc printto print="&directorio\kk.txt";run;

data _null_;file "&directorio\cosa.txt" linesize=2000;run;

%do semilla=&sinicio %to &sfinal;

proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;run;

ods output SelectionSummary=modelos; ods output SelectedEffects=efectos;

ods output Glmselect.SelectedModel.FitStatistics=ajuste; proc glmselect data=sal1234 plots=all seed=&semilla;

class &listclass;

model &vardepen= &modelo/ selection=backward(select=&criterio choose=&criterio) details=all stats=all;

run;

ods graphics off; ods html close;

data union;i=5;set efectos;set ajuste point=i;run;

data _null_;semilla=&semilla;file "&directorio\cosa.txt" mod linesize=2000;set union;put effects ;run;

%end;

proc printto ;run; data todos;

infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;

input efecto @@;

if efecto ne 'Intercept' then output; run;

proc freq data=todos;tables efecto /out=sal;run; proc sort data=sal;by descending count;

proc print data=sal;run; data todos;

infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;

input efecto $ &&; run;

In document FACULTAD DE ESTUDIOS ESTADÍSTICOS (página 62-117)

Documento similar