/* Library pointing at the project folder that holds the training data sets. */
libname datos 'C:\Users\cris\Documents\TFM';
/*********************************************************ORIGINAL VARIABLES********************************************************/
/* Print the original-variables training set. */
proc print data=datos.variables_originales_train;run;
/* A stray fragment (data=datos.variables_originales_train followed by run) stood here.
   It is not a valid SAS statement and only produced a syntax error, so it was removed. */
/*STEPWISE*/
/*
 * randomselect (STEPWISE variant)
 * Runs PROC GLMSELECT with stepwise selection on one PROC SURVEYSELECT subsample
 * per seed and tabulates how often each effect and each selected effect list appears.
 *
 * Parameters:
 *   data=          data set to subsample
 *   listclass=     variables placed on the CLASS statement
 *   vardepen=      dependent variable of the MODEL statement
 *   modelo=        candidate effects on the right-hand side of the MODEL statement
 *   criterio=      criterion passed to both SELECT= and CHOOSE=
 *   sinicio=       first seed of the loop
 *   sfinal=        last seed of the loop (inclusive)
 *   fracciontrain= value given to RATE= in PROC SURVEYSELECT
 *   directorio=    folder for kk.txt (redirected listing) and cosa.txt (scratch file),
 *                  defaulting to the macro variable directorio
 *
 * Results (WORK): sal     - count of each individual effect, Intercept excluded
 *                 salefec - count of each complete selected effect list
 *
 * Fix: the parameter was declared with a blank inside its name (sfi nal), which is
 * not a valid parameter name. The body and all callers use sfinal.
 */
%macro randomselect(data=,listclass=,vardepen=,modelo=,criterio=,sinicio=,sfinal=,fracciontrain=,directorio=&directorio);
options nocenter linesize=256;
/* Route procedure listing output to kk.txt while the loop runs. */
proc printto print="&directorio\kk.txt";run;
/* Open cosa.txt for output without writing anything (presumably to reset it before
   the loop appends to it with MOD). */
data _null_;file "&directorio\cosa.txt" linesize=2000;run;
%do semilla=&sinicio %to &sfinal;
/* Draw the subsample for this seed. */
proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;run;
ods output SelectionSummary=modelos; ods output SelectedEffects=efectos;
ods output Glmselect.SelectedModel.FitStatistics=ajuste;
proc glmselect data=sal1234 plots=all seed=&semilla;
class &listclass;
model &vardepen= &modelo/ selection=stepwise(select=&criterio choose=&criterio) details=all stats=all;
run;
ods graphics off; ods html close;
/* Combine the selected-effects row with observation 5 of the fit statistics table. */
data union;i=5;set efectos;set ajuste point=i;run;
/* Append the selected effect list of this seed to cosa.txt. */
data _null_;semilla=&semilla;file "&directorio\cosa.txt" mod linesize=2000;set union;put effects ;run;
%end;
/* Restore the default listing destination. */
proc printto ;run;
/* First pass: one observation per blank-delimited token, dropping Intercept. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto @@;
if efecto ne 'Intercept' then output; run;
proc freq data=todos;tables efecto /out=sal;run;
proc sort data=sal;by descending count;
proc print data=sal;run;
/* Second pass: the doubled ampersand resolves to a single ampersand in macro code,
   the INPUT modifier that lets a value contain single blanks, so each value read
   is a whole effect list rather than a single effect. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto $ &&; run;
proc freq data=todos;tables efecto /out=salefec;run;
proc sort data=salefec;by descending count;
proc print data=salefec;run;
/* Echo the effect lists to the log, most frequent first. */
data _null_;set salefec;put efecto;run;
%mend;
/* Stepwise selection scored with AIC on the original variables, seeds 12345 to 12445,
   sampling rate 0.8. A stray page number (57) that preceded this call and would have
   been glued onto the first generated statement was removed. */
%randomselect(data=datos.variables_originales_train,
listclass=REP_disc_number_track album_type explicit_track, vardepen=popularity_track,
modelo=REP_disc_number_track album_type explicit_track
acousticness_track danceability_track duration_ms_track energy_track instrumentalness_track key_max_track key_mean_track key_min_track key_track liveness_track
loudness_max_track loudness_mean_track loudness_min_track loudness_track mode_max_track mode_mean_track mode_min_track mode_track speechiness_track tempo_max_track tempo_mean_track tempo_min_track tempo_track time_signature_max_track
time_signature_mean_track time_signature_min_track
time_signature_track total_followers_artist total_tracks_album track_number_track valence_track,
criterio=AIC, sinicio=12345, sfinal=12445,
fracciontrain=0.8, directorio=C:\Users\cris\Documents\TFM);
/* NOTE(review): the step below names a quoted data set (hola) that nothing in this
   file creates. Confirm whether it is an intentional marker or leftover code. */
proc print data="hola";run;
/* Copy WORK.SALEFEC into the permanent library.
   NOTE(review): every run copies to the same member name, datos.salefec. Confirm
   earlier results are saved elsewhere if they are still needed. */
proc copy inlib=work outlib=datos ;
select salefec; run;
/* Stepwise selection scored with BIC on the original variables, seeds 12345 to 12445. */
%randomselect(
  data=datos.variables_originales_train,
  listclass=REP_disc_number_track album_type explicit_track,
  vardepen=popularity_track,
  modelo=REP_disc_number_track album_type explicit_track acousticness_track
         danceability_track duration_ms_track energy_track instrumentalness_track
         key_max_track key_mean_track key_min_track key_track liveness_track
         loudness_max_track loudness_mean_track loudness_min_track loudness_track
         mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
         tempo_max_track tempo_mean_track tempo_min_track tempo_track
         time_signature_max_track time_signature_mean_track time_signature_min_track
         time_signature_track total_followers_artist total_tracks_album
         track_number_track valence_track,
  criterio=BIC,
  sinicio=12345,
  sfinal=12445,
  fracciontrain=0.8,
  directorio=C:\Users\cris\Documents\TFM);
proc print data="hola";
run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos;
  select salefec;
run;
/* Stepwise selection scored with SBC on the original variables, seeds 12345 to 12445.
   A page number (58) fused in front of the sfinal keyword was removed. It broke
   the keyword parameter. */
%randomselect(data=datos.variables_originales_train,
listclass=REP_disc_number_track album_type explicit_track, vardepen=popularity_track,
modelo=REP_disc_number_track album_type explicit_track
acousticness_track danceability_track duration_ms_track energy_track instrumentalness_track key_max_track key_mean_track key_min_track key_track liveness_track
loudness_max_track loudness_mean_track loudness_min_track loudness_track mode_max_track mode_mean_track mode_min_track mode_track speechiness_track tempo_max_track tempo_mean_track tempo_min_track tempo_track time_signature_max_track
time_signature_mean_track time_signature_min_track
time_signature_track total_followers_artist total_tracks_album track_number_track valence_track,
criterio=SBC, sinicio=12345, sfinal=12445,
fracciontrain=0.8, directorio=C:\Users\cris\Documents\TFM); proc print data="hola";run;
proc copy inlib=work outlib=datos ;
select salefec; run;
/*BACKWARD*/
/*
 * randomselect (BACKWARD variant)
 * Redefines the macro so that PROC GLMSELECT uses backward selection. As before, it
 * fits one PROC SURVEYSELECT subsample per seed and tabulates how often each effect
 * and each selected effect list appears.
 *
 * Parameters:
 *   data=          data set to subsample
 *   listclass=     variables placed on the CLASS statement
 *   vardepen=      dependent variable of the MODEL statement
 *   modelo=        candidate effects on the right-hand side of the MODEL statement
 *   criterio=      criterion passed to both SELECT= and CHOOSE=
 *   sinicio=       first seed of the loop
 *   sfinal=        last seed of the loop (inclusive)
 *   fracciontrain= value given to RATE= in PROC SURVEYSELECT
 *   directorio=    folder for kk.txt and cosa.txt, defaulting to the macro variable directorio
 *
 * Results (WORK): sal     - count of each individual effect, Intercept excluded
 *                 salefec - count of each complete selected effect list
 *
 * Fix: the parameter was declared with a blank inside its name (sfi nal), which is
 * not a valid parameter name. The body and all callers use sfinal.
 */
%macro randomselect(data=,listclass=,vardepen=,modelo=,criterio=,sinicio=,sfinal=,fracciontrain=,directorio=&directorio);
options nocenter linesize=256;
/* Route procedure listing output to kk.txt while the loop runs. */
proc printto print="&directorio\kk.txt";run;
/* Open cosa.txt for output without writing anything (presumably to reset it before
   the loop appends to it with MOD). */
data _null_;file "&directorio\cosa.txt" linesize=2000;run;
%do semilla=&sinicio %to &sfinal;
/* Draw the subsample for this seed. */
proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;run;
ods output SelectionSummary=modelos; ods output SelectedEffects=efectos;
ods output Glmselect.SelectedModel.FitStatistics=ajuste;
proc glmselect data=sal1234 plots=all seed=&semilla;
class &listclass;
model &vardepen= &modelo/ selection=backward(select=&criterio choose=&criterio) details=all stats=all;
run;
ods graphics off; ods html close;
/* Combine the selected-effects row with observation 5 of the fit statistics table. */
data union;i=5;set efectos;set ajuste point=i;run;
/* Append the selected effect list of this seed to cosa.txt. */
data _null_;semilla=&semilla;file "&directorio\cosa.txt" mod linesize=2000;set union;put effects ;run;
%end;
/* Restore the default listing destination. */
proc printto ;run;
/* First pass: one observation per blank-delimited token, dropping Intercept. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto @@;
if efecto ne 'Intercept' then output; run;
proc freq data=todos;tables efecto /out=sal;run;
proc sort data=sal;by descending count;
proc print data=sal;run;
/* Second pass: the doubled ampersand resolves to a single ampersand in macro code,
   the INPUT modifier that lets a value contain single blanks, so each value read
   is a whole effect list rather than a single effect. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto $ &&; run;
proc freq data=todos;tables efecto /out=salefec;run;
proc sort data=salefec;by descending count;
proc print data=salefec;run;
/* Echo the effect lists to the log, most frequent first. */
data _null_;set salefec;put efecto;run;
%mend;
/* Backward selection scored with AIC on the original variables, seeds 12345 to 12445.
   A stray page number (59) sitting inside the modelo list was removed. It would
   have been passed to the MODEL statement as if it were an effect. */
%randomselect(data=datos.variables_originales_train,
listclass=REP_disc_number_track album_type explicit_track, vardepen=popularity_track,
modelo=REP_disc_number_track album_type explicit_track
acousticness_track danceability_track duration_ms_track energy_track instrumentalness_track key_max_track key_mean_track key_min_track key_track liveness_track
loudness_max_track loudness_mean_track loudness_min_track loudness_track mode_max_track mode_mean_track mode_min_track mode_track speechiness_track tempo_max_track tempo_mean_track tempo_min_track tempo_track time_signature_max_track
time_signature_mean_track time_signature_min_track
time_signature_track total_followers_artist total_tracks_album track_number_track valence_track,
criterio=AIC, sinicio=12345, sfinal=12445,
fracciontrain=0.8, directorio=C:\Users\cris\Documents\TFM); proc print data="hola";run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos ;
select salefec; run;
/* Backward selection scored with BIC on the original variables, seeds 12345 to 12445. */
%randomselect(
  data=datos.variables_originales_train,
  listclass=REP_disc_number_track album_type explicit_track,
  vardepen=popularity_track,
  modelo=REP_disc_number_track album_type explicit_track acousticness_track
         danceability_track duration_ms_track energy_track instrumentalness_track
         key_max_track key_mean_track key_min_track key_track liveness_track
         loudness_max_track loudness_mean_track loudness_min_track loudness_track
         mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
         tempo_max_track tempo_mean_track tempo_min_track tempo_track
         time_signature_max_track time_signature_mean_track time_signature_min_track
         time_signature_track total_followers_artist total_tracks_album
         track_number_track valence_track,
  criterio=BIC,
  sinicio=12345,
  sfinal=12445,
  fracciontrain=0.8,
  directorio=C:\Users\cris\Documents\TFM);
proc print data="hola";
run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos;
  select salefec;
run;
/* Backward selection scored with SBC on the original variables, seeds 12345 to 12445. */
%randomselect(
  data=datos.variables_originales_train,
  listclass=REP_disc_number_track album_type explicit_track,
  vardepen=popularity_track,
  modelo=REP_disc_number_track album_type explicit_track acousticness_track
         danceability_track duration_ms_track energy_track instrumentalness_track
         key_max_track key_mean_track key_min_track key_track liveness_track
         loudness_max_track loudness_mean_track loudness_min_track loudness_track
         mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
         tempo_max_track tempo_mean_track tempo_min_track tempo_track
         time_signature_max_track time_signature_mean_track time_signature_min_track
         time_signature_track total_followers_artist total_tracks_album
         track_number_track valence_track,
  criterio=SBC,
  sinicio=12345,
  sfinal=12445,
  fracciontrain=0.8,
  directorio=C:\Users\cris\Documents\TFM);
proc print data="hola";
run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos;
  select salefec;
run;
/*FORWARD*/
/*
 * randomselect (FORWARD variant)
 * Redefines the macro so that PROC GLMSELECT uses forward selection. As before, it
 * fits one PROC SURVEYSELECT subsample per seed and tabulates how often each effect
 * and each selected effect list appears.
 *
 * Parameters:
 *   data=          data set to subsample
 *   listclass=     variables placed on the CLASS statement
 *   vardepen=      dependent variable of the MODEL statement
 *   modelo=        candidate effects on the right-hand side of the MODEL statement
 *   criterio=      criterion passed to both SELECT= and CHOOSE=
 *   sinicio=       first seed of the loop
 *   sfinal=        last seed of the loop (inclusive)
 *   fracciontrain= value given to RATE= in PROC SURVEYSELECT
 *   directorio=    folder for kk.txt and cosa.txt, defaulting to the macro variable directorio
 *
 * Results (WORK): sal     - count of each individual effect, Intercept excluded
 *                 salefec - count of each complete selected effect list
 *
 * Fixes: a stray page number (60) in front of the macro statement was removed, and
 * the parameter declared with a blank inside its name (sfi nal) is now sfinal, as
 * used by the body and by all callers.
 */
%macro randomselect(data=,listclass=,vardepen=,modelo=,criterio=,sinicio=,sfinal=,fracciontrain=,directorio=&directorio);
options nocenter linesize=256;
/* Route procedure listing output to kk.txt while the loop runs. */
proc printto print="&directorio\kk.txt";run;
/* Open cosa.txt for output without writing anything (presumably to reset it before
   the loop appends to it with MOD). */
data _null_;file "&directorio\cosa.txt" linesize=2000;run;
%do semilla=&sinicio %to &sfinal;
/* Draw the subsample for this seed. */
proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;run;
ods output SelectionSummary=modelos; ods output SelectedEffects=efectos;
ods output Glmselect.SelectedModel.FitStatistics=ajuste;
proc glmselect data=sal1234 plots=all seed=&semilla;
class &listclass;
model &vardepen= &modelo/ selection=forward(select=&criterio choose=&criterio) details=all stats=all;
run;
ods graphics off; ods html close;
/* Combine the selected-effects row with observation 5 of the fit statistics table. */
data union;i=5;set efectos;set ajuste point=i;run;
/* Append the selected effect list of this seed to cosa.txt. */
data _null_;semilla=&semilla;file "&directorio\cosa.txt" mod linesize=2000;set union;put effects ;run;
%end;
/* Restore the default listing destination. */
proc printto ;run;
/* First pass: one observation per blank-delimited token, dropping Intercept. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto @@;
if efecto ne 'Intercept' then output; run;
proc freq data=todos;tables efecto /out=sal;run;
proc sort data=sal;by descending count;
proc print data=sal;run;
/* Second pass: the doubled ampersand resolves to a single ampersand in macro code,
   the INPUT modifier that lets a value contain single blanks, so each value read
   is a whole effect list rather than a single effect. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto $ &&; run;
proc freq data=todos;tables efecto /out=salefec;run;
proc sort data=salefec;by descending count;
proc print data=salefec;run;
/* Echo the effect lists to the log, most frequent first. */
data _null_;set salefec;put efecto;run;
%mend;
/* Forward selection scored with AIC on the original variables, seeds 12345 to 12445.
   A page number (61) fused in front of the sfinal keyword was removed. It broke
   the keyword parameter. */
%randomselect(data=datos.variables_originales_train,
listclass=REP_disc_number_track album_type explicit_track, vardepen=popularity_track,
modelo=REP_disc_number_track album_type explicit_track
acousticness_track danceability_track duration_ms_track energy_track instrumentalness_track key_max_track key_mean_track key_min_track key_track liveness_track
loudness_max_track loudness_mean_track loudness_min_track loudness_track mode_max_track mode_mean_track mode_min_track mode_track speechiness_track tempo_max_track tempo_mean_track tempo_min_track tempo_track time_signature_max_track
time_signature_mean_track time_signature_min_track
time_signature_track total_followers_artist total_tracks_album track_number_track valence_track,
criterio=AIC, sinicio=12345, sfinal=12445,
fracciontrain=0.8, directorio=C:\Users\cris\Documents\TFM); proc print data="hola";run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos ;
select salefec; run;
/* Forward selection scored with BIC on the original variables, seeds 12345 to 12445. */
%randomselect(
  data=datos.variables_originales_train,
  listclass=REP_disc_number_track album_type explicit_track,
  vardepen=popularity_track,
  modelo=REP_disc_number_track album_type explicit_track acousticness_track
         danceability_track duration_ms_track energy_track instrumentalness_track
         key_max_track key_mean_track key_min_track key_track liveness_track
         loudness_max_track loudness_mean_track loudness_min_track loudness_track
         mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
         tempo_max_track tempo_mean_track tempo_min_track tempo_track
         time_signature_max_track time_signature_mean_track time_signature_min_track
         time_signature_track total_followers_artist total_tracks_album
         track_number_track valence_track,
  criterio=BIC,
  sinicio=12345,
  sfinal=12445,
  fracciontrain=0.8,
  directorio=C:\Users\cris\Documents\TFM);
proc print data="hola";
run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos;
  select salefec;
run;
/* Forward selection scored with SBC on the original variables, seeds 12345 to 12445. */
%randomselect(
  data=datos.variables_originales_train,
  listclass=REP_disc_number_track album_type explicit_track,
  vardepen=popularity_track,
  modelo=REP_disc_number_track album_type explicit_track acousticness_track
         danceability_track duration_ms_track energy_track instrumentalness_track
         key_max_track key_mean_track key_min_track key_track liveness_track
         loudness_max_track loudness_mean_track loudness_min_track loudness_track
         mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
         tempo_max_track tempo_mean_track tempo_min_track tempo_track
         time_signature_max_track time_signature_mean_track time_signature_min_track
         time_signature_track total_followers_artist total_tracks_album
         track_number_track valence_track,
  criterio=SBC,
  sinicio=12345,
  sfinal=12445,
  fracciontrain=0.8,
  directorio=C:\Users\cris\Documents\TFM);
proc print data="hola";
run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos;
  select salefec;
run;
/*********************************************************TRANSFORMED VARIABLES********************************************************/
/* Print the transformed-variables training set. */
proc print data=datos.variables_transformadas_train;run;
/* A stray fragment (data=datos.variables_transformadas_train followed by run) stood here.
   It is not a valid SAS statement and only produced a syntax error, so it was removed. */
/*STEPWISE*/
/*
 * randomselect (STEPWISE variant)
 * Redefines the macro so that PROC GLMSELECT uses stepwise selection. It fits one
 * PROC SURVEYSELECT subsample per seed and tabulates how often each effect and
 * each selected effect list appears.
 *
 * Parameters:
 *   data=          data set to subsample
 *   listclass=     variables placed on the CLASS statement
 *   vardepen=      dependent variable of the MODEL statement
 *   modelo=        candidate effects on the right-hand side of the MODEL statement
 *   criterio=      criterion passed to both SELECT= and CHOOSE=
 *   sinicio=       first seed of the loop
 *   sfinal=        last seed of the loop (inclusive)
 *   fracciontrain= value given to RATE= in PROC SURVEYSELECT
 *   directorio=    folder for kk.txt and cosa.txt, defaulting to the macro variable directorio
 *
 * Results (WORK): sal     - count of each individual effect, Intercept excluded
 *                 salefec - count of each complete selected effect list
 *
 * Fixes: a stray page number (62) in front of the macro statement was removed, and
 * the parameter declared with a blank inside its name (sfi nal) is now sfinal, as
 * used by the body and by all callers.
 */
%macro randomselect(data=,listclass=,vardepen=,modelo=,criterio=,sinicio=,sfinal=,fracciontrain=,directorio=&directorio);
options nocenter linesize=256;
/* Route procedure listing output to kk.txt while the loop runs. */
proc printto print="&directorio\kk.txt";run;
/* Open cosa.txt for output without writing anything (presumably to reset it before
   the loop appends to it with MOD). */
data _null_;file "&directorio\cosa.txt" linesize=2000;run;
%do semilla=&sinicio %to &sfinal;
/* Draw the subsample for this seed. */
proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;run;
ods output SelectionSummary=modelos; ods output SelectedEffects=efectos;
ods output Glmselect.SelectedModel.FitStatistics=ajuste;
proc glmselect data=sal1234 plots=all seed=&semilla;
class &listclass;
model &vardepen= &modelo/ selection=stepwise(select=&criterio choose=&criterio) details=all stats=all;
run;
ods graphics off; ods html close;
/* Combine the selected-effects row with observation 5 of the fit statistics table. */
data union;i=5;set efectos;set ajuste point=i;run;
/* Append the selected effect list of this seed to cosa.txt. */
data _null_;semilla=&semilla;file "&directorio\cosa.txt" mod linesize=2000;set union;put effects ;run;
%end;
/* Restore the default listing destination. */
proc printto ;run;
/* First pass: one observation per blank-delimited token, dropping Intercept. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto @@;
if efecto ne 'Intercept' then output; run;
proc freq data=todos;tables efecto /out=sal;run;
proc sort data=sal;by descending count;
proc print data=sal;run;
/* Second pass: the doubled ampersand resolves to a single ampersand in macro code,
   the INPUT modifier that lets a value contain single blanks, so each value read
   is a whole effect list rather than a single effect. */
data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto $ &&; run;
proc freq data=todos;tables efecto /out=salefec;run;
proc sort data=salefec;by descending count;
proc print data=salefec;run;
/* Echo the effect lists to the log, most frequent first. */
data _null_;set salefec;put efecto;run;
%mend;
/* Stepwise selection scored with AIC on the transformed variables, seeds 12345 to 12445.
   A stray page number (63) sitting inside the modelo list was removed. It would
   have been passed to the MODEL statement as if it were an effect. */
%randomselect(data=datos.variables_transformadas_train,
listclass=REP_disc_number_track album_type explicit_track, vardepen=popularity_track,
modelo=REP_disc_number_track album_type explicit_track
EXP_key_min_track EXP_loudness_mean_track EXP_mode_max_track EXP_mode_track LOG_speechiness_track LOG_tempo_mean_track LOG_tempo_min_track LOG_total_followers_artist
LOG_valence_track PWR_danceability_track PWR_key_mean_track PWR_key_track PWR_loudness_max_track PWR_loudness_min_track PWR_loudness_track PWR_tempo_track
PWR_time_signature_max_track PWR_time_signature_min_track
SQRT_energy_track SQRT_instrumentalness_track SQRT_liveness_track SQRT_total_tracks_album SQR_mode_mean_track
SQR_mode_min_track acousticness_track danceability_track
duration_ms_track energy_track instrumentalness_track key_max_track key_mean_track key_min_track key_track
liveness_track loudness_max_track loudness_mean_track
loudness_min_track loudness_track mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
tempo_max_track tempo_mean_track tempo_min_track tempo_track time_signature_max_track time_signature_mean_track
time_signature_min_track time_signature_track
total_followers_artist total_tracks_album track_number_track valence_track,
criterio=AIC, sinicio=12345, sfinal=12445,
fracciontrain=0.8, directorio=C:\Users\cris\Documents\TFM); proc print data="hola";run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos ;
select salefec; run;
/* Stepwise selection scored with BIC on the transformed variables, seeds 12345 to 12445. */
%randomselect(
  data=datos.variables_transformadas_train,
  listclass=REP_disc_number_track album_type explicit_track,
  vardepen=popularity_track,
  modelo=REP_disc_number_track album_type explicit_track
         EXP_key_min_track EXP_loudness_mean_track EXP_mode_max_track EXP_mode_track
         LOG_speechiness_track LOG_tempo_mean_track LOG_tempo_min_track
         LOG_total_followers_artist LOG_valence_track
         PWR_danceability_track PWR_key_mean_track PWR_key_track
         PWR_loudness_max_track PWR_loudness_min_track PWR_loudness_track
         PWR_tempo_track PWR_time_signature_max_track PWR_time_signature_min_track
         SQRT_energy_track SQRT_instrumentalness_track SQRT_liveness_track
         SQRT_total_tracks_album SQR_mode_mean_track SQR_mode_min_track
         acousticness_track danceability_track duration_ms_track energy_track
         instrumentalness_track key_max_track key_mean_track key_min_track key_track
         liveness_track loudness_max_track loudness_mean_track loudness_min_track
         loudness_track mode_max_track mode_mean_track mode_min_track mode_track
         speechiness_track tempo_max_track tempo_mean_track tempo_min_track tempo_track
         time_signature_max_track time_signature_mean_track time_signature_min_track
         time_signature_track total_followers_artist total_tracks_album
         track_number_track valence_track,
  criterio=BIC,
  sinicio=12345,
  sfinal=12445,
  fracciontrain=0.8,
  directorio=C:\Users\cris\Documents\TFM);
proc print data="hola";
run;
/* Copy WORK.SALEFEC into the permanent library. */
proc copy inlib=work outlib=datos;
  select salefec;
run;
/* Stepwise selection scored with SBC on the transformed variables, seeds 12345 to 12445.
   A stray page number (64) sitting inside the modelo list was removed. It would
   have been passed to the MODEL statement as if it were an effect. */
%randomselect(data=datos.variables_transformadas_train,
listclass=REP_disc_number_track album_type explicit_track, vardepen=popularity_track,
modelo=REP_disc_number_track album_type explicit_track
EXP_key_min_track EXP_loudness_mean_track EXP_mode_max_track EXP_mode_track LOG_speechiness_track LOG_tempo_mean_track LOG_tempo_min_track LOG_total_followers_artist
LOG_valence_track PWR_danceability_track PWR_key_mean_track PWR_key_track PWR_loudness_max_track PWR_loudness_min_track PWR_loudness_track PWR_tempo_track
PWR_time_signature_max_track PWR_time_signature_min_track
SQRT_energy_track SQRT_instrumentalness_track SQRT_liveness_track SQRT_total_tracks_album SQR_mode_mean_track
SQR_mode_min_track acousticness_track danceability_track
duration_ms_track energy_track instrumentalness_track key_max_track key_mean_track key_min_track key_track
liveness_track loudness_max_track loudness_mean_track
loudness_min_track loudness_track mode_max_track mode_mean_track mode_min_track mode_track speechiness_track
tempo_max_track tempo_mean_track tempo_min_track tempo_track time_signature_max_track time_signature_mean_track
time_signature_min_track time_signature_track
total_followers_artist total_tracks_album track_number_track valence_track,
criterio=SBC, sinicio=12345, sfinal=12445,
fracciontrain=0.8, directorio=C:\Users\cris\Documents\TFM); proc print data="hola";run;
proc copy inlib=work outlib=datos ;
select salefec; run; /*BACKWARD*/ %macro randomselect(data=,listclass=,vardepen=,modelo=,criterio=,sinicio=,sfi nal=,fracciontrain=,directorio=&directorio);
options nocenter linesize=256;
proc printto print="&directorio\kk.txt";run;
data _null_;file "&directorio\cosa.txt" linesize=2000;run;
%do semilla=&sinicio %to &sfinal;
proc surveyselect data=&data rate=&fracciontrain out=sal1234 seed=&semilla;run;
ods output SelectionSummary=modelos; ods output SelectedEffects=efectos;
ods output Glmselect.SelectedModel.FitStatistics=ajuste; proc glmselect data=sal1234 plots=all seed=&semilla;
class &listclass;
model &vardepen= &modelo/ selection=backward(select=&criterio choose=&criterio) details=all stats=all;
run;
ods graphics off; ods html close;
data union;i=5;set efectos;set ajuste point=i;run;
data _null_;semilla=&semilla;file "&directorio\cosa.txt" mod linesize=2000;set union;put effects ;run;
%end;
proc printto ;run; data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto @@;
if efecto ne 'Intercept' then output; run;
proc freq data=todos;tables efecto /out=sal;run; proc sort data=sal;by descending count;
proc print data=sal;run; data todos;
infile "&directorio\cosa.txt" linesize=2000; length efecto $ 1000;
input efecto $ &&; run;