%MACRO partial_lift (tables, varScore, varTarget, event=1, maxPct=30) ; /* version 2018.06 */
    /*------------------------------------------------------------------------
      Draws a partial lift curve (cumulative share of events captured versus
      share of population taken by decreasing score) for one or several
      scored tables, restricted to the first maxPct percent of the
      population, and labels each curve with a partial index computed on
      that same range.

      Parameters
        tables     one or more data set names, passed as-is to a SET statement
        varScore   score variable; observations are ranked by decreasing score
        varTarget  target variable; its formatted value is compared to event
        event      formatted value of varTarget that counts as an event
                   (default 1)
        maxPct     right bound of the curve, in percent of the population
                   (default 30); lowered to the rounded event rate when the
                   event rate, in percent, is below it

      Side effects
        - creates work.__0, work.__data, work.__lift and work.__somers
          (the final clean-up of these tables is disabled, see the end of
          the macro)
        - stores the graph template "lift" in the item store work.modeles
          and prepends that item store to the ODS path
        - creates, then deletes, the character format $ds in work.formats

      NOTE(review): with several input tables the SELECT ... INTO below keeps
      only the first row returned, so a single event rate is used for every
      data set (index computation and "Parfait" line). Confirm this is the
      intended behaviour.
    ------------------------------------------------------------------------*/

    /* Keep the working macro variables out of the caller symbol tables. */
    %LOCAL nGroups p ;
    %LET nGroups = 100 ;

    /* Stack the input tables; keep only what is needed downstream. */
    DATA work.__data (KEEP=__ds __y &varScore __alea) ;
        SET &tables INDSNAME=origine ;
        LENGTH __ds $ 32 ;
        __ds = SCAN(origine,2,".") ;   /* member name without the libref */
        /* STRIP: the formatted value may carry leading blanks, which would
           otherwise make the comparison with the event value fail. */
        __y = (STRIP(VVALUE(&varTarget))="&event") ;
        __alea = RANUNI(1) ;           /* fixed seed: reproducible tie-break */
        IF NOT MISSING(&varScore) AND NOT MISSING(&varTarget) ;
    RUN ;

    /* Add the per-table row count and sort by decreasing score,
       ties broken by the random number. */
    PROC SQL UNDO_POLICY=NONE ;
        CREATE TABLE work.__data AS
        SELECT *, COUNT(*) AS __nTot
        FROM work.__data
        GROUP BY __ds
        ORDER BY __ds, &varScore DESC, __alea ;
    QUIT ;

    /* Event rate (first group only, see the review note in the header). */
    PROC SQL NOPRINT ;
        SELECT MEAN(__y) INTO : p
        FROM work.__data
        GROUP BY __ds ;
    QUIT ;
    %LET p = &p ;   /* removes the padding blanks left by INTO */

    /* Stop early instead of failing later on an empty or zero event rate
       (macro evaluation error, division by zero in the index and in the
       slope of the perfect-model line). */
    %IF %LENGTH(&p) = 0 %THEN %DO ;
        %PUT ERROR: partial_lift - no observation with both &varScore and &varTarget non missing. ;
        %RETURN ;
    %END ;
    %IF %SYSEVALF(&p <= 0) %THEN %DO ;
        %PUT ERROR: partial_lift - no observation where &varTarget matches event=&event.. ;
        %RETURN ;
    %END ;

    /* Rank within each table and cut into &nGroups equal-sized groups. */
    DATA work.__data ;
        SET work.__data ;
        BY __ds ;
        RETAIN __rank ;
        IF FIRST.__ds THEN __rank = 0 ;
        __rank = __rank + 1 ;
        /* __rank starts at 1, hence __rank-1: group 0 then holds exactly the
           first __nTot/&nGroups observations. */
        __centiles = MIN(INT((__rank-1)/(__nTot/&nGroups)), &nGroups-1) ;
    RUN ;

    /* The index formula below assumes the bound does not exceed the event
       rate, so cap maxPct. The comparison is done inside %SYSEVALF because
       the implicit integer evaluation of %IF cannot handle decimals. */
    %IF %SYSEVALF(&p*100 < &maxPct) %THEN %LET maxPct = %SYSFUNC(ROUND(&p*100)) ;

    /* Cumulative percentage of events per group, without printed output. */
    ODS EXCLUDE ALL ;
    ODS NORESULTS ;
    PROC FREQ DATA=work.__data (WHERE=(__y=1)) ;
        TABLE __centiles ;
        ODS OUTPUT oneWayFreqs = work.__lift (KEEP=__ds __centiles cumPercent) ;
        BY __ds NOTSORTED ;
    RUN ;

    /* Partial index per table: area between the lift curve and the diagonal
       on [0, x], divided by the same area for the perfect-model line. */
    DATA work.__somers ;
        SET work.__lift (WHERE=(__centiles < &maxPct)) ;
        BY __ds NOTSORTED ;
        RETAIN aire ;
        IF FIRST.__ds THEN aire = 0 ;
        /* Trapezoid rule: the last point only counts for half a step. */
        IF LAST.__ds THEN cumPercent = cumPercent/2 ;
        /* /100 turns the percentage into a fraction, /100 is the step width. */
        aire = aire + cumPercent/10000 ;
        IF LAST.__ds THEN DO ;
            x = SYMGET("maxPct")/100 ;
            p = SYMGET("p")+0 ;
            aire = aire - x**2/2 ;                 /* minus area under the diagonal */
            triangle = (x**2)/(2*p) - x**2/2 ;     /* perfect line minus diagonal  */
            gini = aire / triangle ;
            OUTPUT ;
        END ;
    RUN ;

    /* Format $ds: table name -> "table name (index)", used as legend label. */
    DATA work.__somers ;
        SET work.__somers ;
        start = __ds ;
        label = CATX(" ",__ds, CATS("(",PUT(gini,NUMX5.3),")")) ;
        fmtname = "DS" ;
        type = "C" ;
    RUN ;
    PROC FORMAT CNTLIN=work.__somers ;
    RUN ;

    ODS SELECT ALL ;
    ODS RESULTS ;

    /* Graph template; p is read from the macro variable when rendering. */
    PROC TEMPLATE ;
        DEFINE STATGRAPH lift / STORE=work.modeles ;
            NMVAR p ;
            BEGINGRAPH ;
                LAYOUT OVERLAY /
                    XAXISOPTS=(LABEL=%SYSFUNC(IFC(&sysscp=WIN,"Pop° par score décroissant","Population par score decroissant")) LINEAROPTS=(TICKVALUEFORMAT=NLPCT10.1 VIEWMIN=0))
                    YAXISOPTS=(LABEL=%SYSFUNC(IFC(&sysscp=WIN,"Fraction des évènements","Fraction des evenements")) LINEAROPTS=(TICKVALUEFORMAT=NLPCT10.1 VIEWMIN=0)) ;
                    /* Diagonal: selection independent of the score. */
                    LINEPARM X=0 Y=0 SLOPE=1 / LEGENDLABEL="Constant" NAME="c" LINEATTRS=(COLOR=GRAYC0) ;
                    /* Perfect model: every selected observation is an event. */
                    LINEPARM X=0 Y=0 SLOPE=EVAL(1/p) / LEGENDLABEL="Parfait" NAME="p" LINEATTRS=(COLOR=GRAYC0 THICKNESS=2) ;
                    SERIESPLOT X=EVAL((__centiles+1)/100) Y=EVAL(cumPercent/100) / GROUP=__ds NAME="l" ;
                    DISCRETELEGEND "c" "p" "l" / LOCATION=INSIDE AUTOALIGN=(BOTTOMRIGHT) ACROSS=1 BORDER=FALSE TITLE=%SYSFUNC(IFC(&sysscp=WIN,"Modèles","Modeles")) ;
                ENDLAYOUT ;
                ENTRYTITLE "Courbe de lift partielle (" {UNICODE "2264"x} " &maxPct %)" ;
            ENDGRAPH ;
        END ;
    RUN ;
    ODS PATH (PREPEND) work.modeles ;

    /* Add the origin (0, 0) to every curve: group -1 is plotted at x = 0. */
    PROC SQL NOPRINT ;
        CREATE TABLE work.__0 AS
        SELECT DISTINCT __ds, -1 AS __centiles, 0 AS cumPercent
        FROM work.__lift ;
    QUIT ;
    PROC APPEND BASE=work.__lift DATA=work.__0 FORCE ;
    RUN ;
    PROC SORT DATA=work.__lift ;
        BY __ds __centiles ;
    RUN ;

    PROC SGRENDER DATA=work.__lift (WHERE=(__centiles < &maxPct)) TEMPLATE=lift ;
        FORMAT __ds $ds. ;
    RUN ;

    /* Clean-up of the work tables, currently disabled:
         PROC DATASETS LIB=work NOLIST ;
             DELETE __0 __data __lift __somers ;
         RUN ; QUIT ;
    */

    /* The legend format is only meaningful for this call. */
    PROC CATALOG CAT=work.formats ;
        DELETE ds / ENTRYTYPE=FORMATC ;
    RUN ;
    QUIT ;
%MEND partial_lift ;