So here's the SAS code I use, in case this is useful.
As Zach said, I lost access to running SAS at home when, in their infinite wisdom, they dropped the "teaching version" (nerfed by limiting to 1500 rows, plenty for home use) entirely and "replaced" it with something that only worked on their data sets. The licence for my home version expired earlier this year, and in their current pricing structure to get SAS for home use would cost $6500 for the first year of an annual licence. It's like they actually WANT me to learn R instead...
So I'm doing these now at work, which involves transferring data from Windows to Linux and results from Linux to Windows to post it. My contract is not indefinite, however, and it is possible I might lose even this access to SAS.
This is unfortunate; as you'll see, a SAS program is so much neater than doing anything in the alpha-release quality mutant abortion Microsoft calls "Excel". Still, since SAS apparently wants everyone under Federal Government size to migrate from using their products, there isn't much choice out there. If I had the time I'd try to do it in R, but at first glance (I don't actually know R) it looks more declarative (like SQL) than procedural and so would be trickier. SAS is just so damned good at this stuff...
Anyway, this is the process:
For each event I right-click on the Indexed Results and export it to an Excel spreadsheet. I delete the header line, then save it as a .csv with the name Event1.csv, Event2.csv, and so on.
The
%let events = <number> line must match the number of events.
The printout of names that comes first in the output is so I can go through and look for misspellings. There aren't many during a season, but enough times Charlie enters as Charles, or Keith Quistorff as Keith Q, and these need to be corrected by editing the .csv file where the discrepancy occurs. Between that list, and going through the class by class to look for obvious aliases, I catch the vast majority of data entry discrepancies.
Code:
%let events = 6;
options nosource ps = max;
%macro read;
%do i = 1 %to &events;
filename in "Event&i..csv" lrecl = 200;
data event_&i;
infile in dsd stopover;
length pos $6 cpos $6 class $8 number $4 driver $30 car $30
raw_c $12 factor $8 diff $8 from_1st $8
lastname $30 firstname $30;
format event 1. raw 7.3 pax 7.3;
input pos cpos class number driver car raw_c factor pax ?? diff from_1st;
event = &i;
if upcase(strip(raw_c)) = 'DNF' then do;
raw = 9999;
pax = 9999;
end;
else raw = input(raw_c, ?? 12.);
i = index(driver, ',');
if i > 0 then do;
lastname = substr(driver, 1, i - 1);
firstname = substr(driver, i + 1);
driver = catx(' ', firstname, lastname);
end;
keep event class driver raw pax;
run;
%end;
data events;
set %do i=1 %to &events; event_&i %end; ;
run;
%mend read;
%read
* Process the data into the final form;
* ;
* DNS and so on, which results in missing time, counts as no points;
* Choose raw or pax time as per class;
data season (drop=pax raw);
set events;
format time 7.3;
class = upcase(class);
driver = propcase(driver);
if class =: 'TIR' or class =: 'NOV' or class =: 'LAD' then do;
time = pax;
class = substr(class, 1, 3);
end;
else if class =: 'OF' then do;
time = pax;
class = 'OF';
end;
else if class =: 'X' and class ^= 'XP' then do;
time = pax;
class = 'X';
end;
else do;
time = raw;
end;
if time ^= .;
run;
proc sort data=season;
by class event time;
run;
data season;
set season;
by class event;
format pos 3. points 3.;
retain pos;
if first.class or first.event
then pos = 1;
else pos + 1;
if time = 9999
then points = 0;
else do;
select (pos);
when (1) do; points = 9; end;
when (2) do; points = 6; end;
when (3) do; points = 4; end;
when (4) do; points = 3; end;
when (5) do; points = 2; end;
otherwise points = 1;
end;
end;
run;
proc sort data=season;
by class driver;
run;
* Resolve the number of events in each class;
proc sort data=season(keep=driver class) out=entrants;
by driver class;
run;
proc freq data=entrants noprint;
by driver;
tables class / out=entrants(keep=driver class count);
run;
proc sort data=entrants;
by driver descending count;
run;
proc sql;
create table count1 as
select A.driver, class, count,
max(count) as maxcount,
events
from entrants as A,
(select driver, sum(count) as events
from entrants
group by driver) as B
where A.driver = B.driver
and class ^= 'NOV'
group by A.driver;
create table counts as
select A.*, B.nummax
from count1 as A,
(select driver, count(*) as nummax
from count1
where count = maxcount
group by driver) as B
where A.driver = B.driver;
select distinct driver, events from counts;
quit;
data eligible;
set counts;
length E $1;
if events >= (%eval(&events + 2))/2 then do;
if count = maxcount then do;
if nummax = 1 then E = '*';
else E = '?';
end;
else E = '-';
end;
keep driver class E;
run;
proc sort data=eligible;
by class driver;
run;
* Tabulate the points;
data table;
set season;
by class driver;
retain pt1-pt8;
select (event);
when (1) do; pt1 = points; pos1 = pos; end;
when (2) do; pt2 = points; pos2 = pos; end;
when (3) do; pt3 = points; pos3 = pos; end;
when (4) do; pt4 = points; pos4 = pos; end;
when (5) do; pt5 = points; pos5 = pos; end;
when (6) do; pt6 = points; pos6 = pos; end;
when (7) do; pt7 = points; pos7 = pos; end;
when (8) do; pt8 = points; pos8 = pos; end;
end;
if last.driver then do;
output;
pt1 = .; pt2 = .; pt3 = .; pt4 = .;
pt5 = .; pt6 = .; pt7 = .; pt8 = .;
end;
keep class driver pt1-pt8;
run;
proc sort data=table;
by class driver;
run;
data adjusted;
merge table eligible;
by class driver;
total6 = 0;
do i = 3 to 8;
p = ordinal(i, of pt1-pt8);
total6 = sum(total6, p);
select (p);
when (9) wins = sum(wins, 1);
when (6) seconds = sum(seconds, 1);
when (4) thirds = sum(thirds, 1);
otherwise;
end;
end;
drop i p;
run;
proc sort data=adjusted;
by class descending total6 descending wins descending seconds descending thirds;
* Assign overall standings;
data adjusted;
set adjusted;
by class;
if first.class then overall = 1;
else overall + 1;
run;
* Resolve ties that should stand;
data adjusted;
set adjusted;
by class;
format pos 3.;
retain pos;
l_total6 = lag(total6);
l_wins = lag(wins);
l_seconds = lag(seconds);
l_thirds = lag(thirds);
if first.class then pos = 1;
else do;
if (total6 ^= l_total6 or wins ^= l_wins
or seconds ^= l_seconds or thirds ^= l_thirds)
then pos = overall;
end;
drop overall;
run;
proc print data=adjusted noobs;
by class;
var pos E driver total6 pt1-pt8 wins seconds thirds;
where class ^= 'NOV';
run;
*---------------------------------------------------------------------------------------------------;
ods listing close;
ods csv file='Standings.csv';
run;
proc print data=adjusted noobs;
by class;
var class pos E driver total6 pt1-pt8 wins seconds thirds;
where class ^= 'NOV';
run;
ods csv close;
ods listing;
run;
*---------------------------------------------------------------------------------------------------;
%PUT ***** Program terminating with SYSCC=&SYSCC *****;
%LET SYSCC=0;
%LET NAMES=;
PROC SQL NOPRINT;
SELECT NAME INTO :NAMES SEPARATED BY ' '
FROM DICTIONARY.MACROS
WHERE (SCOPE='GLOBAL'
AND NAME NET "SYS");
quit;
%SYMDEL &NAMES;
TITLE; * Reset the titles;
LIBNAME _ALL_ CLEAR; * Reset the LIBNAMES;
/*
* Remove the WORK data sets;
PROC DATASETS LIBRARY=WORK NOLIST KILL MEMTYPE=(DATA VIEW);
QUIT;
*/
%PUT ***** Program terminated *****;
options nosource nomprint nomlogic;
RUN;
_________________
Martyn Wheeler
AXing Kit's '05 Mazda 3, #29 HStock
(when
The Gonzo Symphonic allows)