*! version 1.0.0  02/02/93  extrname utility; STB-13: dm13
program define _crcexnb /* fname mname lname odd */
	/*
	   Handle two-letter first names that have no middle name
	   (e.g. "ww", "ab") and decide whether each is really a pair
	   of initials.

	   Positional arguments (all are names of existing variables):
	     1  fname  string first name; trimmed, possibly shortened
	     2  mname  string middle name; trimmed, possibly filled in
	     3  lname  last name; accepted for interface compatibility,
	               not used by this program
	     4  odd    numeric "odd code" variable; for flagged rows it
	               becomes 11 or 21 when it was 0, and -1 when it
	               was already nonzero

	   See the note after -end- for the rationale behind the 59% rule.
	*/
	version 3.0
	local fname "`1'"
	local mname "`2'"
	local lname "`3'"
	local odd "`4'"
	tempvar bad
	quietly {
		replace `fname'=trim(`fname')
		replace `mname'=trim(`mname')

		/* bad==1: two-character first name, second character is
		   not a period, and the middle name is empty */
		gen byte `bad' = length(`fname')==2 & /*
			*/ substr(`fname',2,1)!="." & `mname'==""
		capture assert `bad'==0
		if _rc==0 {
			/* no candidates at all: nothing to do */
			exit
		}

		/* bad==2: candidate contains at least one of a,e,i,o,u,y.
		   NOTE(review): only lowercase letters are tested;
		   presumably the caller has already lowercased the
		   names -- confirm. */
		replace `bad'=2 if `bad' & index(`fname',"a")
		replace `bad'=2 if `bad' & index(`fname',"e")
		replace `bad'=2 if `bad' & index(`fname',"i")
		replace `bad'=2 if `bad' & index(`fname',"o")
		replace `bad'=2 if `bad' & index(`fname',"u")
		replace `bad'=2 if `bad' & index(`fname',"y")

		capture assert `bad'!=1		/* no consonant-only names */
		if _rc==0 {
			/* every candidate contains a vowel: keep them as
			   names and just flag them */
			replace `odd'=cond(`odd',-1,11) if `bad'
			exit
		}

		/* fix the consonant-only names: split into two initials.
		   mname must be filled before fname is truncated. */
		replace `mname'=substr(`fname',2,1) if `bad'==1
		replace `fname'=substr(`fname',1,1) if `bad'==1

		count if `bad'==1		/* consonant only */
		local cons = _result(1)
		count if `bad'==2		/* contains vowels */
		/* -count- leaves its tally in _result(1); the original
		   read _result(2) here, which is not set by -count-. */
		local vc = _result(1)

		/* `cons' is at least 1 here (the assert above failed), so
		   the denominator is never zero */
		if (`cons'/(`vc'+`cons')>.59) {
			/* consonant-only names dominate: treat the
			   vowel-containing ones as initials too */
			replace `mname'=substr(`fname',2,1) if `bad'==2
			replace `fname'=substr(`fname',1,1) if `bad'==2
			replace `odd'=cond(`odd',-1,21) if `bad'==2
		}
		else replace `odd'=cond(`odd',-1,11) if `bad'==2
	}
end
exit

/*
We are considering here first names of the form "ww", "ab", etc., when
the middle name is missing.  This could arise from the original name,
say, "WW SMITH".

When the two letters are both consonants, we will split them into a
first initial and second initial.

If one or both of the two letters are vowels, however, we must decide
what to do.

If letters are randomly distributed and vowels are {a,e,i,o,u,y}, then
P(vowel) = 6/26 and

	P(no vowels in 2 letters) = (1-6/26)^2 = .59171598

Thus, if consonant-only names are more than 59% of the 2-letter sample,
we will also fix the containing-vowel subset.

Odd codes are:

	11	first name has two letters and middle name is blank.
		It was interpreted as a name because it contains vowels
		and, in the set of all apparently 2-letter names, the
		consonant-only names were not more than 59% of the sample.

	21	same as 11, but it was split because more than 59% of
		two-letter first names contained only consonants, so
		this is probably a member of the class.

If the odd variable is already nonzero for a flagged observation, it is
set to -1 instead of 11 or 21.
*/