Stata The Stata listserver
[Date Prev][Date Next][Thread Prev][Thread Next][Date index][Thread index]

st: the xi: command --- the ugly hack


From   Michael Ash <[email protected]>
To   [email protected]
Subject   st: the xi: command --- the ugly hack
Date   Fri, 05 Jul 2002 10:44:50 -0400 (EDT)

> I am using the xi: command to construct dummy variables
> from categorical variables. My problem is that I would
> like the xi-command to not remove one of the dummies...

> I just want to add that I've run into this problem too,
> and I too consider it a (rare) limitation.  More than
> once I've wanted to run a regression with explicit
> dummies for each category in combination with the
> -nocons- option, and without an ugly hack not worth
> repeating here (Uli's suggestion is *much* nicer) it
> isn't possible with -xi-'s dummies.

I agree that the "tab, gen()" solution is much nicer, but
here is the "ugly" hack.  The following command "xa" works
exactly like "xi" except it does not omit any category.  I
believe that it does not interfere with the normal operation
of xi. Comments welcome.

-Michael Ash




------xa.ado-----CUT BELOW THIS LINE-------
*! version 1.1.0   10/19/94
program define xa
	version 4.0
	/* Entry point.  Scans the command line for I. terms, has the helper
	   programs build the indicator variables, substitutes the resulting
	   variable lists back into the command line and, when used as a
	   prefix (xa : command ...), runs the rewritten command. */
	parse "`*'", parse(" ,:()")

	/* Prefix form: discard the colon, remember to execute afterwards, and
	   begin scanning at token 2 so the command name itself is skipped. */
	local pos 1
	if "`1'"==":" {
		mac shift
		local xeq "yes"
		local pos 2
	}

	capture drop I*		/* (crude but necessary) */

	/* Start with empty bookkeeping lists shared with the helpers. */
	global X__in
	global X__out
	global X__cont

	while "``pos''"!="" {
		if upper(substr("``pos''",1,2))=="I." {
			/* Interaction terms go to xa_eii, plain terms to xa_ei;
			   each helper hands its answer back in global S_1. */
			if index("``pos''","*") {
				xa_eii ``pos'' "*"
			}
			else {
				if index("``pos''","|") {
					xa_eii ``pos'' "|"
				}
				else {
					xa_ei ``pos''
				}
			}
			local `pos' "$S_1"
			/* A lone period means the term expands to nothing. */
			if "``pos''"=="." {
				local `pos' " "
			}
		}
		local pos=`pos'+1
	}

	/* Leave no bookkeeping behind. */
	global X__in
	global X__out
	global X__cont

	if "`xeq'"=="yes" {
		`*'
	}
end

program define xa_ei /* I.<name> */
	/* Expand one term of the form I.<name> into a set of indicator
	   variables, one per distinct nonmissing value of <name>.
	   Argument 1 is the term as typed.
	   Result: global S_1 holds the wildcard <stub>* matching the new
	   variables, or a lone period when no such variable exists.
	   The variable name and the result are appended to the globals
	   X__in and X__out so that a repeated term is not expanded twice. */
	version 4.0
	local orig "`1'"

	tempvar g on
				/* strip the leading two characters (the I. prefix) */
	local vn = substr("`orig'",3,.)
				/* _crcunab is defined outside this file; presumably it
				   unabbreviates the name and returns it in S_1 - TODO confirm */
	_crcunab `vn'
	local vn "$S_1"

				/* already expanded during this run?  then return the
				   word of X__out at the same position and stop */
	if "$X__in" != "" {
		parse "$X__in", parse(" ")
		local i 1 
		while "``i''"!="" { 
			if "``i''"=="`vn'" {
				global S_1 : word `i' of $X__out
				exit
			}
			local i=`i'+1
		}
	}

				/* g numbers the distinct values 1, 2, ...; _result(6)
				   after summarize is read as the number of groups */
	qui egen `g' = group(`vn')
	qui summ `g'
	local ng = _result(6)
	local lowcode 1
	local topcode `ng'
	local useuser 0
				/* a non-string variable whose nonmissing values are all
				   integers from 0 through 99 is "naturally coded": its own
				   values are used as the name suffixes instead of 1..ng */
	cap confirm string var `vn'
	if _rc {
		local isnumb "yes"
		cap assert `vn'==int(`vn') & `vn'<100 & `vn'>=0 if `vn'!=.
		if _rc==0 { 
			qui summ `vn'
			local lowcode = _result(5)
			local topcode = _result(6)
			local useuser 1
		}
	}
				/* obtain an unused name stub sized for the largest suffix */
	xa_mkun `vn' `topcode'
	local svn "$S_1"

				/* user char vn[omit] containing <value> */
	local omis : char `vn'[omit]
	if "`omis'" != "" {
		tempvar umark
		if "`isnumb'"=="yes" {
			capture confirm number `omis'
			if _rc { 
				di in red /*
			*/" characteristic `vn'[omit] (`omis') invalid;" /*
			*/ _n "variable `vn' is numeric"
				exit 198
			}
			gen byte `umark'= float(`vn')==float(`omis')
		}
		else	gen byte `umark'= `vn'=="`omis'"
				/* the requested value never occurs: say so and forget it */
		capture assert `umark'==0
		if _rc==0 {
			di in gr "(characteristic `vn'[omit]: `omis'" _n /*
		*/ "yet variable `vn' never equals `omis'; characteristic ignored)"
			local umark
		}
	}

				/* code for dropping first category */
				/* (with no variable-level request and no dataset-level
				   omit characteristic, group 1 is the one marked) */
	local xamode : char _dta[omit]
	if "`umark'"=="" & "`xamode'"=="" {
		tempvar umark
		qui gen byte `umark'=(`g'==1)
	}



				/* max, jmax and dval below track which category would be
				   omitted; the drop that used jmax is commented out further
				   down, so within this program they have no effect */
	local max 0 
	local jmax 0
	local j 1
	qui gen long `on'=. 
	while `j'<=`ng' {
				/* obtain value */
				/* the replace carries the observation number of a match
				   forward, so the last observation of on points at the
				   last observation belonging to group j */
		qui replace `on'=cond(`g'==`j',_n,`on'[_n-1])
		local value = `vn'[`on'[_N]]

				/* suffix: the value itself when naturally coded, else j */
		if `useuser' { local k `value' }
		else	local k `j'
				/* indicator is missing wherever the group code is missing */
		qui gen byte `svn'`k' = `g'==`j' if `g'!=.

		label var `svn'`k' "`vn'==`value'"
		if "`umark'"=="" {
				/* no mark variable: remember the most frequent group */
			qui count if `g'==`j'
			if _result(1)>`max' { 
				local max = _result(1)
				local jmax `k'
				local dval "`value'"
			}
		}
		else {
				/* mark variable present: remember the group it flags */
			capture assert `umark' if `g'==`j'
			if _rc==0 {
				local jmax `k'
				local dval "`value'"
			}
		}
		local j=`j'+1
	}
				/* report the term and the range of variables created */
	if `useuser' {
		di in gr "`orig'" /*
			*/ _col(23) "`svn'`lowcode'-`topcode'" /*
			*/ _col(36) "(naturally coded)"
	}
	else	di in gr "`orig'" /*
			*/ _col(23) "`svn'`lowcode'-`topcode'" /*
			*/ _col(36) ""

				/* the next line is deliberately commented out: no
				   category is removed, so every indicator is kept */
	*drop `svn'`jmax'
				/* return the wildcard if at least one variable matches
				   it, otherwise a lone period */
	capture list `svn'* in 1
	if _rc {
		global S_1 "."
	}
	else	global S_1 "`svn'*"
				/* record this expansion for the lookup at the top */
	global X__in "$X__in `vn'"
	global X__out "$X__out $S_1"
end

program define xa_eic
	version 4.0
	/* Interaction of a categorical term with a continuous variable.
	   Argument 1: the term, of form i.<varname>*<varname> (or with |).
	   Argument 2: the operator character that separates the two parts.
	   Result: global S_1 holds the variable list to substitute. */
	local term "`1'"
	local op "`2'"

	/* Split the term at the operator. */
	local cut = index("`term'","`op'")
	local lhs = substr("`term'",1,`cut'-1)
	local rhs = substr("`term'",`cut'+1,.)
	_crcunab `rhs'
	local rhs "$S_1"
	local type : type `rhs'

	/* Build (or look up) the indicators for the categorical part. */
	xa_ei `lhs'
	local cat "$S_1"

	if "`cat'"=="." {
		di in gr "`term'" _col(36) "(requires no interaction terms)"
		xa_eicu `rhs'
		exit
	}

	/* Longest indicator name decides how many characters of the
	   continuous variable name fit into the stub. */
	_crcunab `cat'
	local dums "$S_1"
	parse "`dums'", parse(" ")
	local wid 0
	while "`1'"!="" {
		if length("`1'")>`wid' {
			local wid = length("`1'")
		}
		mac shift
	}
	local wid = `wid'-length("`cat'") + 1
	local a = substr("`cat'",2,1)
	local b = substr("`rhs'",1,4-`wid')
	local stub "I`a'X`b'_"

	xa_mkun2 `stub'
	local stub "$S_1"

	/* One product variable per indicator, reusing the indicator suffix. */
	parse "`dums'", parse(" ")
	local k 1
	while "``k''"!="" {
		local sfx = substr("``k''",length("`cat'"),.)
		local lab : variable label ``k''
		qui gen `type' `stub'`sfx' = ``k''*`rhs'
		label var `stub'`sfx' "(`lab')*`rhs'"
		local k=`k'+1
	}

	/* xa_eicu leaves the continuous variable (or nothing) in S_1; with
	   the star operator the indicators themselves are returned as well. */
	xa_eicu `rhs'
	if "`op'"=="*" {
		global S_1 "`cat' ${S_1}`stub'*"
	}
	else {
		global S_1 "${S_1}`stub'*"
	}
	di in gr "`term'" _col(23) "`stub'#" _col(36) "(coded as above)"
end

program define xa_eicu	/* <contvar_name> */
	version 4.0
	/* Return the continuous variable in global S_1 the first time it is
	   requested and an empty S_1 on every later request; the names
	   already handed out are kept in global X__cont. */
	local cvar "`1'"

	/* Answer used when the variable turns out to be known already. */
	global S_1

	if "$X__cont" != "" {
		parse "$X__cont", parse(" ")
		while "`1'"!="" {
			if "`1'"=="`cvar'" {
				exit
			}
			mac shift
		}
	}

	/* First request: remember the name and return it. */
	global X__cont "$X__cont `cvar'"
	global S_1 "`cvar' "		/* sic */
end

program define xa_eii
	version 4.0
	/* Interaction term whose first part is categorical.
	   Argument 1: the term as typed.
	   Argument 2: the operator character that separates the two parts.
	   When the second part is not an I. term the work is passed on to
	   xa_eic; otherwise all pairwise products of the two indicator
	   sets are generated.  Result: global S_1 holds the variable list. */
	local term "`1'"
	local op "`2'"

	local cut = index("`term'","`op'")
	local lhs = substr("`term'",1,`cut'-1)
	local rhs = substr("`term'",`cut'+1,.)

	/* Categorical by continuous is handled elsewhere. */
	if upper(substr("`rhs'",1,2))!="I." {
		xa_eic `term' "`op'"
		exit
	}
	/* Two categorical parts may only be joined by a star. */
	if "`op'"!="*" {
		di in red "I.xxx|I.yyy not allowed"
		exit 198
	}

	xa_ei `lhs'
	local cat1 "$S_1"
	xa_ei `rhs'
	local cat2 "$S_1"

	if "`cat1'"=="." | "`cat2'"=="." {
		di in gr "`term'" _col(36) "(requires no interaction terms)"
		exit
	}

	/* Longest name in each indicator set. */
	_crcunab `cat1'
	local dums1 "$S_1"
	parse "`dums1'", parse(" ")
	local wid1 0
	while "`1'"!="" {
		if length("`1'")>`wid1' {
			local wid1 = length("`1'")
		}
		mac shift
	}
	_crcunab `cat2'
	local dums2 "$S_1"
	parse "`dums2'", parse(" ")
	local wid2 0
	while "`1'"!="" {
		if length("`1'")>`wid2' {
			local wid2 = length("`1'")
		}
		mac shift
	}

	/* Combined suffix width selects how much decoration the stub gets. */
	local wid1 = `wid1'-length("`cat1'") + 1
	local wid2 = `wid2'-length("`cat2'") + 1
	local len = `wid1'+`wid2'
	local a = substr("`cat1'",2,1)
	local b = substr("`cat2'",2,1)
	if `len'<2 | `len'>4 {
		di in red "syserr: length |`len'| not right"
		exit 198
	}
	if `len'==2 {
		local stub "I`a'X`b'_"
	}
	else {
		if `len'==3 {
			local stub "I`a'`b'_"
		}
		else {
			local stub "I`a'`b'"
		}
	}

	xa_mkun2 `stub'
	local stub "$S_1"

	/* Outer loop walks the first set by word position, inner loop walks
	   the second set through the positional macros. */
	parse "`dums2'", parse(" ")
	local m 1
	local v1 : word `m' of `dums1'
	while "`v1'"!="" {
		local sfx1 = substr("`v1'",length("`cat1'"),.)
		local lab1 : variable label `v1'
		local n 1
		while "``n''"!="" {
			local sfx2 = substr("``n''",length("`cat2'"),.)
			local lab2 : variable label ``n''
			qui gen byte `stub'`sfx1'_`sfx2' = `v1'*``n''
			label var `stub'`sfx1'_`sfx2' "`lab1' & `lab2'"
			local n=`n'+1
		}
		local m=`m'+1
		local v1 : word `m' of `dums1'
	}

	global S_1 "`cat1' `cat2' `stub'*"
	di in gr "`term'" _col(23) "`stub'#-#" _col(36) "(coded as above)"
end


program define xa_mkun /* meaning make_unique_name <suggested_name> <topcat> */
	version 4.0
	/* Derive a name stub from the suggested name, leaving room for a
	   suffix as wide as the top category number, then let xa_mkun2 make
	   it unique.  Result: global S_1 (set by xa_mkun2). */
	local base "`1'"
	local top `2'

	/* Anything that is not below 1000 cannot be accommodated. */
	if `top'>=1000 {
		di in red "too many groups for `base'"
		exit 499
	}

	local name "I`base'"
	if `top'<10 {
		/* one-digit suffix */
		local name = substr("`name'",1,6) + "_"
	}
	else {
		if `top'<100 {
			/* two-digit suffix */
			local name = substr("`name'",1,5) + "_"
		}
		else {
			/* three-digit suffix, no room for the underscore */
			local name = substr("`name'",1,5)
		}
	}
	xa_mkun2 `name' `top'
end

program define xa_mkun2 /* meaning make_unique_name <suggested_name> */
	version 4.0
	/* Turn the suggested stub into one that no existing variable name
	   starts with.
	   Argument 1: suggested stub.
	   Result: global S_1 holds the stub to use.
	   While the stub is taken, its last character is replaced in turn by
	   each letter a-z, A-Z.
	   Error 499 is issued when every replacement is taken as well.
	   Without that check the candidate letter becomes empty, the stub
	   shrinks to a prefix of the existing names and the loop never ends. */
	local name "`1'"

	local totry "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
	local ntry = length("`totry'")
	local l 0
	local len = length("`name'")
	capture list `name'* in 1		/* try name out */
	while _rc==0 {
		local l=`l'+1
		if `l'>`ntry' {
			di in red "no unused variable-name stub could be formed from `1'"
			exit 499
		}
		local name = substr("`name'",1,`len'-1)+substr("`totry'",`l',1)
		capture list `name'* in 1
	}
	global S_1 "`name'"
end
exit

(Notes carried over from xi.  In xa the drop is commented out, so every
category keeps its dummy.)

I.myvar		means dummies for myvar, drop the most frequent
I.myvar*this	means continuous interaction (still drop most frequent)
I.myvar*I.that	means dummy interaction.


I.myvar[what]	means dummies for myvar, drop dummy for myvar==what
I.myvar*thatvar means interaction of myvar and thatvar 
I.myvar[val]*thatvar[val] means drop corresponding.
	
12345678
I123_#x#
I12##x##


For I.name*I.name
We try:
	12345
	IrXr_	e.g., IrXr_#_#   for two 1-digit numbers
	Irr_    e.g., Irr_#_## or Irr_##_# for 1 and 2 digit numbers
	Irr     e.g., Irr##_##   for two 2-digit numbers

For I.name*name
	Irr_#

I.abc*I.def

IrXr_ we try, then shorten to 
Irr_

I12345_#
I1234_##
I1234###
------xa.ado-----CUT ABOVE THIS LINE-------



*
*   For searches and help try:
*   http://www.stata.com/support/faqs/res/findit.html
*   http://www.stata.com/support/statalist/faq
*   http://www.ats.ucla.edu/stat/stata/



© Copyright 1996–2024 StataCorp LLC   |   Terms of use   |   Privacy   |   Contact us   |   What's new   |   Site index