Bookmark and Share

Notice: On April 23, 2014, Statalist moved from an email list to a forum, based at statalist.org.


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: st: Data set too large for spatwmat


From   Austin Nichols <[email protected]>
To   [email protected]
Subject   Re: st: Data set too large for spatwmat
Date   Tue, 30 Nov 2010 17:41:58 -0500

henrik andersson <[email protected]>:
You are requested to cite the source of user-written software:
-spatwmat- is from STB-60 and is available via -findit-.
The limits look to be due to the use of -svmat- and -mkmat- so one way
forward is to Mata-ize the program.
Try saving the following to a file spwm2.ado (compare to the original
ado file or just look for lines that start mata: to see the main
changes),
and check that you get the same results in the smaller dataset (I
tested this exactly once):

*! -spwm2- 1.0 - 30 Nov 2010 [email protected] posted to Statalist
* based on
* Version 1.0 - 29 January 2001 STB-60 sg162
* -spatwmat- Generates different kinds of spatial weights matrices
* Author: Maurizio Pisati
* Department of Sociology and Social Research
* University of Milano Bicocca (Italy)
* [email protected]
prog spwm2
/*
   spwm2: Mata-assisted variant of -spatwmat- (STB-60 sg162).

   Builds a spatial weights matrix and stores it in the Stata matrix
   given in name().  Two modes:
     - using filename : import the weights from a Stata dataset whose
                        observations are rows and variables are columns;
                        drop() removes the listed rows and columns.
     - xcoord() ycoord() band() : build distance-based weights for the
                        data in memory; binary gives 0/1 weights,
                        otherwise weights are 1/(distance^friction).
   standardize row-standardizes the stored matrix.
   eigenval() stores the eigenvalues of the stored weights matrix
   (sorted in descending order) in the named matrix, provided every
   location has at least one neighbor.

   Invalid option combinations exit with r(198), missing coordinates
   with r(416).
*/
version 9.2
syntax [using/], Name(string)                           /*
          */     [DROP(numlist min=1 >=0 sort)]         /*
          */     [Xcoord(varname numeric)]              /*
          */     [Ycoord(varname numeric)]              /*
          */     [Band(numlist min=2 max=2 >=0 sort)]   /*
          */     [Friction(real 1)]                     /*
          */     [BINary]                               /*
          */     [Standardize]                          /*
          */     [Eigenval(string)]
confirm name `name'
tempname D W V L R
if "`using'"=="" & ("`xcoord'"=="" | "`ycoord'"=="") {
    di as err "You must specify both x- and y-coordinates using options "
    di as err "{bf:{ul:x}coord({it:varname})} and " _c
    di as err "{bf:{ul:y}coord({it:varname})}"
    /* return a nonzero code so callers can detect the failure */
    exit 198
}
if "`using'"=="" & "`band'"=="" {
    di as err "You must specify distance band using option " _c
    di as err "{bf:{ul:b}and({it:numlist})}"
    exit 198
}
local OUTPUT "The following matrix has been created:"
*********
*  4. Import weights matrix from file
*********
if "`using'"!="" {
    preserve
    qui use `"`using'"', clear
    /* Drop rows and columns if requested */
    if "`drop'"!="" {
        local NDROP : word count `drop'
        unab VLIST : _all
        qui generate RDROP=0
        local i=1
        while `i'<=`NDROP' {
            /* K, not D: D holds the tempname declared above */
            local K : word `i' of `drop'
            local VAR : word `K' of `VLIST'
            local CDLIST "`CDLIST'`VAR' "
            qui replace RDROP=1 in `K'
            local i=`i'+1
        }
        qui drop `CDLIST'
        qui drop if RDROP
        qui drop RDROP
    }
    /* Check if weights are binary */
    unab VLIST : _all
    local NVAR : word count `VLIST'
    local SUM=0
    local i=1
    while `i'<=`NVAR' {
        local VAR : word `i' of `VLIST'
        qui capture assert `VAR'==0 | `VAR'==1
        if _rc!=0 {
            local SUM=`SUM'+1
        }
        local i=`i'+1
    }
    if `SUM'==0 {
        local binary "binary"
    }
    else {
        local binary ""
    }
    /* Check if each location has at least one neighbor */
    qui egen ROWSUM=rsum(_all)
    qui count if ROWSUM==0
    local NN=r(N)
    qui drop ROWSUM
    /* Create intermediate matrix */
    qui mkmat _all, matrix(`W')
    restore
    /* Check if matrix is square */
    local NROW=rowsof(`W')
    local NCOL=colsof(`W')
    if `NROW'!=`NCOL' {
        di as err "Matrix is not square"
        exit 198
    }
    local N=`NROW'
    /* Mata copy, needed by the standardize and eigenval steps below */
    mata: `W'=st_matrix("`W'")
    /* Create labels */
    if "`binary'"!="" {
        local WT "Imported binary weights matrix"
    }
    else {
        local WT "Imported non-binary weights matrix"
    }
}
*********
*  5. Create distance-based weights matrix
*********
if `"`using'"'=="" {
    /* Define distance band */
    local LOWER : word 1 of `band'
    local UPPER : word 2 of `band'
    /* Check appropriateness of coordinate variables */
    capture qui assert `xcoord'!=.
    if _rc!=0 {
        di as err "Variable `xcoord' has missing values"
        exit 416
    }
    capture qui assert `ycoord'!=.
    if _rc!=0 {
        di as err "Variable `ycoord' has missing values"
        exit 416
    }
    local N=_N
    /* Fill the symmetric distance and weights matrices pair by pair */
    matrix `W'=J(`N',`N',0)
    matrix `D'=J(`N',`N',0)
    local i=1
    while `i'<=`N' {
        local j=`i'+1
        while `j'<=`N' {
            local A=(`xcoord'[`i']-`xcoord'[`j'])^2
            local B=(`ycoord'[`i']-`ycoord'[`j'])^2
            local DIST=sqrt(`A'+`B')
            matrix `D'[`i',`j']=`DIST'
            matrix `D'[`j',`i']=`DIST'
            if `DIST'>`LOWER' & `DIST'<=`UPPER' {
                if "`binary'"!="" {
                    matrix `W'[`i',`j']=1
                    matrix `W'[`j',`i']=1
                }
                else {
                    matrix `W'[`i',`j']=1/(`DIST'^`friction')
                    matrix `W'[`j',`i']=1/(`DIST'^`friction')
                }
            }
            local j=`j'+1
        }
        local i=`i'+1
    }
    mata: `W'=st_matrix("`W'")
    mata: `D'=st_matrix("`D'")
    /* Generate distance statistics */
    /* MINMAX: smallest of the per-location maximum distances */
    mata: st_local("MINMAX",strofreal(colmin(rowmax(`D'))))
    /* Set the diagonal to missing so that a location's zero distance */
    /* to itself is not taken as its nearest-neighbor distance        */
    mata: `D'=`D'+diag(J(rows(`D'),1,.))
    /* MAXMIN: largest of the per-location minimum distances */
    mata: st_local("MAXMIN",strofreal(colmax(rowmin(`D'))))
    /* Check if each location has at least one neighbor */
    mata: st_local("NN",strofreal(colsum(rowsum(`W'):==0)))
    /* Create labels */
    if "`binary'"!="" {
        local WT "Distance-based binary weights matrix"
    }
    else {
        local WT "Inverse distance weights matrix"
    }
}
*********
*  6. Row-standardize weights matrix
*********
if "`standardize'"!="" {
    /* Rows that sum to zero are divided by 1 and therefore stay zero. */
    /* Only the Stata matrix is overwritten; the Mata copy keeps the   */
    /* raw weights for the eigenvalue step.                            */
    mata: st_matrix("`W'",`W':/(rowsum(`W')+(rowsum(`W'):==0)))
}
/* Create final matrix (after standardization, so it is what the labels say) */
matrix `name'=`W'
*********
*  7. Create weights matrix eigenvalues
*********
if "`eigenval'"!="" & `NN'>0 {
    di as err "Eigenvalues matrix cannot be computed because of the presence"
    di as err "of one or more locations with no neighbors"
}
if "`eigenval'"!="" & `NN'==0 {
    mata: `L'=.
    mata: `V'=.
    if "`standardize'"!="" {
        /* R*W*R with R=diag(rowsum^(-1/2)) is similar to the          */
        /* row-standardized matrix, so it has the same eigenvalues     */
        mata: `R'=diag(rowsum(`W'):^(-1/2))
        mata: eigensystem(`R'*`W'*`R',`V',`L')
    }
    else {
        /* not standardized: eigenvalues of the raw weights matrix */
        mata: eigensystem(`W',`V',`L')
    }
    mata: st_matrix("`eigenval'",sort(Re(`L''),-1))
    local OUTPUT "The following matrices have been created:"
}
*********
*  8. Add relevant info to weights matrix
*********
if "`using'"!="" & "`binary'"!="" local ROW="SWMImpo Yes "
if "`using'"!="" & "`binary'"=="" local ROW="SWMImpo No  "
if "`using'"=="" & "`binary'"!="" local ROW="SWMDist Yes "
if "`using'"=="" & "`binary'"=="" local ROW="SWMDist No  "
if "`standardize'"!="" {
    local ROW="`ROW'Yes"
}
else {
    local ROW="`ROW'No"
}
matrix rownames `name'=`ROW'
if "`using'"=="" {
    /* Encode the band limits (integer and decimal parts) in the column names */
    local INT=int(`LOWER')
    local DEC=`LOWER'-`INT'
    local DEC=string(`DEC')
    local COL "`INT' `DEC'"
    local INT=int(`UPPER')
    local DEC=`UPPER'-`INT'
    local DEC=string(`DEC')
    local COL "`COL' `INT' `DEC'"
    matrix colnames `name'=`COL'
}
*********
*  9. Display report
*********
if "`standardize'"!="" {
    local S "(row-standardized)"
}
di _newline
di as txt "`OUTPUT'"
di ""
di as txt "1. `WT' " as res "`name'" as txt " `S'"
di as txt "   Dimension: " as res "`N'x`N'"
if "`using'"=="" {
    di as txt "   Distance band: " as res "`LOWER' < d <= `UPPER'"
    di as txt "   Friction parameter: " as res "`friction'"
    di as txt "   Largest minimum distance: " %-9.2f as res `MAXMIN'
    di as txt "   Smallest maximum distance: " %-9.2f as res `MINMAX'
}
if `NN'==1 {
    di ""
    di as err "   Beware! `NN' location has no neighbors"
}
else if `NN'>1 {
    di ""
    di as err "   Beware! `NN' locations have no neighbors"
}
if `NN'>0 & "`using'"=="" {
    di as err "   You are advised to extend the distance band"
}
if "`eigenval'"!="" & `NN'==0 {
    di ""
    di as txt "2. Eigenvalues matrix " as res "`eigenval'"
    di as txt "   Dimension: " as res "`N'x1"
}
di _newline
*********
*  10. End program
*********
capture matrix drop `W'
/* Mata objects named by tempnames are global and are not removed  */
/* automatically when the program ends; drop whichever ones exist. */
capture mata: mata drop `D'
capture mata: mata drop `W'
capture mata: mata drop `V'
capture mata: mata drop `L'
capture mata: mata drop `R'
end


On Tue, Nov 30, 2010 at 11:40 AM, henrik andersson
<[email protected]> wrote:
> Hi,
>
> I'm trying to create a spatial weight matrix using spatwmat for a data set containing 3594 observations, but the matrix cannot be created. I've tested creating the weight matrix based on different subsets of my data set and the matrix can be created when I have 2364 observations, but not when I have 2798. I've therefore concluded that my data set is too large for the memory that I can allocate to Stata.
>
> I use Stata/MP 10.1 and spatwmat version 1.0.
>
> I'm able to allocate 30 GB to the memory and I have also maximized the number of variables and the matrix size.
>
> Current memory allocation
>
>                    current                                 memory usage
>    settable          value     description                 (1M = 1024k)
>    --------------------------------------------------------------------
>    set maxvar        32767     max. variables allowed          12.751M
>    set memory        30720M    max. data space             30,720.000M
>    set matsize       11000     max. RHS vars in models        924.080M
>                                                            -----------
>                                                            31,656.831M
>
> When trying to create the matrix I get the following result:
>
> . spatwmat, name(W50_1) xcoord(y_rt90x) ycoord(x_rt90y) band(0 15000) standardize eigenval(eigen50_1)
> no room to add more variables
> r(902);
>
> I have three questions:
>
> 1. Is there any possibility to use spatwmat more efficiently than I do, i.e., to use the memory more efficiently and thereby be able to run it on the whole sample?
> 2. One possibility when using spatwmat is to import the weight matrix from a file. If I cannot create my matrix using Stata, does anyone have any suggestion about what program to use to create this matrix, preferably a program that creates the weight matrix in an identical way to spatwmat? (I of course prefer to run all my estimations in Stata.)
>
> Thanks in advance
>
> Henrik

*
*   For searches and help try:
*   http://www.stata.com/help.cgi?search
*   http://www.stata.com/support/statalist/faq
*   http://www.ats.ucla.edu/stat/stata/


© Copyright 1996–2018 StataCorp LLC   |   Terms of use   |   Privacy   |   Contact us   |   Site index