* Start from an empty dataset; the Python block further down creates the output variables.
clear
* Ask the user for the run parameters. Each _request(name) stores the typed answer
* in the global macro of that name, which the Python block reads as a quoted string.
* file_stata: path of the workbook that is passed to pandas read_excel
display "Introduce file's name (no commas needed) " _request(file_stata)
* number_stata: count of non-distance columns, converted with int() in the Python block
display "Introduce the number of columns with no distances"_request(number_stata)
* FCM_stata: name of the column that is multiplied by the inverse distances
display "Introduce the name of the column with the FCM you will" _request(FCM_stata)
* populationcolumn_stata: name of the column compared against the maximum population
display "Introduce the name of the column with the population you want to analyze" _request(populationcolumn_stata)
* distance_stata: maximum distance, converted with int() in the Python block
display "Introduce maximum distance between municipalities" _request(distance_stata)
* population_stata: maximum population, converted with int() in the Python block
display "Introduce maximum number of population per municipality: " _request(population_stata)


python
import pandas as pd
import numpy as np

#Load the workbook named by the user and preview its first rows
file_python = "$file_stata"
data = pd.read_excel(file_python)
print(data.head())


#Split the distance matrix off from the remaining columns
print("Distances Matrix: ")

#n is the user-supplied count of columns that do not hold distances
number_python = "$number_stata"
n = int(number_python)
#The first column and the last n columns are left out of the matrix;
#rows are capped at (number of columns - 1)
total_columns = data.shape[1]
distances = data.iloc[:total_columns - 1, 1:total_columns - n]
print('Distance Matrix')
print(distances)
#Show the dimensions so they can be checked by eye
print(distances.shape)

#We apply the chosen maximum population restriction
#The threshold and the column name arrive as Stata global macros (as text)
population_1 = "$population_stata"
population = int(population_1)
populationcolumn_python = "$populationcolumn_stata"
#NOTE(review): the row selection below is evaluated but its result is never
#assigned, so `distances` is left unchanged and no municipality is filtered here.
#Assigning it would change the number of rows, which the element-wise product
#further down appears to rely on - confirm the intended behaviour before fixing.
distances.loc[data[populationcolumn_python]<= population]

#Maximum distance restriction: entries above the limit are replaced by 0

y_stata = "$distance_stata"
y = int(y_stata)
within_reach = distances <= y
matrix_minor_y = np.where(within_reach, distances, 0)
print('MATRIX WITH DISTANCE RESTRICTION')
print(matrix_minor_y)

#Element-wise reciprocal of the restricted distances
#Division warnings are silenced because zero entries produce 1/0
np.seterr(divide='ignore', invalid='ignore')
from numpy import inf
inverse = np.divide(1, matrix_minor_y)
inverse[np.equal(inverse, inf)] = 0 #1/0 yields inf; those entries are reset to 0
print('INVERSE')
print(inverse)


#Inverse distances times the FCM column (Column C)
#Both shapes are printed so the matrix product can be checked by eye
print(inverse.shape)
FCM_python = "$FCM_stata"
fcm_values = data[FCM_python]
print(fcm_values.shape)
product_FCM = np.matmul(inverse, fcm_values)
print('COLUMN C')
print(pd.Series(product_FCM)) #pd.Series so results are shown in a single column
print(product_FCM.shape)

#Reciprocal of each column total of the inverse matrix (Column D)
sum_inverses = inverse.sum(axis=0)
print('Sum inverses per municipality')
print(pd.Series(sum_inverses))
print(sum_inverses.shape)
inverse_sum_inverses = np.divide(1, sum_inverses)
print('COLUMN D')
print(pd.Series(inverse_sum_inverses))

#Element-wise product of Column D and Column C gives V
v = np.multiply(inverse_sum_inverses, product_FCM)
#Single-column table of the v values for display
v_df = pd.DataFrame({"v value": v})
print(v_df)

#Label each v value with its municipality name and export the table to Stata.
#The DataFrame loaded above is called `data`; the earlier references to `datos`
#pointed at a name that is never defined and raised NameError.
#Note that nothing is dropped here: every row of `data` gets a result row.
municipalities = data['MUNICIPIO']
#np.asarray makes the pairing positional, so a labelled v is never reindexed
results = pd.Series(np.asarray(v), index = municipalities)
print(results)
results = pd.DataFrame(results)
results.columns = ["v value"]
print(results)


#USE THE SFI MODULE
from sfi import Data
Data.setObsTotal(len(results))

#Names are passed to Stata as plain Python strings
municipality_names = [str(name) for name in municipalities]
#addVarStr expects the string width, not the number of rows; size the variable
#to the longest name measured in UTF-8 bytes (at least 1)
name_width = max(
    (len(name.encode("utf-8")) for name in municipality_names),
    default=1,
)
Data.addVarStr("MUNICIPALITY", max(name_width, 1))
Data.addVarDouble("v_value")

Data.store("MUNICIPALITY", None, municipality_names, None)
Data.store("v_value", None, np.asarray(v), None)

end

* Rename v_value_ for FCM_2011
* NOTE(review): the Python block creates the variable v_value (no trailing
* underscore), so the disabled rename below would need that name - confirm.
*rename v_value_ v_value_2011

* Generate variables year and id
* year is a constant tag (2011); id is the observation number (_n)
gen year=2011
gen id = _n

* Write the dataset to disk, overwriting any existing file.
* NOTE(review): "filepath" looks like a placeholder - confirm the intended path.
save "filepath", replace