ACRO Tests#

[1]:
import os

import numpy as np
import pandas as pd
[2]:
# Uncomment the two lines below if acro is not installed,
# i.e. you are running from a development checkout.
# import sys
# sys.path.insert(0, os.path.abspath(".."))
[3]:
from acro import ACRO, add_constant, utils

Instantiate ACRO#

[4]:
# Create the ACRO session used by every later cell. Automatic suppression is
# switched off, so risky cells are reported in the log but left in the tables.
acro = ACRO(suppress=False)
INFO:acro:version: 0.4.8
INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False, 'survival_safe_threshold': 10, 'zeros_are_disclosive': True}
INFO:acro:automatic suppression: False

Load test data#

[5]:
# Location of the Stata test dataset, relative to the notebook directory.
path = os.path.join("../data", "test_data.dta")

# Read the file into a DataFrame and preview its first five rows.
df = pd.read_stata(filepath_or_buffer=path)
df.head(n=5)
[5]:
charity grant_type index year inc_activity inc_grants inc_donations inc_other inc_total total_costs ... sh_staff_grants_given sh_assets_grants_given sh_income_balance sh_staff_balance sh_assets_balance sh_income_assets sh_staff_assets sh_income_staff_costs sh_assets_staff_costs wgt
0 4Children R 1.0 2011 2880902.0 9603182.0 91404.0 310947.0 12886435.0 12127472.0 ... NaN NaN 0.072636 0.135971 0.767809 0.094602 0.177090 0.534203 5.646843 1.0
1 4Children R 1.0 2014 6810520.0 18768904.0 58002.0 401879.0 26039304.0 25493796.0 ... NaN NaN 0.057641 0.089150 1.001396 0.057560 0.089026 0.646561 11.232729 1.0
2 4Children R 1.0 2015 7199403.0 21638036.0 132191.0 512654.0 29482284.0 32290108.0 ... NaN NaN -0.049619 -0.079828 -0.620210 0.080004 0.128711 0.621583 7.769365 1.0
3 4Children R 1.0 2013 5573013.0 15194731.0 228844.0 267156.0 21263744.0 20989048.0 ... NaN NaN 0.045740 0.068251 1.008259 0.045365 0.067692 0.670166 14.772749 1.0
4 4Children R 1.0 2010 2056816.0 7335103.0 110256.0 424628.0 9926803.0 9769816.0 ... NaN NaN 0.057696 0.122532 0.567539 0.101660 0.215901 0.470862 4.631749 1.0

5 rows × 44 columns

Pandas crosstab#

[6]:
# Plain pandas cross-tabulation for reference: one row per year, with columns
# nested by survivor status and then grant type.
table = pd.crosstab(
    index=df.year,
    columns=[df.survivor, df.grant_type],
)
table
[6]:
survivor Dead in 2015 Alive in 2015
grant_type G R G N R R/G
year
2010 3 47 12 59 24 8
2011 3 47 12 59 24 8
2012 3 47 12 59 24 8
2013 3 47 12 59 24 8
2014 3 47 12 59 24 8
2015 3 47 12 59 24 8

ACRO crosstab#

[7]:
# Same cross-tabulation as the plain pandas cell, but routed through ACRO so
# the result is checked for disclosure risk and added to the session records.
safe_table = acro.crosstab(df.year, [df.survivor, df.grant_type])
safe_table
INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing;
INFO:acro:outcome_df:
----------------------------------------------------------------|
survivor   |Dead_in_2015     |Alive_in_2015                     |
grant_type |G            R   |G             N   R   R/G         |
year       |                 |                                  |
----------------------------------------------------------------|
2010       | threshold;   ok | ok            ok  ok  threshold; |
2011       | threshold;   ok | ok            ok  ok  threshold; |
2012       | threshold;   ok | ok            ok  ok  threshold; |
2013       | threshold;   ok | ok            ok  ok  threshold; |
2014       | threshold;   ok | ok            ok  ok  threshold; |
2015       | threshold;   ok | ok            ok  ok  threshold; |
----------------------------------------------------------------|

INFO:acro:records:add(): output_0
[7]:
survivor Dead in 2015 Alive in 2015
grant_type G R G N R R/G
year
2010 3 47 12 59 24 8
2011 3 47 12 59 24 8
2012 3 47 12 59 24 8
2013 3 47 12 59 24 8
2014 3 47 12 59 24 8
2015 3 47 12 59 24 8

Same table with the column hierarchy reversed, to make sure spaces in variable names are dealt with properly#

[8]:
# Column levels swapped relative to the previous ACRO crosstab: grant_type is
# now the outer level and survivor the inner one.
safe_table = acro.crosstab(df.year, [df.grant_type, df.survivor])
safe_table
INFO:acro:get_summary(): fail; threshold: 12 cells may need suppressing;
INFO:acro:outcome_df:
------------------------------------------------------------------------------------------------|
grant_type |G                          |N             |R                          |R/G          |
survivor   |Dead_in_2015 Alive_in_2015 |Alive_in_2015 |Dead_in_2015 Alive_in_2015 |Alive_in_2015|
year       |                           |              |                           |             |
------------------------------------------------------------------------------------------------|
2010       | threshold;   ok           | ok           | ok           ok           | threshold;  |
2011       | threshold;   ok           | ok           | ok           ok           | threshold;  |
2012       | threshold;   ok           | ok           | ok           ok           | threshold;  |
2013       | threshold;   ok           | ok           | ok           ok           | threshold;  |
2014       | threshold;   ok           | ok           | ok           ok           | threshold;  |
2015       | threshold;   ok           | ok           | ok           ok           | threshold;  |
------------------------------------------------------------------------------------------------|

INFO:acro:records:add(): output_1
[8]:
grant_type G N R R/G
survivor Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015 Alive in 2015 Alive in 2015
year
2010 3 12 59 47 24 8
2011 3 12 59 47 24 8
2012 3 12 59 47 24 8
2013 3 12 59 47 24 8
2014 3 12 59 47 24 8
2015 3 12 59 47 24 8

checking for testing purposes#

[9]:
# Keep only charities whose name begins with "W" and records from before 2012.
# Both conditions are applied as a single combined mask.
mydata = df[(df["charity"].str[0] == "W") & (df["year"] < 2012)]

# Cross-tabulate the small subset through ACRO.
acro.crosstab(mydata.year, mydata.survivor)
INFO:acro:get_summary(): fail; threshold: 4 cells may need suppressing;
INFO:acro:outcome_df:
-------------------------------------|
survivor |Dead_in_2015 |Alive_in_2015|
year     |             |             |
-------------------------------------|
2010     | threshold;  | threshold;  |
2011     | threshold;  | threshold;  |
-------------------------------------|

INFO:acro:records:add(): output_2
[9]:
survivor Dead in 2015 Alive in 2015
year
2010 2 2
2011 2 2

ACRO crosstab with suppression#

[10]:
# Switch automatic suppression on for the calls that follow.
acro.suppress = True

# Mean of inc_grants for each year / grant_type combination.
safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc="mean")
safe_table
INFO:acro:get_summary(): fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed;
INFO:acro:outcome_df:
---------------------------------------------------------------------------|
grant_type |G   |N                     |R   |R/G                           |
year       |    |                      |    |                              |
---------------------------------------------------------------------------|
2010       | ok | threshold; p-ratio;  | ok | threshold; p-ratio; nk-rule; |
2011       | ok |                   ok | ok |                   threshold; |
2012       | ok |                   ok | ok |                   threshold; |
2013       | ok |                   ok | ok |                   threshold; |
2014       | ok |                   ok | ok |                   threshold; |
2015       | ok |                   ok | ok |                   threshold; |
---------------------------------------------------------------------------|

INFO:acro:records:add(): output_3
[10]:
grant_type G N R R/G
year
2010 9921906.0 NaN 8402284.0 NaN
2011 8502246.0 124013.859375 7716880.0 NaN
2012 11458580.0 131859.062500 6958050.5 NaN
2013 13557147.0 147937.796875 7202273.5 NaN
2014 13748147.0 133198.250000 8277525.0 NaN
2015 11133433.0 146572.187500 10812888.0 NaN

ACRO crosstab with suppression and totals#

[11]:
# NOTE(review): the section heading says "with suppression", but suppression
# is switched off here -- confirm which of the two is intended.
acro.suppress = False
# Count of inc_grants values per year and (grant_type, survivor) column, with
# margins=True requesting the "All" totals row and column.
table = acro.crosstab(
    df.year,
    [df.grant_type, df.survivor],
    values=df.inc_grants,
    aggfunc="count",
    margins=True,
)
# Plain-text rendering of the resulting table.
print(table)
INFO:acro:Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted.
INFO:acro:get_summary(): fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing;
INFO:acro:outcome_df:
------------------------------------------------------------------------------------------------------------------------------------------------|
grant_type |G                                            |N                     |R                          |R/G                            |All|
survivor   |Dead_in_2015                   Alive_in_2015 |Alive_in_2015         |Dead_in_2015 Alive_in_2015 |Alive_in_2015                  |   |
year       |                                             |                      |                           |                               |   |
------------------------------------------------------------------------------------------------------------------------------------------------|
2010       | threshold; p-ratio; nk-rule;            ok  | threshold; p-ratio;  | ok           ok           | threshold; p-ratio; nk-rule;  | ok|
2011       | threshold; p-ratio; nk-rule;            ok  |                   ok | ok           ok           |                   threshold;  | ok|
2012       | threshold; p-ratio; nk-rule;            ok  |                   ok | ok           ok           |                   threshold;  | ok|
2013       | threshold; p-ratio; nk-rule;            ok  |                   ok | ok           ok           |                   threshold;  | ok|
2014       | threshold; p-ratio; nk-rule;            ok  |                   ok | ok           ok           |                   threshold;  | ok|
2015       | threshold; p-ratio; nk-rule;   threshold;   |                   ok | ok           ok           |                   threshold;  | ok|
All        |                            ok           ok  |                   ok | ok           ok           |                            ok | ok|
------------------------------------------------------------------------------------------------------------------------------------------------|

INFO:acro:records:add(): output_4
grant_type            G                           N            R  \
survivor   Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015
year
2010                  2            12             5           40
2011                  3            12            58           45
2012                  3            12            59           45
2013                  3            12            59           47
2014                  3            12            59           43
2015                  3             9            58           28
All                  17            69           298          248

grant_type                         R/G  All
survivor   Alive in 2015 Alive in 2015
year
2010                  20             4   83
2011                  24             8  150
2012                  24             8  151
2013                  24             8  153
2014                  24             8  149
2015                  23             8  129
All                  139            44  815
[12]:
# Explicitly keep suppression off for the following sections (it was already
# set to False at the start of the previous cell).
acro.suppress = False

ACRO crosstab with aggregation function#

[13]:
# Mean inc_grants broken down by year (rows) and survivor status (columns).
safe_table = acro.crosstab(df.year, df.survivor, values=df.inc_grants, aggfunc="mean")
safe_table
INFO:acro:get_summary(): pass
INFO:acro:outcome_df:
-------------------------------------|
survivor |Dead_in_2015 |Alive_in_2015|
year     |             |             |
-------------------------------------|
2010     | ok          | ok          |
2011     | ok          | ok          |
2012     | ok          | ok          |
2013     | ok          | ok          |
2014     | ok          | ok          |
2015     | ok          | ok          |
-------------------------------------|

INFO:acro:records:add(): output_5
[13]:
survivor Dead in 2015 Alive in 2015
year
2010 1320337.750 15466672.0
2011 1295468.000 7190086.5
2012 1270522.125 7119017.5
2013 1325315.500 7682584.0
2014 1282249.625 8276287.5
2015 1608412.250 8060488.5

ACRO crosstab with multiple aggregation functions and totals#

[14]:
# Passing a list to aggfunc requests several statistics at once (mean and
# standard deviation here); margins=True adds the "All" totals.
safe_table = acro.crosstab(
    df.year, df.grant_type, values=df.inc_grants, aggfunc=["mean", "std"], margins=True
)
safe_table
INFO:acro:get_summary(): fail; threshold: 14 cells may need suppressing; p-ratio: 4 cells may need suppressing; nk-rule: 2 cells may need suppressing;
INFO:acro:outcome_df:
----------------------------------------------------------------------------------------------------------------------------------------------|
           mean                                                              |std                                                             |
grant_type G    N                     R   R/G                            All |G   N                     R   R/G                            All|
year                                                                         |                                                                |
----------------------------------------------------------------------------------------------------------------------------------------------|
2010        ok   threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;   ok | ok  threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;   ok|
2011        ok                     ok  ok                    threshold;   ok | ok                    ok  ok                    threshold;   ok|
2012        ok                     ok  ok                    threshold;   ok | ok                    ok  ok                    threshold;   ok|
2013        ok                     ok  ok                    threshold;   ok | ok                    ok  ok                    threshold;   ok|
2014        ok                     ok  ok                    threshold;   ok | ok                    ok  ok                    threshold;   ok|
2015        ok                     ok  ok                    threshold;   ok | ok                    ok  ok                    threshold;   ok|
All         ok                     ok  ok                             ok  ok | ok                    ok  ok                             ok  ok|
----------------------------------------------------------------------------------------------------------------------------------------------|

INFO:acro:records:add(): output_6
[14]:
mean std
grant_type G N R R/G All G N R R/G All
year
2010 9921906.0 0.000000 8402284.0 11636000.0 8308286.5 1.855055e+07 0.000000 3.059557e+07 1.701088e+07 2.727398e+07
2011 8502246.0 124013.859375 7716880.0 16047500.0 5303808.5 1.688595e+07 205959.492903 2.954322e+07 1.561638e+07 2.137658e+07
2012 11458580.0 131859.062500 6958050.5 16810000.0 5259893.5 2.061090e+07 210476.539175 2.721184e+07 1.646449e+07 2.026400e+07
2013 13557147.0 147937.796875 7202273.5 16765625.0 5605045.5 2.486844e+07 203747.417017 2.989833e+07 1.671112e+07 2.251787e+07
2014 13748147.0 133198.250000 8277525.0 17845750.0 6117054.5 3.134559e+07 181865.925580 3.546348e+07 1.741251e+07 2.641722e+07
2015 11133433.0 146572.187500 10812888.0 18278624.0 6509989.5 2.553919e+07 201602.800832 4.130935e+07 1.730471e+07 2.784636e+07
All 11412787.0 134431.890625 8098502.0 16648273.0 5997796.5 2.283220e+07 198873.726656 3.204495e+07 1.583532e+07 2.405324e+07

ACRO crosstab with missing values#

  • This is an example of a crosstab where there are missing values that have not been filled or dealt with in the data.

  • Note that you need to set the CHECK_MISSING_VALUES variable in acro's utils module to True before running the crosstab command.

  • In this example, ten values in the column inc_grants were set to nan to represent missing data.

  • In this version of acro, checking the disclosiveness of an output with missing values is not supported.

  • The status of the command will be “review” to indicate that the output needs to be checked by the output checker to review if the output is disclosive or not.

  • In the outcome_df each cell with missing value/values will be shown as missing.

  • The output will not be suppressed, even if suppress=True.

[15]:
# Ask for missing values to be checked.
# NOTE(review): in the recorded run the summary is still "fail" and no cell is
# marked as missing, and the startup config logs 'check_missing_values': False.
# Confirm that utils.CHECK_MISSING_VALUES is the switch this acro version reads.
utils.CHECK_MISSING_VALUES = True

# Work on a copy so df itself is left untouched, then blank out the first ten
# values to stand in for missing data.
missing = df.inc_grants.copy()
missing[0:10] = np.nan

# Mean of the partly-missing series by year and grant_type, with totals.
safe_table = acro.crosstab(
    df.year, df.grant_type, values=missing, aggfunc="mean", margins=True
)
safe_table
INFO:acro:get_summary(): fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing;
INFO:acro:outcome_df:
--------------------------------------------------------------------------------|
grant_type |G   |N                     |R   |R/G                            |All|
year       |    |                      |    |                               |   |
--------------------------------------------------------------------------------|
2010       | ok | threshold; p-ratio;  | ok | threshold; p-ratio; nk-rule;  | ok|
2011       | ok |                   ok | ok |                   threshold;  | ok|
2012       | ok |                   ok | ok |                   threshold;  | ok|
2013       | ok |                   ok | ok |                   threshold;  | ok|
2014       | ok |                   ok | ok |                   threshold;  | ok|
2015       | ok |                   ok | ok |                   threshold;  | ok|
All        | ok |                   ok | ok |                            ok | ok|
--------------------------------------------------------------------------------|

INFO:acro:records:add(): output_7
[15]:
grant_type G N R R/G All
year
2010 9921906.0 0.000000 8420373.0 11636000.0 8320154.5
2011 8502246.0 125663.226562 7689140.0 16047500.0 5310392.0
2012 11458580.0 131859.062500 6896304.0 16810000.0 5220580.5
2013 13557147.0 150488.453125 7088095.5 16765625.0 5578657.0
2014 13748147.0 135494.781250 8118565.5 17845750.0 6072600.0
2015 11133433.0 149143.625000 10596385.0 18278624.0 6442131.0
All 11412787.0 136158.859375 8006361.0 16648273.0 5968295.5
[16]:
# Switch the missing-values flag back off before the next section.
utils.CHECK_MISSING_VALUES = False

ACRO crosstab with negative values#

[17]:
# Work on a copy so df itself is left untouched, then overwrite the first ten
# values with -10 so that the table input contains negative numbers.
negative = df.inc_grants.copy()
negative[0:10] = -10

# Mean of the modified series by year and grant_type.
safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc="mean")
safe_table
INFO:acro:get_summary(): review; negative values found
INFO:acro:outcome_df:
----------------------------------------|
grant_type |G |N         |R         |R/G|
year       |  |          |          |   |
----------------------------------------|
2010       |  |          | negative |   |
2011       |  | negative | negative |   |
2012       |  |          | negative |   |
2013       |  | negative | negative |   |
2014       |  | negative | negative |   |
2015       |  | negative | negative |   |
----------------------------------------|

INFO:acro:records:add(): output_8
[17]:
grant_type G N R R/G
year
2010 9921906.0 0.000000 8280032.5 11636000.0
2011 8502246.0 123496.445312 7577703.5 16047500.0
2012 11458580.0 131859.062500 6796357.5 16810000.0
2013 13557147.0 147937.625000 6988263.0 16765625.0
2014 13748147.0 133198.078125 7997392.0 17845750.0
2015 11133433.0 146572.015625 10388612.0 18278624.0

ACRO pivot_table#

[18]:
# Pivot-table equivalent of the crosstabs above: total inc_grants per
# grant_type (rows) and year (columns), with margins=True adding "All" totals.
table = acro.pivot_table(
    df,
    index=["grant_type"],
    columns=["year"],
    values=["inc_grants"],
    margins=True,
    aggfunc="sum",
)
table
INFO:acro:get_summary(): fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing;
INFO:acro:outcome_df:
--------------------------------------------------------------------------------------------------------------|
           inc_grants                                                                                         |
year       2010                           2011         2012         2013         2014         2015         All|
grant_type                                                                                                    |
--------------------------------------------------------------------------------------------------------------|
G                                      ok           ok           ok           ok           ok           ok  ok|
N                    threshold; p-ratio;            ok           ok           ok           ok           ok  ok|
R                                      ok           ok           ok           ok           ok           ok  ok|
R/G         threshold; p-ratio; nk-rule;   threshold;   threshold;   threshold;   threshold;   threshold;   ok|
All                                    ok           ok           ok           ok           ok           ok  ok|
--------------------------------------------------------------------------------------------------------------|

INFO:acro:records:add(): output_9
[18]:
inc_grants
year 2010 2011 2012 2013 2014 2015 All
grant_type
G 138906688.0 127533696.0 171878704.0 203357200.0 206222208.0 133601200.0 9.814997e+08
N 0.0 7192804.0 7779685.0 8728330.0 7858697.0 8501187.0 4.006070e+07
R 504137056.0 532464704.0 480105472.0 511361408.0 554594176.0 551457280.0 3.134120e+09
R/G 46544000.0 128380000.0 134480000.0 134125000.0 142766000.0 146228992.0 7.325240e+08
All 689587776.0 795571264.0 794243904.0 857571968.0 911441088.0 839788672.0 4.888204e+09
[19]:
# Mean and standard deviation of inc_grants per grant_type; no column split.
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table
INFO:acro:get_summary(): pass
INFO:acro:outcome_df:
---------------------------------|
           mean       |std       |
           inc_grants |inc_grants|
grant_type            |          |
---------------------------------|
G           ok        | ok       |
N           ok        | ok       |
R           ok        | ok       |
R/G         ok        | ok       |
---------------------------------|

INFO:acro:records:add(): output_10
[19]:
mean std
inc_grants inc_grants
grant_type
G 1.141279e+07 2.283220e+07
N 1.344319e+05 1.988737e+05
R 8.098502e+06 3.204495e+07
R/G 1.664827e+07 1.583532e+07

ACRO pivot_table with missing values#

[20]:
# Ask for missing values to be checked.
# NOTE(review): the recorded run still reports "pass" with every cell "ok";
# confirm that this flag is the switch the installed acro version reads.
utils.CHECK_MISSING_VALUES = True

# Overwrites df in place, so every later cell that reads df sees this change.
# .loc slicing is label-based and includes the end label, so labels 0 through
# 10 are affected (eleven rows if the index is the default 0..n-1 -- TODO
# confirm), not the ten values blanked by the positional slice in the
# crosstab example.
df.loc[0:10, "inc_grants"] = np.nan

# Mean and standard deviation of inc_grants per grant_type on the edited data.
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table
INFO:acro:get_summary(): pass
INFO:acro:outcome_df:
---------------------------------|
           mean       |std       |
           inc_grants |inc_grants|
grant_type            |          |
---------------------------------|
G           ok        | ok       |
N           ok        | ok       |
R           ok        | ok       |
R/G         ok        | ok       |
---------------------------------|

INFO:acro:records:add(): output_11
[20]:
mean std
inc_grants inc_grants
grant_type
G 1.141279e+07 2.283220e+07
N 1.364700e+05 1.999335e+05
R 8.006361e+06 3.228216e+07
R/G 1.664827e+07 1.583532e+07
[21]:
# Switch the missing-values flag back off before the next section.
utils.CHECK_MISSING_VALUES = False

ACRO pivot_table with negative values#

[22]:
# Overwrites df in place (labels 0 through 10 inclusive) with -10; the values
# stay in df for every later cell, including the regression and histogram
# sections.
df.loc[0:10, "inc_grants"] = -10

# Mean and standard deviation of inc_grants per grant_type on the edited data.
table = acro.pivot_table(
    df, index=["grant_type"], values=["inc_grants"], aggfunc=["mean", "std"]
)
table
INFO:acro:get_summary(): review; negative values found
INFO:acro:outcome_df:
---------------------------------|
           mean       |std       |
           inc_grants |inc_grants|
grant_type            |          |
---------------------------------|
G                     |          |
N           negative  | negative |
R           negative  | negative |
R/G                   |          |
---------------------------------|

INFO:acro:records:add(): output_12
[22]:
mean std
inc_grants inc_grants
grant_type
G 1.141279e+07 2.283220e+07
N 1.341800e+05 1.990196e+05
R 7.882230e+06 3.204558e+07
R/G 1.664827e+07 1.583532e+07

ACRO OLS#

[23]:
# Keep just the modelling columns and drop any row with a missing value.
# Note: df still carries the in-place -10 edits made to inc_grants in the
# pivot_table section above.
new_df = df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
new_df = new_df.dropna()

# Response and explanatory variables; add_constant supplies the term that is
# reported as "const" in the summary.
y = new_df["inc_activity"]
x = new_df[["inc_grants", "inc_donations", "total_costs"]]
x = add_constant(x)

# Fit ordinary least squares through ACRO and show the regression summary.
results = acro.ols(y, x)
results.summary()
INFO:acro:ols() outcome: pass; dof=807.0 >= 10
INFO:acro:records:add(): output_13
[23]:
OLS Regression Results
Dep. Variable: inc_activity R-squared: 0.894
Model: OLS Adj. R-squared: 0.893
Method: Least Squares F-statistic: 2261.
Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.00
Time: 19:43:34 Log-Likelihood: -14495.
No. Observations: 811 AIC: 2.900e+04
Df Residuals: 807 BIC: 2.902e+04
Df Model: 3
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
const 3.01e+05 5.33e+05 0.565 0.572 -7.45e+05 1.35e+06
inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836
inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633
total_costs 0.8313 0.011 78.674 0.000 0.811 0.852
Omnibus: 1339.956 Durbin-Watson: 1.414
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1253317.706
Skew: 9.899 Prob(JB): 0.00
Kurtosis: 194.566 Cond. No. 1.05e+08


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+08. This might indicate that there are
strong multicollinearity or other numerical problems.

ACRO OLSR#

[24]:
# Formula-based version of the same regression; reuses new_df from the OLS
# cell. The intercept is reported as "Intercept" instead of "const".
results = acro.olsr(
    formula="inc_activity ~ inc_grants + inc_donations + total_costs", data=new_df
)
results.summary()
INFO:acro:olsr() outcome: pass; dof=807.0 >= 10
INFO:acro:records:add(): output_14
[24]:
OLS Regression Results
Dep. Variable: inc_activity R-squared: 0.894
Model: OLS Adj. R-squared: 0.893
Method: Least Squares F-statistic: 2261.
Date: Thu, 06 Mar 2025 Prob (F-statistic): 0.00
Time: 19:43:34 Log-Likelihood: -14495.
No. Observations: 811 AIC: 2.900e+04
Df Residuals: 807 BIC: 2.902e+04
Df Model: 3
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 3.01e+05 5.33e+05 0.565 0.572 -7.45e+05 1.35e+06
inc_grants -0.8846 0.025 -35.956 0.000 -0.933 -0.836
inc_donations -0.6647 0.016 -40.721 0.000 -0.697 -0.633
total_costs 0.8313 0.011 78.674 0.000 0.811 0.852
Omnibus: 1339.956 Durbin-Watson: 1.414
Prob(Omnibus): 0.000 Jarque-Bera (JB): 1253317.706
Skew: 9.899 Prob(JB): 0.00
Kurtosis: 194.566 Cond. No. 1.05e+08


Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+08. This might indicate that there are
strong multicollinearity or other numerical problems.

ACRO Probit#

[25]:
# Rebuild the modelling frame with survivor included and drop incomplete rows.
new_df = df[["survivor", "inc_activity", "inc_grants", "inc_donations", "total_costs"]]
new_df = new_df.dropna()

# Encode survivor as integer category codes so it can serve as the response,
# and give the resulting series a name for the summary table.
y = new_df["survivor"].astype("category").cat.codes  # numeric
y.name = "survivor"
x = new_df[["inc_activity", "inc_grants", "inc_donations", "total_costs"]]
x = add_constant(x)

# Fit a probit model through ACRO and show the summary.
results = acro.probit(y, x)
results.summary()
INFO:acro:probit() outcome: pass; dof=806.0 >= 10
INFO:acro:records:add(): output_15
Optimization terminated successfully.
         Current function value: 0.493791
         Iterations 10
[25]:
Probit Regression Results
Dep. Variable: survivor No. Observations: 811
Model: Probit Df Residuals: 806
Method: MLE Df Model: 4
Date: Thu, 06 Mar 2025 Pseudo R-squ.: 0.2140
Time: 19:43:34 Log-Likelihood: -400.46
converged: True LL-Null: -509.50
Covariance Type: nonrobust LLR p-value: 4.875e-46
coef std err z P>|z| [0.025 0.975]
const 0.0474 0.057 0.838 0.402 -0.063 0.158
inc_activity 1.836e-07 5.16e-08 3.559 0.000 8.25e-08 2.85e-07
inc_grants 8.576e-08 3.9e-08 2.197 0.028 9.25e-09 1.62e-07
inc_donations 2.406e-07 4.54e-08 5.297 0.000 1.52e-07 3.3e-07
total_costs -8.644e-08 3.68e-08 -2.351 0.019 -1.59e-07 -1.44e-08


Possibly complete quasi-separation: A fraction 0.18 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.

ACRO Logit#

[26]:
# Logit model on the same y and x that were prepared in the probit cell.
results = acro.logit(y, x)
results.summary()
INFO:acro:logit() outcome: pass; dof=806.0 >= 10
INFO:acro:records:add(): output_16
Optimization terminated successfully.
         Current function value: 0.490836
         Iterations 12
[26]:
Logit Regression Results
Dep. Variable: survivor No. Observations: 811
Model: Logit Df Residuals: 806
Method: MLE Df Model: 4
Date: Thu, 06 Mar 2025 Pseudo R-squ.: 0.2187
Time: 19:43:35 Log-Likelihood: -398.07
converged: True LL-Null: -509.50
Covariance Type: nonrobust LLR p-value: 4.532e-47
coef std err z P>|z| [0.025 0.975]
const 0.0512 0.091 0.561 0.575 -0.128 0.230
inc_activity 2.981e-07 8.95e-08 3.330 0.001 1.23e-07 4.74e-07
inc_grants 1.351e-07 6.67e-08 2.026 0.043 4.39e-09 2.66e-07
inc_donations 5.123e-07 1.04e-07 4.927 0.000 3.08e-07 7.16e-07
total_costs -1.442e-07 6.26e-08 -2.304 0.021 -2.67e-07 -2.15e-08


Possibly complete quasi-separation: A fraction 0.18 of observations can be
perfectly predicted. This might indicate that there is complete
quasi-separation. In this case some parameters will not be identified.

ACRO Histogram without suppression#

[27]:
# Histogram of inc_grants with automatic suppression off.
# The printed return value is the path of the saved image
# (acro_artifacts/histogram_0.png in this run).
hist = acro.hist(df, "inc_grants")
print(hist)
INFO:acro:status: fail
INFO:acro:records:add(): output_17
acro_artifacts/histogram_0.png
../_images/notebooks_test_47_2.png

ACRO Histogram with suppression#

[28]:
# Switch automatic suppression on for this ACRO session, then repeat the
# histogram request: ACRO warns that the column is disclosive and does not
# show the plot, but the request is still recorded as an output.
acro.suppress = True
hist = acro.hist(df, "inc_grants")
WARNING:acro:Histogram will not be shown as the inc_grants column is disclosive.
INFO:acro:status: fail
INFO:acro:records:add(): output_18
<Figure size 640x480 with 0 Axes>
[29]:
# Turn automatic suppression back off, matching the ACRO(suppress=False)
# setting the session was created with.
acro.suppress = False

List current ACRO outputs#

[30]:
# Print the record of every output created so far in this session.
# The return value is kept in results_str (presumably the same listing as
# text — confirm against the ACRO API).
results_str = acro.print_outputs()
uid: output_0
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 12, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 5]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(df.year, [df.survivor, df.grant_type])
summary: fail; threshold: 12 cells may need suppressing;
outcome: survivor   Dead_in_2015     Alive_in_2015
grant_type            G   R             G   N   R          R/G
year
2010        threshold;   ok            ok  ok  ok  threshold;
2011        threshold;   ok            ok  ok  ok  threshold;
2012        threshold;   ok            ok  ok  ok  threshold;
2013        threshold;   ok            ok  ok  ok  threshold;
2014        threshold;   ok            ok  ok  ok  threshold;
2015        threshold;   ok            ok  ok  ok  threshold;
output: [survivor   Dead in 2015     Alive in 2015
grant_type            G   R             G   N   R R/G
year
2010                  3  47            12  59  24   8
2011                  3  47            12  59  24   8
2012                  3  47            12  59  24   8
2013                  3  47            12  59  24   8
2014                  3  47            12  59  24   8
2015                  3  47            12  59  24   8]
timestamp: 2025-03-06T19:43:34.383332
comments: []
exception:

uid: output_1
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 12, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 5]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(df.year, [df.grant_type, df.survivor])
summary: fail; threshold: 12 cells may need suppressing;
outcome: grant_type            G                           N            R  \
survivor   Dead_in_2015 Alive_in_2015 Alive_in_2015 Dead_in_2015
year
2010        threshold;             ok            ok           ok
2011        threshold;             ok            ok           ok
2012        threshold;             ok            ok           ok
2013        threshold;             ok            ok           ok
2014        threshold;             ok            ok           ok
2015        threshold;             ok            ok           ok

grant_type                         R/G
survivor   Alive_in_2015 Alive_in_2015
year
2010                  ok   threshold;
2011                  ok   threshold;
2012                  ok   threshold;
2013                  ok   threshold;
2014                  ok   threshold;
2015                  ok   threshold;
output: [grant_type            G                           N            R  \
survivor   Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015
year
2010                  3            12            59           47
2011                  3            12            59           47
2012                  3            12            59           47
2013                  3            12            59           47
2014                  3            12            59           47
2015                  3            12            59           47

grant_type                         R/G
survivor   Alive in 2015 Alive in 2015
year
2010                  24             8
2011                  24             8
2012                  24             8
2013                  24             8
2014                  24             8
2015                  24             8  ]
timestamp: 2025-03-06T19:43:34.405157
comments: []
exception:

uid: output_2
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 1], [1, 0], [1, 1]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: acro.crosstab(mydata.year, mydata.survivor)
summary: fail; threshold: 4 cells may need suppressing;
outcome: survivor Dead_in_2015 Alive_in_2015
year
2010      threshold;    threshold;
2011      threshold;    threshold;
output: [survivor  Dead in 2015  Alive in 2015
year
2010                 2              2
2011                 2              2]
timestamp: 2025-03-06T19:43:34.423743
comments: []
exception:

uid: output_3
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': True, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(df.year, df.grant_type, values=df.inc_grants, aggfunc="mean")
summary: fail; threshold: 7 cells suppressed; p-ratio: 2 cells suppressed; nk-rule: 1 cells suppressed;
outcome: grant_type   G                     N   R                            R/G
year
2010        ok  threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;
2011        ok                    ok  ok                    threshold;
2012        ok                    ok  ok                    threshold;
2013        ok                    ok  ok                    threshold;
2014        ok                    ok  ok                    threshold;
2015        ok                    ok  ok                    threshold;
output: [grant_type           G              N           R  R/G
year
2010         9921906.0            NaN   8402284.0  NaN
2011         8502246.0  124013.859375   7716880.0  NaN
2012        11458580.0  131859.062500   6958050.5  NaN
2013        13557147.0  147937.796875   7202273.5  NaN
2014        13748147.0  133198.250000   8277525.0  NaN
2015        11133433.0  146572.187500  10812888.0  NaN]
timestamp: 2025-03-06T19:43:34.454789
comments: []
exception:

uid: output_4
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 8, 'nk-rule': 7, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 2], [0, 5], [1, 0], [1, 5], [2, 0], [2, 5], [3, 0], [3, 5], [4, 0], [4, 5], [5, 0], [5, 1], [5, 5]], 'p-ratio': [[0, 0], [0, 2], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'nk-rule': [[0, 0], [0, 5], [1, 0], [2, 0], [3, 0], [4, 0], [5, 0]], 'all-values-are-same': []}}
command: table = acro.crosstab(
summary: fail; threshold: 14 cells may need suppressing; p-ratio: 8 cells may need suppressing; nk-rule: 7 cells may need suppressing;
outcome: grant_type                              G                                   N  \
survivor                     Dead_in_2015 Alive_in_2015         Alive_in_2015
year
2010        threshold; p-ratio; nk-rule;             ok  threshold; p-ratio;
2011        threshold; p-ratio; nk-rule;             ok                    ok
2012        threshold; p-ratio; nk-rule;             ok                    ok
2013        threshold; p-ratio; nk-rule;             ok                    ok
2014        threshold; p-ratio; nk-rule;             ok                    ok
2015        threshold; p-ratio; nk-rule;    threshold;                     ok
All                                    ok            ok                    ok

grant_type            R                                          R/G All
survivor   Dead_in_2015 Alive_in_2015                  Alive_in_2015
year
2010                 ok            ok  threshold; p-ratio; nk-rule;   ok
2011                 ok            ok                    threshold;   ok
2012                 ok            ok                    threshold;   ok
2013                 ok            ok                    threshold;   ok
2014                 ok            ok                    threshold;   ok
2015                 ok            ok                    threshold;   ok
All                  ok            ok                             ok  ok
output: [grant_type            G                           N            R  \
survivor   Dead in 2015 Alive in 2015 Alive in 2015 Dead in 2015
year
2010                  2            12             5           40
2011                  3            12            58           45
2012                  3            12            59           45
2013                  3            12            59           47
2014                  3            12            59           43
2015                  3             9            58           28
All                  17            69           298          248

grant_type                         R/G  All
survivor   Alive in 2015 Alive in 2015
year
2010                  20             4   83
2011                  24             8  150
2012                  24             8  151
2013                  24             8  153
2014                  24             8  149
2015                  23             8  129
All                  139            44  815  ]
timestamp: 2025-03-06T19:43:34.523720
comments: ["Empty columns: ('N', 'Dead in 2015'), ('R/G', 'Dead in 2015') were deleted."]
exception:

uid: output_5
status: pass
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(df.year, df.survivor, values=df.inc_grants, aggfunc="mean")
summary: pass
outcome: survivor Dead_in_2015 Alive_in_2015
year
2010               ok            ok
2011               ok            ok
2012               ok            ok
2013               ok            ok
2014               ok            ok
2015               ok            ok
output: [survivor  Dead in 2015  Alive in 2015
year
2010       1320337.750     15466672.0
2011       1295468.000      7190086.5
2012       1270522.125      7119017.5
2013       1325315.500      7682584.0
2014       1282249.625      8276287.5
2015       1608412.250      8060488.5]
timestamp: 2025-03-06T19:43:34.555960
comments: []
exception:

uid: output_6
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 14, 'p-ratio': 4, 'nk-rule': 2, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [0, 6], [0, 8], [1, 3], [1, 8], [2, 3], [2, 8], [3, 3], [3, 8], [4, 3], [4, 8], [5, 3], [5, 8]], 'p-ratio': [[0, 1], [0, 3], [0, 6], [0, 8]], 'nk-rule': [[0, 3], [0, 8]], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(
summary: fail; threshold: 14 cells may need suppressing; p-ratio: 4 cells may need suppressing; nk-rule: 2 cells may need suppressing;
outcome:            mean                                                               \
grant_type    G                     N   R                            R/G All
year
2010         ok  threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;   ok
2011         ok                    ok  ok                    threshold;   ok
2012         ok                    ok  ok                    threshold;   ok
2013         ok                    ok  ok                    threshold;   ok
2014         ok                    ok  ok                    threshold;   ok
2015         ok                    ok  ok                    threshold;   ok
All          ok                    ok  ok                             ok  ok

           std
grant_type   G                     N   R                            R/G All
year
2010        ok  threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;   ok
2011        ok                    ok  ok                    threshold;   ok
2012        ok                    ok  ok                    threshold;   ok
2013        ok                    ok  ok                    threshold;   ok
2014        ok                    ok  ok                    threshold;   ok
2015        ok                    ok  ok                    threshold;   ok
All         ok                    ok  ok                             ok  ok
output: [                  mean                                                    \
grant_type           G              N           R         R/G        All
year
2010         9921906.0       0.000000   8402284.0  11636000.0  8308286.5
2011         8502246.0  124013.859375   7716880.0  16047500.0  5303808.5
2012        11458580.0  131859.062500   6958050.5  16810000.0  5259893.5
2013        13557147.0  147937.796875   7202273.5  16765625.0  5605045.5
2014        13748147.0  133198.250000   8277525.0  17845750.0  6117054.5
2015        11133433.0  146572.187500  10812888.0  18278624.0  6509989.5
All         11412787.0  134431.890625   8098502.0  16648273.0  5997796.5

                     std                                             \
grant_type             G              N             R           R/G
year
2010        1.855055e+07       0.000000  3.059557e+07  1.701088e+07
2011        1.688595e+07  205959.492903  2.954322e+07  1.561638e+07
2012        2.061090e+07  210476.539175  2.721184e+07  1.646449e+07
2013        2.486844e+07  203747.417017  2.989833e+07  1.671112e+07
2014        3.134559e+07  181865.925580  3.546348e+07  1.741251e+07
2015        2.553919e+07  201602.800832  4.130935e+07  1.730471e+07
All         2.283220e+07  198873.726656  3.204495e+07  1.583532e+07


grant_type           All
year
2010        2.727398e+07
2011        2.137658e+07
2012        2.026400e+07
2013        2.251787e+07
2014        2.641722e+07
2015        2.784636e+07
All         2.405324e+07  ]
timestamp: 2025-03-06T19:43:34.639249
comments: []
exception:

uid: output_7
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(
summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing;
outcome: grant_type   G                     N   R                            R/G All
year
2010        ok  threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;   ok
2011        ok                    ok  ok                    threshold;   ok
2012        ok                    ok  ok                    threshold;   ok
2013        ok                    ok  ok                    threshold;   ok
2014        ok                    ok  ok                    threshold;   ok
2015        ok                    ok  ok                    threshold;   ok
All         ok                    ok  ok                             ok  ok
output: [grant_type           G              N           R         R/G        All
year
2010         9921906.0       0.000000   8420373.0  11636000.0  8320154.5
2011         8502246.0  125663.226562   7689140.0  16047500.0  5310392.0
2012        11458580.0  131859.062500   6896304.0  16810000.0  5220580.5
2013        13557147.0  150488.453125   7088095.5  16765625.0  5578657.0
2014        13748147.0  135494.781250   8118565.5  17845750.0  6072600.0
2015        11133433.0  149143.625000  10596385.0  18278624.0  6442131.0
All         11412787.0  136158.859375   8006361.0  16648273.0  5968295.5]
timestamp: 2025-03-06T19:43:34.695759
comments: []
exception:

uid: output_8
status: review
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 10, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [[0, 2], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 1], [5, 2]], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc="mean")
summary: review; negative values found
outcome: grant_type G         N         R R/G
year
2010                    negative
2011          negative  negative
2012                    negative
2013          negative  negative
2014          negative  negative
2015          negative  negative
output: [grant_type           G              N           R         R/G
year
2010         9921906.0       0.000000   8280032.5  11636000.0
2011         8502246.0  123496.445312   7577703.5  16047500.0
2012        11458580.0  131859.062500   6796357.5  16810000.0
2013        13557147.0  147937.625000   6988263.0  16765625.0
2014        13748147.0  133198.078125   7997392.0  17845750.0
2015        11133433.0  146572.015625  10388612.0  18278624.0]
timestamp: 2025-03-06T19:43:34.727227
comments: []
exception:

uid: output_9
status: fail
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[1, 0], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5]], 'p-ratio': [[1, 0], [3, 0]], 'nk-rule': [[3, 0]], 'all-values-are-same': []}}
command: table = acro.pivot_table(
summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing;
outcome:                                inc_grants                            \
year                                 2010         2011         2012
grant_type
G                                      ok           ok           ok
N                    threshold; p-ratio;            ok           ok
R                                      ok           ok           ok
R/G         threshold; p-ratio; nk-rule;   threshold;   threshold;
All                                    ok           ok           ok


year               2013         2014         2015 All
grant_type
G                    ok           ok           ok  ok
N                    ok           ok           ok  ok
R                    ok           ok           ok  ok
R/G         threshold;   threshold;   threshold;   ok
All                  ok           ok           ok  ok
output: [             inc_grants                                                      \
year               2010         2011         2012         2013         2014
grant_type
G           138906688.0  127533696.0  171878704.0  203357200.0  206222208.0
N                   0.0    7192804.0    7779685.0    8728330.0    7858697.0
R           504137056.0  532464704.0  480105472.0  511361408.0  554594176.0
R/G          46544000.0  128380000.0  134480000.0  134125000.0  142766000.0
All         689587776.0  795571264.0  794243904.0  857571968.0  911441088.0


year               2015           All
grant_type
G           133601200.0  9.814997e+08
N             8501187.0  4.006070e+07
R           551457280.0  3.134120e+09
R/G         146228992.0  7.325240e+08
All         839788672.0  4.888204e+09  ]
timestamp: 2025-03-06T19:43:34.782873
comments: []
exception:

uid: output_10
status: pass
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: table = acro.pivot_table(
summary: pass
outcome:                  mean        std
           inc_grants inc_grants
grant_type
G                  ok         ok
N                  ok         ok
R                  ok         ok
R/G                ok         ok
output: [                    mean           std
              inc_grants    inc_grants
grant_type
G           1.141279e+07  2.283220e+07
N           1.344319e+05  1.988737e+05
R           8.098502e+06  3.204495e+07
R/G         1.664827e+07  1.583532e+07]
timestamp: 2025-03-06T19:43:34.814054
comments: []
exception:

uid: output_11
status: pass
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: table = acro.pivot_table(
summary: pass
outcome:                  mean        std
           inc_grants inc_grants
grant_type
G                  ok         ok
N                  ok         ok
R                  ok         ok
R/G                ok         ok
output: [                    mean           std
              inc_grants    inc_grants
grant_type
G           1.141279e+07  2.283220e+07
N           1.364700e+05  1.999335e+05
R           8.006361e+06  3.228216e+07
R/G         1.664827e+07  1.583532e+07]
timestamp: 2025-03-06T19:43:34.844023
comments: []
exception:

uid: output_12
status: review
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: table = acro.pivot_table(
summary: review; negative values found
outcome:                  mean        std
           inc_grants inc_grants
grant_type
G
N            negative   negative
R            negative   negative
R/G
output: [                    mean           std
              inc_grants    inc_grants
grant_type
G           1.141279e+07  2.283220e+07
N           1.341800e+05  1.990196e+05
R           7.882230e+06  3.204558e+07
R/G         1.664827e+07  1.583532e+07]
timestamp: 2025-03-06T19:43:34.872024
comments: []
exception:

uid: output_13
status: pass
type: regression
properties: {'method': 'ols', 'dof': 807.0}
sdc: {}
command: results = acro.ols(y, x)
summary: pass; dof=807.0 >= 10
outcome: Empty DataFrame
Columns: []
Index: []
output: [                       inc_activity           R-squared:      0.894
Dep. Variable:
Model:                          OLS      Adj. R-squared:      0.893
Method:               Least Squares         F-statistic:   2261.000
Date:              Thu, 06 Mar 2025  Prob (F-statistic):      0.000
Time:                      19:43:34      Log-Likelihood: -14495.000
No. Observations:               811                 AIC:  29000.000
Df Residuals:                   807                 BIC:  29020.000
Df Model:                         3                  NaN        NaN
Covariance Type:          nonrobust                  NaN        NaN,                       coef     std err       t  P>|t|      [0.025       0.975]
const          301000.0000  533000.000   0.565  0.572 -745000.000  1350000.000
inc_grants         -0.8846       0.025 -35.956  0.000      -0.933       -0.836
inc_donations      -0.6647       0.016 -40.721  0.000      -0.697       -0.633
total_costs         0.8313       0.011  78.674  0.000       0.811        0.852,                 1339.956     Durbin-Watson:         1.414
Omnibus:
Prob(Omnibus):     0.000  Jarque-Bera (JB):  1.253318e+06
Skew:              9.899          Prob(JB):  0.000000e+00
Kurtosis:        194.566          Cond. No.  1.050000e+08]
timestamp: 2025-03-06T19:43:34.926753
comments: []
exception:

uid: output_14
status: pass
type: regression
properties: {'method': 'olsr', 'dof': 807.0}
sdc: {}
command: results = acro.olsr(
summary: pass; dof=807.0 >= 10
outcome: Empty DataFrame
Columns: []
Index: []
output: [                       inc_activity           R-squared:      0.894
Dep. Variable:
Model:                          OLS      Adj. R-squared:      0.893
Method:               Least Squares         F-statistic:   2261.000
Date:              Thu, 06 Mar 2025  Prob (F-statistic):      0.000
Time:                      19:43:34      Log-Likelihood: -14495.000
No. Observations:               811                 AIC:  29000.000
Df Residuals:                   807                 BIC:  29020.000
Df Model:                         3                  NaN        NaN
Covariance Type:          nonrobust                  NaN        NaN,                       coef     std err       t  P>|t|      [0.025       0.975]
Intercept      301000.0000  533000.000   0.565  0.572 -745000.000  1350000.000
inc_grants         -0.8846       0.025 -35.956  0.000      -0.933       -0.836
inc_donations      -0.6647       0.016 -40.721  0.000      -0.697       -0.633
total_costs         0.8313       0.011  78.674  0.000       0.811        0.852,                 1339.956     Durbin-Watson:         1.414
Omnibus:
Prob(Omnibus):     0.000  Jarque-Bera (JB):  1.253318e+06
Skew:              9.899          Prob(JB):  0.000000e+00
Kurtosis:        194.566          Cond. No.  1.050000e+08]
timestamp: 2025-03-06T19:43:34.954407
comments: []
exception:

uid: output_15
status: pass
type: regression
properties: {'method': 'probit', 'dof': 806.0}
sdc: {}
command: results = acro.probit(y, x)
summary: pass; dof=806.0 >= 10
outcome: Empty DataFrame
Columns: []
Index: []
output: [                          survivor No. Observations:           811
Dep. Variable:
Model:                      Probit     Df Residuals:  8.060000e+02
Method:                        MLE         Df Model:  4.000000e+00
Date:             Thu, 06 Mar 2025    Pseudo R-squ.:  2.140000e-01
Time:                     19:43:34   Log-Likelihood: -4.004600e+02
converged:                    True          LL-Null: -5.095000e+02
Covariance Type:         nonrobust      LLR p-value:  4.875000e-46,                        coef       std err      z  P>|z|        [0.025  \
const          4.740000e-02  5.700000e-02  0.838  0.402 -6.300000e-02
inc_activity   1.836000e-07  5.160000e-08  3.559  0.000  8.250000e-08
inc_grants     8.576000e-08  3.900000e-08  2.197  0.028  9.250000e-09
inc_donations  2.406000e-07  4.540000e-08  5.297  0.000  1.520000e-07
total_costs   -8.644000e-08  3.680000e-08 -2.351  0.019 -1.590000e-07

                     0.975]
const          1.580000e-01
inc_activity   2.850000e-07
inc_grants     1.620000e-07
inc_donations  3.300000e-07
total_costs   -1.440000e-08  ]
timestamp: 2025-03-06T19:43:34.982272
comments: []
exception:

uid: output_16
status: pass
type: regression
properties: {'method': 'logit', 'dof': 806.0}
sdc: {}
command: results = acro.logit(y, x)
summary: pass; dof=806.0 >= 10
outcome: Empty DataFrame
Columns: []
Index: []
output: [                          survivor No. Observations:           811
Dep. Variable:
Model:                       Logit     Df Residuals:  8.060000e+02
Method:                        MLE         Df Model:  4.000000e+00
Date:             Thu, 06 Mar 2025    Pseudo R-squ.:  2.187000e-01
Time:                     19:43:35   Log-Likelihood: -3.980700e+02
converged:                    True          LL-Null: -5.095000e+02
Covariance Type:         nonrobust      LLR p-value:  4.532000e-47,                        coef       std err      z  P>|z|        [0.025  \
const          5.120000e-02  9.100000e-02  0.561  0.575 -1.280000e-01
inc_activity   2.981000e-07  8.950000e-08  3.330  0.001  1.230000e-07
inc_grants     1.351000e-07  6.670000e-08  2.026  0.043  4.390000e-09
inc_donations  5.123000e-07  1.040000e-07  4.927  0.000  3.080000e-07
total_costs   -1.442000e-07  6.260000e-08 -2.304  0.021 -2.670000e-07

                     0.975]
const          2.300000e-01
inc_activity   4.740000e-07
inc_grants     2.660000e-07
inc_donations  7.160000e-07
total_costs   -2.150000e-08  ]
timestamp: 2025-03-06T19:43:35.004433
comments: []
exception:

uid: output_17
status: fail
type: histogram
properties: {'method': 'histogram'}
sdc: {}
command: hist = acro.hist(df, "inc_grants")
summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. The maximum value of the inc_grants column is: 249327008.0
outcome: Empty DataFrame
Columns: []
Index: []
output: ['acro_artifacts/histogram_0.png']
timestamp: 2025-03-06T19:43:35.149884
comments: []
exception:

uid: output_18
status: fail
type: histogram
properties: {'method': 'histogram'}
sdc: {}
command: hist = acro.hist(df, "inc_grants")
summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. The maximum value of the inc_grants column is: 249327008.0
outcome: Empty DataFrame
Columns: []
Index: []
output: ['acro_artifacts/histogram_1.png']
timestamp: 2025-03-06T19:43:35.204307
comments: []
exception:


Remove some ACRO outputs before finalising#

[31]:
# Remove these two outputs from the ACRO session before finalising.
for unwanted_uid in ("output_1", "output_4"):
    acro.remove_output(unwanted_uid)
INFO:acro:records:remove(): output_1 removed
INFO:acro:records:remove(): output_4 removed

Rename ACRO outputs before finalising#

[32]:
# Give output_2 a descriptive uid; later log records and file names use the new uid.
old_uid, new_uid = "output_2", "pivot_table"
acro.rename_output(old_uid, new_uid)
INFO:acro:records:rename_output(): output_2 renamed to pivot_table

Add a comment to output#

[33]:
# Attach two free-text comments to output_0, in this order.
for remark in (
    "This is a cross table between year and grant_type",
    "6 cells were suppressed in this table",
):
    acro.add_comments("output_0", remark)
INFO:acro:records:a comment was added to output_0
INFO:acro:records:a comment was added to output_0

Add an unsupported output to the list of outputs#

[34]:
# Register a file that ACRO did not produce itself, with a description of it.
custom_file = "XandY.jpeg"
custom_note = "This output is an image showing the relationship between X and Y"
acro.custom_output(custom_file, custom_note)
INFO:acro:records:add_custom(): output_19

Request an exception for some of the outputs#

[35]:
# Exception requests keyed by output uid; dict insertion order keeps the
# requests in the same order as the original one-call-per-line version.
exception_requests = {
    "output_0": "I really need this.",
    "output_3": "This one is safe. Trust me, I'm a professor.",
    "output_5": "It's not disclosive, I promise.",
    "output_6": "I need this one too",
}
for uid, reason in exception_requests.items():
    acro.add_exception(uid, reason)
INFO:acro:records:exception request was added to output_0
INFO:acro:records:exception request was added to output_3
INFO:acro:records:exception request was added to output_5
INFO:acro:records:exception request was added to output_6

Finalise ACRO#

[39]:
# Directory that finalise writes to; the listing cells below read it as well.
SAVE_PATH = "ACRO_RES"
# Format argument for finalise. An earlier revision of this cell kept a
# commented-out call that passed "xlsx" instead.
OUTPUT_FORMAT = "json"

# NOTE(review): in the recorded run this call prompted for an exception reason
# for each fail/review record that did not already have one - confirm this is
# the intended interactive behaviour before running unattended.
output = acro.finalise(SAVE_PATH, OUTPUT_FORMAT)
INFO:acro:records:
uid: output_7
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(
summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing;
outcome: grant_type   G                     N   R                            R/G All
year
2010        ok  threshold; p-ratio;   ok  threshold; p-ratio; nk-rule;   ok
2011        ok                    ok  ok                    threshold;   ok
2012        ok                    ok  ok                    threshold;   ok
2013        ok                    ok  ok                    threshold;   ok
2014        ok                    ok  ok                    threshold;   ok
2015        ok                    ok  ok                    threshold;   ok
All         ok                    ok  ok                             ok  ok
output: [grant_type           G              N           R         R/G        All
year
2010         9921906.0       0.000000   8420373.0  11636000.0  8320154.5
2011         8502246.0  125663.226562   7689140.0  16047500.0  5310392.0
2012        11458580.0  131859.062500   6896304.0  16810000.0  5220580.5
2013        13557147.0  150488.453125   7088095.5  16765625.0  5578657.0
2014        13748147.0  135494.781250   8118565.5  17845750.0  6072600.0
2015        11133433.0  149143.625000  10596385.0  18278624.0  6442131.0
All         11412787.0  136158.859375   8006361.0  16648273.0  5968295.5]
timestamp: 2025-03-06T19:43:34.695759
comments: []
exception:

The status of the record above is: fail.
Please explain why an exception should be granted.

 a reason should be provided
INFO:acro:records:
uid: output_8
status: review
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 10, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [[0, 2], [1, 1], [1, 2], [2, 2], [3, 1], [3, 2], [4, 1], [4, 2], [5, 1], [5, 2]], 'missing': [], 'threshold': [[0, 1], [0, 3], [1, 3], [2, 3], [3, 3], [4, 3], [5, 3]], 'p-ratio': [[0, 1], [0, 3]], 'nk-rule': [[0, 3]], 'all-values-are-same': []}}
command: safe_table = acro.crosstab(df.year, df.grant_type, values=negative, aggfunc="mean")
summary: review; negative values found
outcome: grant_type G         N         R R/G
year
2010                    negative
2011          negative  negative
2012                    negative
2013          negative  negative
2014          negative  negative
2015          negative  negative
output: [grant_type           G              N           R         R/G
year
2010         9921906.0       0.000000   8280032.5  11636000.0
2011         8502246.0  123496.445312   7577703.5  16047500.0
2012        11458580.0  131859.062500   6796357.5  16810000.0
2013        13557147.0  147937.625000   6988263.0  16765625.0
2014        13748147.0  133198.078125   7997392.0  17845750.0
2015        11133433.0  146572.015625  10388612.0  18278624.0]
timestamp: 2025-03-06T19:43:34.727227
comments: []
exception:

The status of the record above is: review.
Please explain why an exception should be granted.

 negative values are valid financial losses
INFO:acro:records:
uid: output_9
status: fail
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 7, 'p-ratio': 2, 'nk-rule': 1, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[1, 0], [3, 0], [3, 1], [3, 2], [3, 3], [3, 4], [3, 5]], 'p-ratio': [[1, 0], [3, 0]], 'nk-rule': [[3, 0]], 'all-values-are-same': []}}
command: table = acro.pivot_table(
summary: fail; threshold: 7 cells may need suppressing; p-ratio: 2 cells may need suppressing; nk-rule: 1 cells may need suppressing;
outcome:                                inc_grants                            \
year                                 2010         2011         2012
grant_type
G                                      ok           ok           ok
N                    threshold; p-ratio;            ok           ok
R                                      ok           ok           ok
R/G         threshold; p-ratio; nk-rule;   threshold;   threshold;
All                                    ok           ok           ok


year               2013         2014         2015 All
grant_type
G                    ok           ok           ok  ok
N                    ok           ok           ok  ok
R                    ok           ok           ok  ok
R/G         threshold;   threshold;   threshold;   ok
All                  ok           ok           ok  ok
output: [             inc_grants                                                      \
year               2010         2011         2012         2013         2014
grant_type
G           138906688.0  127533696.0  171878704.0  203357200.0  206222208.0
N                   0.0    7192804.0    7779685.0    8728330.0    7858697.0
R           504137056.0  532464704.0  480105472.0  511361408.0  554594176.0
R/G          46544000.0  128380000.0  134480000.0  134125000.0  142766000.0
All         689587776.0  795571264.0  794243904.0  857571968.0  911441088.0


year               2015           All
grant_type
G           133601200.0  9.814997e+08
N             8501187.0  4.006070e+07
R           551457280.0  3.134120e+09
R/G         146228992.0  7.325240e+08
All         839788672.0  4.888204e+09  ]
timestamp: 2025-03-06T19:43:34.782873
comments: []
exception:

The status of the record above is: fail.
Please explain why an exception should be granted.

 a reason should be provided
INFO:acro:records:
uid: output_12
status: review
type: table
properties: {'method': 'pivot_table'}
sdc: {'summary': {'suppressed': False, 'negative': 4, 'missing': 0, 'threshold': 0, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [[1, 0], [1, 1], [2, 0], [2, 1]], 'missing': [], 'threshold': [], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: table = acro.pivot_table(
summary: review; negative values found
outcome:                  mean        std
           inc_grants inc_grants
grant_type
G
N            negative   negative
R            negative   negative
R/G
output: [                    mean           std
              inc_grants    inc_grants
grant_type
G           1.141279e+07  2.283220e+07
N           1.341800e+05  1.990196e+05
R           7.882230e+06  3.204558e+07
R/G         1.664827e+07  1.583532e+07]
timestamp: 2025-03-06T19:43:34.872024
comments: []
exception:

The status of the record above is: review.
Please explain why an exception should be granted.

 negative values are valid financial losses
INFO:acro:records:
uid: output_17
status: fail
type: histogram
properties: {'method': 'histogram'}
sdc: {}
command: hist = acro.hist(df, "inc_grants")
summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. The maximum value of the inc_grants column is: 249327008.0
outcome: Empty DataFrame
Columns: []
Index: []
output: ['acro_artifacts/histogram_0.png']
timestamp: 2025-03-06T19:43:35.149884
comments: []
exception:

The status of the record above is: fail.
Please explain why an exception should be granted.

 to be fair this is probably disclosive ...
INFO:acro:records:
uid: output_18
status: fail
type: histogram
properties: {'method': 'histogram'}
sdc: {}
command: hist = acro.hist(df, "inc_grants")
summary: Please check the minimum and the maximum values. The minimum value of the inc_grants column is: -10.0. The maximum value of the inc_grants column is: 249327008.0
outcome: Empty DataFrame
Columns: []
Index: []
output: ['acro_artifacts/histogram_1.png']
timestamp: 2025-03-06T19:43:35.204307
comments: []
exception:

The status of the record above is: fail.
Please explain why an exception should be granted.

 to be fair this is probably disclosive ...
INFO:acro:records:
uid: pivot_table
status: fail
type: table
properties: {'method': 'crosstab'}
sdc: {'summary': {'suppressed': False, 'negative': 0, 'missing': 0, 'threshold': 4, 'p-ratio': 0, 'nk-rule': 0, 'all-values-are-same': 0}, 'cells': {'negative': [], 'missing': [], 'threshold': [[0, 0], [0, 1], [1, 0], [1, 1]], 'p-ratio': [], 'nk-rule': [], 'all-values-are-same': []}}
command: acro.crosstab(mydata.year, mydata.survivor)
summary: fail; threshold: 4 cells may need suppressing;
outcome: survivor Dead_in_2015 Alive_in_2015
year
2010      threshold;    threshold;
2011      threshold;    threshold;
output: [survivor  Dead in 2015  Alive in 2015
year
2010                 2              2
2011                 2              2]
timestamp: 2025-03-06T19:43:34.423743
comments: []
exception:

The status of the record above is: fail.
Please explain why an exception should be granted.

 to be fair this is probably disclosive ...
INFO:acro:records:
uid: output_19
status: review
type: custom
properties: {}
sdc: {}
command: custom
summary: review
outcome: Empty DataFrame
Columns: []
Index: []
output: ['XandY.jpeg']
timestamp: 2025-03-06T19:43:35.258648
comments: ['This output is an image showing the relationship between X and Y']
exception:

The status of the record above is: review.
Please explain why an exception should be granted.

 please review- this image is not disclosive
INFO:acro:records:outputs written to: ACRO_RES

List files generated#

[40]:
# Names of the regular files (sub-directories excluded) directly inside
# SAVE_PATH, in sorted order.
files = sorted(
    entry
    for entry in os.listdir(SAVE_PATH)
    if os.path.isfile(os.path.join(SAVE_PATH, entry))
)
# Print one name per line; nothing is printed when the directory has no files.
for filename in files:
    print(filename)
XandY.jpeg
config.json
histogram_0.png
histogram_1.png
output_0_0.csv
output_10_0.csv
output_11_0.csv
output_12_0.csv
output_13_0.csv
output_13_1.csv
output_13_2.csv
output_14_0.csv
output_14_1.csv
output_14_2.csv
output_15_0.csv
output_15_1.csv
output_16_0.csv
output_16_1.csv
output_3_0.csv
output_5_0.csv
output_6_0.csv
output_7_0.csv
output_8_0.csv
output_9_0.csv
pivot_table_0.csv
results.json

Checksums#

[38]:
# Names of the regular files inside the "checksums" sub-directory of
# SAVE_PATH, in sorted order.
checksum_dir = os.path.join(SAVE_PATH, "checksums")
files = sorted(
    entry
    for entry in os.listdir(checksum_dir)
    if os.path.isfile(os.path.join(checksum_dir, entry))
)
# Print one name per line; nothing is printed when the directory has no files.
for filename in files:
    print(filename)
XandY.jpeg.txt
histogram_0.png.txt
histogram_1.png.txt
output_0_0.csv.txt
output_10_0.csv.txt
output_11_0.csv.txt
output_12_0.csv.txt
output_13_0.csv.txt
output_13_1.csv.txt
output_13_2.csv.txt
output_14_0.csv.txt
output_14_1.csv.txt
output_14_2.csv.txt
output_15_0.csv.txt
output_15_1.csv.txt
output_16_0.csv.txt
output_16_1.csv.txt
output_3_0.csv.txt
output_5_0.csv.txt
output_6_0.csv.txt
output_7_0.csv.txt
output_8_0.csv.txt
output_9_0.csv.txt
pivot_table_0.csv.txt
results.json.txt
[ ]: